biotite 1.1.0__cp311-cp311-macosx_11_0_arm64.whl → 1.3.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (160)
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/localapp.py +2 -2
  6. biotite/application/msaapp.py +10 -10
  7. biotite/application/muscle/app3.py +3 -3
  8. biotite/application/muscle/app5.py +3 -3
  9. biotite/application/sra/app.py +0 -5
  10. biotite/application/util.py +21 -1
  11. biotite/application/viennarna/rnaalifold.py +8 -8
  12. biotite/application/viennarna/rnaplot.py +10 -8
  13. biotite/application/viennarna/util.py +1 -1
  14. biotite/application/webapp.py +1 -1
  15. biotite/database/afdb/__init__.py +12 -0
  16. biotite/database/afdb/download.py +191 -0
  17. biotite/database/entrez/dbnames.py +10 -0
  18. biotite/database/entrez/download.py +9 -10
  19. biotite/database/entrez/key.py +1 -1
  20. biotite/database/entrez/query.py +5 -4
  21. biotite/database/pubchem/download.py +6 -6
  22. biotite/database/pubchem/error.py +10 -0
  23. biotite/database/pubchem/query.py +12 -23
  24. biotite/database/rcsb/download.py +3 -2
  25. biotite/database/rcsb/query.py +2 -3
  26. biotite/database/uniprot/check.py +2 -2
  27. biotite/database/uniprot/download.py +2 -5
  28. biotite/database/uniprot/query.py +3 -4
  29. biotite/file.py +14 -2
  30. biotite/interface/__init__.py +19 -0
  31. biotite/interface/openmm/__init__.py +20 -0
  32. biotite/interface/openmm/state.py +93 -0
  33. biotite/interface/openmm/system.py +227 -0
  34. biotite/interface/pymol/__init__.py +201 -0
  35. biotite/interface/pymol/cgo.py +346 -0
  36. biotite/interface/pymol/convert.py +185 -0
  37. biotite/interface/pymol/display.py +267 -0
  38. biotite/interface/pymol/object.py +1226 -0
  39. biotite/interface/pymol/shapes.py +178 -0
  40. biotite/interface/pymol/startup.py +169 -0
  41. biotite/interface/rdkit/__init__.py +19 -0
  42. biotite/interface/rdkit/mol.py +490 -0
  43. biotite/interface/version.py +94 -0
  44. biotite/interface/warning.py +19 -0
  45. biotite/sequence/align/__init__.py +0 -4
  46. biotite/sequence/align/alignment.py +33 -11
  47. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  48. biotite/sequence/align/banded.pyx +22 -22
  49. biotite/sequence/align/cigar.py +2 -2
  50. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  51. biotite/sequence/align/kmeralphabet.pyx +2 -2
  52. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  53. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  54. biotite/sequence/align/kmertable.pyx +6 -6
  55. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  56. biotite/sequence/align/localgapped.pyx +47 -47
  57. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  58. biotite/sequence/align/localungapped.pyx +10 -10
  59. biotite/sequence/align/matrix.py +12 -3
  60. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  61. biotite/sequence/align/multiple.pyx +1 -2
  62. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  63. biotite/sequence/align/pairwise.pyx +37 -39
  64. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  65. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  66. biotite/sequence/align/selector.pyx +2 -2
  67. biotite/sequence/align/statistics.py +1 -1
  68. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  69. biotite/sequence/alphabet.py +2 -2
  70. biotite/sequence/annotation.py +19 -13
  71. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  72. biotite/sequence/codon.py +1 -2
  73. biotite/sequence/graphics/alignment.py +25 -39
  74. biotite/sequence/graphics/dendrogram.py +4 -2
  75. biotite/sequence/graphics/features.py +2 -2
  76. biotite/sequence/graphics/logo.py +10 -12
  77. biotite/sequence/io/fasta/convert.py +1 -2
  78. biotite/sequence/io/fasta/file.py +1 -1
  79. biotite/sequence/io/fastq/file.py +3 -3
  80. biotite/sequence/io/genbank/file.py +3 -3
  81. biotite/sequence/io/genbank/sequence.py +2 -0
  82. biotite/sequence/io/gff/convert.py +1 -1
  83. biotite/sequence/io/gff/file.py +1 -2
  84. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  85. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  86. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  87. biotite/sequence/profile.py +19 -25
  88. biotite/sequence/search.py +0 -1
  89. biotite/sequence/seqtypes.py +12 -5
  90. biotite/sequence/sequence.py +1 -2
  91. biotite/structure/__init__.py +2 -0
  92. biotite/structure/alphabet/i3d.py +1 -2
  93. biotite/structure/alphabet/pb.py +1 -2
  94. biotite/structure/alphabet/unkerasify.py +8 -2
  95. biotite/structure/atoms.py +35 -27
  96. biotite/structure/basepairs.py +39 -40
  97. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  98. biotite/structure/bonds.pyx +8 -5
  99. biotite/structure/box.py +159 -23
  100. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  101. biotite/structure/celllist.pyx +83 -68
  102. biotite/structure/chains.py +17 -55
  103. biotite/structure/charges.cpython-311-darwin.so +0 -0
  104. biotite/structure/compare.py +420 -13
  105. biotite/structure/density.py +1 -1
  106. biotite/structure/dotbracket.py +31 -32
  107. biotite/structure/filter.py +8 -8
  108. biotite/structure/geometry.py +15 -15
  109. biotite/structure/graphics/rna.py +19 -16
  110. biotite/structure/hbond.py +18 -21
  111. biotite/structure/info/atoms.py +11 -2
  112. biotite/structure/info/ccd.py +0 -2
  113. biotite/structure/info/components.bcif +0 -0
  114. biotite/structure/info/groups.py +0 -3
  115. biotite/structure/info/misc.py +0 -1
  116. biotite/structure/info/radii.py +92 -22
  117. biotite/structure/info/standardize.py +1 -2
  118. biotite/structure/integrity.py +4 -6
  119. biotite/structure/io/general.py +2 -2
  120. biotite/structure/io/gro/file.py +8 -9
  121. biotite/structure/io/mol/convert.py +1 -1
  122. biotite/structure/io/mol/ctab.py +33 -28
  123. biotite/structure/io/mol/mol.py +1 -1
  124. biotite/structure/io/mol/sdf.py +39 -13
  125. biotite/structure/io/pdb/convert.py +86 -5
  126. biotite/structure/io/pdb/file.py +90 -24
  127. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  128. biotite/structure/io/pdbqt/file.py +4 -4
  129. biotite/structure/io/pdbx/bcif.py +22 -7
  130. biotite/structure/io/pdbx/cif.py +20 -7
  131. biotite/structure/io/pdbx/component.py +6 -0
  132. biotite/structure/io/pdbx/compress.py +71 -34
  133. biotite/structure/io/pdbx/convert.py +429 -77
  134. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  135. biotite/structure/io/pdbx/encoding.pyx +39 -23
  136. biotite/structure/io/trajfile.py +9 -6
  137. biotite/structure/io/util.py +38 -0
  138. biotite/structure/mechanics.py +0 -1
  139. biotite/structure/molecules.py +0 -15
  140. biotite/structure/pseudoknots.py +13 -19
  141. biotite/structure/repair.py +2 -4
  142. biotite/structure/residues.py +20 -48
  143. biotite/structure/rings.py +335 -0
  144. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  145. biotite/structure/sasa.pyx +30 -30
  146. biotite/structure/segments.py +123 -9
  147. biotite/structure/sequence.py +0 -1
  148. biotite/structure/spacegroups.json +1567 -0
  149. biotite/structure/spacegroups.license +26 -0
  150. biotite/structure/sse.py +0 -2
  151. biotite/structure/superimpose.py +75 -253
  152. biotite/structure/tm.py +581 -0
  153. biotite/structure/transform.py +232 -26
  154. biotite/structure/util.py +3 -3
  155. biotite/version.py +9 -4
  156. biotite/visualize.py +111 -1
  157. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/METADATA +8 -36
  158. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/RECORD +160 -138
  159. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +3 -1
  160. {biotite-1.1.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -24,6 +24,7 @@ from biotite.structure.io.pdb.hybrid36 import (
24
24
  encode_hybrid36,
25
25
  max_hybrid36_number,
26
26
  )
27
+ from biotite.structure.io.util import number_of_integer_digits
27
28
  from biotite.structure.repair import infer_elements
28
29
  from biotite.structure.util import matrix_rotate
29
30
 
@@ -70,10 +71,10 @@ class PDBFile(TextFile):
70
71
  records cannot be written.
71
72
  Additionally, *REMARK* records can be read
72
73
 
73
- See also
74
+ See Also
74
75
  --------
75
- CIFFile
76
- BinaryCIFFile
76
+ CIFFile : Interface to CIF files, a modern replacement for PDB files.
77
+ BinaryCIFFile : Interface to BinaryCIF files, a binary variant of CIF files.
77
78
 
78
79
  Examples
79
80
  --------
@@ -597,7 +598,7 @@ class PDBFile(TextFile):
597
598
  The array or stack to be saved into this file. If a stack
598
599
  is given, each array in the stack is saved as separate
599
600
  model.
600
- hybrid36: bool, optional
601
+ hybrid36 : bool, optional
601
602
  Defines wether the file should be written in hybrid-36
602
603
  format.
603
604
 
@@ -894,7 +895,7 @@ class PDBFile(TextFile):
894
895
  if assembly_start_i is None:
895
896
  if assembly_id is None:
896
897
  raise InvalidFileError(
897
- "File does not contain transformation " "expressions for assemblies"
898
+ "File does not contain transformation expressions for assemblies"
898
899
  )
899
900
  else:
900
901
  raise KeyError(f"The assembly ID '{assembly_id}' is not found")
@@ -953,7 +954,7 @@ class PDBFile(TextFile):
953
954
 
954
955
  return assembly
955
956
 
956
- def get_symmetry_mates(
957
+ def get_unit_cell(
957
958
  self, model=None, altloc="first", extra_fields=[], include_bonds=False
958
959
  ):
959
960
  """
@@ -1020,7 +1021,7 @@ class PDBFile(TextFile):
1020
1021
 
1021
1022
  >>> import os.path
1022
1023
  >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
1023
- >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
1024
+ >>> atoms_in_unit_cell = file.get_unit_cell(model=1)
1024
1025
  """
1025
1026
  # Get base structure
1026
1027
  structure = self.get_structure(
@@ -1040,6 +1041,83 @@ class PDBFile(TextFile):
1040
1041
  rotations, translations = _parse_transformations(transform_lines)
1041
1042
  return _apply_transformations(structure, rotations, translations)
1042
1043
 
1044
+ def get_symmetry_mates(
1045
+ self, model=None, altloc="first", extra_fields=[], include_bonds=False
1046
+ ):
1047
+ """
1048
+ Build a structure model containing all symmetric copies
1049
+ of the structure within a single unit cell, given by the space
1050
+ group.
1051
+
1052
+ This function receives the data from ``REMARK 290`` records in
1053
+ the file.
1054
+ Consequently, this remark must be present in the file, which is
1055
+ usually only true for crystal structures.
1056
+
1057
+ DEPRECATED: Use :meth:`get_unit_cell()` instead.
1058
+
1059
+ Parameters
1060
+ ----------
1061
+ model : int, optional
1062
+ If this parameter is given, the function will return an
1063
+ :class:`AtomArray` from the atoms corresponding to the given
1064
+ model number (starting at 1).
1065
+ Negative values are used to index models starting from the
1066
+ last model instead of the first model.
1067
+ If this parameter is omitted, an :class:`AtomArrayStack`
1068
+ containing all models will be returned, even if the
1069
+ structure contains only one model.
1070
+ altloc : {'first', 'occupancy', 'all'}
1071
+ This parameter defines how *altloc* IDs are handled:
1072
+ - ``'first'`` - Use atoms that have the first
1073
+ *altloc* ID appearing in a residue.
1074
+ - ``'occupancy'`` - Use atoms that have the *altloc* ID
1075
+ with the highest occupancy for a residue.
1076
+ - ``'all'`` - Use all atoms.
1077
+ Note that this leads to duplicate atoms.
1078
+ When this option is chosen, the ``altloc_id``
1079
+ annotation array is added to the returned structure.
1080
+ extra_fields : list of str, optional
1081
+ The strings in the list are optional annotation categories
1082
+ that should be stored in the output array or stack.
1083
+ These are valid values:
1084
+ ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
1085
+ ``'charge'``.
1086
+ include_bonds : bool, optional
1087
+ If set to true, a :class:`BondList` will be created for the
1088
+ resulting :class:`AtomArray` containing the bond information
1089
+ from the file.
1090
+ Bonds, whose order could not be determined from the
1091
+ *Chemical Component Dictionary*
1092
+ (e.g. especially inter-residue bonds),
1093
+ have :attr:`BondType.ANY`, since the PDB format itself does
1094
+ not support bond orders.
1095
+
1096
+ Returns
1097
+ -------
1098
+ symmetry_mates : AtomArray or AtomArrayStack
1099
+ All atoms within a single unit cell.
1100
+ The return type depends on the `model` parameter.
1101
+
1102
+ Notes
1103
+ -----
1104
+ To expand the structure beyond a single unit cell, use
1105
+ :func:`repeat_box()` with the return value as its
1106
+ input.
1107
+
1108
+ Examples
1109
+ --------
1110
+
1111
+ >>> import os.path
1112
+ >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
1113
+ >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
1114
+ """
1115
+ warnings.warn(
1116
+ "'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
1117
+ DeprecationWarning,
1118
+ )
1119
+ return self.get_unit_cell(model, altloc, extra_fields, include_bonds)
1120
+
1043
1121
  def _index_models_and_atoms(self):
1044
1122
  # Line indices where a new model starts
1045
1123
  self._model_start_i = np.array(
@@ -1106,7 +1184,7 @@ class PDBFile(TextFile):
1106
1184
  length = model_length
1107
1185
  if model_length != length:
1108
1186
  raise InvalidFileError(
1109
- f"Model {model_i+1} has {model_length} atoms, "
1187
+ f"Model {model_i + 1} has {model_length} atoms, "
1110
1188
  f"but model 1 has {length} atoms, must be equal"
1111
1189
  )
1112
1190
  return length
@@ -1248,21 +1326,21 @@ def _check_pdb_compatibility(array, hybrid36):
1248
1326
  if any([len(name) > 4 for name in array.atom_name]):
1249
1327
  raise BadStructureError("Some atom names exceed 4 characters")
1250
1328
  for i, coord_name in enumerate(["x", "y", "z"]):
1251
- n_coord_digits = _number_of_integer_digits(array.coord[..., i])
1329
+ n_coord_digits = number_of_integer_digits(array.coord[..., i])
1252
1330
  if n_coord_digits > 4:
1253
1331
  raise BadStructureError(
1254
1332
  f"4 pre-decimal columns for {coord_name}-coordinates are "
1255
1333
  f"available, but array would require {n_coord_digits}"
1256
1334
  )
1257
1335
  if "b_factor" in annot_categories:
1258
- n_b_factor_digits = _number_of_integer_digits(array.b_factor)
1336
+ n_b_factor_digits = number_of_integer_digits(array.b_factor)
1259
1337
  if n_b_factor_digits > 3:
1260
1338
  raise BadStructureError(
1261
1339
  "3 pre-decimal columns for B-factor are available, "
1262
1340
  f"but array would require {n_b_factor_digits}"
1263
1341
  )
1264
1342
  if "occupancy" in annot_categories:
1265
- n_occupancy_digits = _number_of_integer_digits(array.occupancy)
1343
+ n_occupancy_digits = number_of_integer_digits(array.occupancy)
1266
1344
  if n_occupancy_digits > 3:
1267
1345
  raise BadStructureError(
1268
1346
  "3 pre-decimal columns for occupancy are available, "
@@ -1270,21 +1348,9 @@ def _check_pdb_compatibility(array, hybrid36):
1270
1348
  )
1271
1349
  if "charge" in annot_categories:
1272
1350
  # The sign can be omitted is it is put into the adjacent column
1273
- n_charge_digits = _number_of_integer_digits(np.abs(array.charge))
1351
+ n_charge_digits = number_of_integer_digits(np.abs(array.charge))
1274
1352
  if n_charge_digits > 1:
1275
1353
  raise BadStructureError(
1276
1354
  "1 column for charge is available, "
1277
1355
  f"but array would require {n_charge_digits}"
1278
1356
  )
1279
-
1280
-
1281
- def _number_of_integer_digits(values):
1282
- """
1283
- Get the maximum number of characters needed to represent the
1284
- pre-decimal positions of the given numeric values.
1285
- """
1286
- values = values.astype(int, copy=False)
1287
- n_digits = 0
1288
- n_digits = max(n_digits, len(str(np.min(values))))
1289
- n_digits = max(n_digits, len(str(np.max(values))))
1290
- return n_digits
@@ -525,9 +525,9 @@ class PDBQTFile(TextFile):
525
525
  f"{atoms.chain_id[i]:1}"
526
526
  f"{atoms.res_id[i]:>4d}"
527
527
  f"{atoms.ins_code[i]:1} "
528
- f"{atoms.coord[i,0]:>8.3f}"
529
- f"{atoms.coord[i,1]:>8.3f}"
530
- f"{atoms.coord[i,2]:>8.3f}"
528
+ f"{atoms.coord[i, 0]:>8.3f}"
529
+ f"{atoms.coord[i, 1]:>8.3f}"
530
+ f"{atoms.coord[i, 2]:>8.3f}"
531
531
  f"{occupancy[i]:>6.2f}"
532
532
  f"{b_factor[i]:>6.2f} "
533
533
  f"{charges[i]:>6.3f} "
@@ -604,7 +604,7 @@ class PDBQTFile(TextFile):
604
604
  length = model_length
605
605
  if model_length != length:
606
606
  raise InvalidFileError(
607
- f"Model {model_i+1} has {model_length} atoms, "
607
+ f"Model {model_i + 1} has {model_length} atoms, "
608
608
  f"but model 1 has {length} atoms, must be equal"
609
609
  )
610
610
  return length
@@ -195,7 +195,7 @@ class BinaryCIFColumn(_Component):
195
195
  mask = BinaryCIFData(mask)
196
196
  if len(data) != len(mask):
197
197
  raise IndexError(
198
- f"Data has length {len(data)}, " f"but mask has length {len(mask)}"
198
+ f"Data has length {len(data)}, but mask has length {len(mask)}"
199
199
  )
200
200
  self._data = data
201
201
  self._mask = mask
@@ -256,6 +256,11 @@ class BinaryCIFColumn(_Component):
256
256
  ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
257
257
  By default, masked elements are converted to ``'.'`` or
258
258
  ``'?'`` depending on the :class:`MaskValue`.
259
+
260
+ Returns
261
+ -------
262
+ array : ndarray
263
+ The column data as array.
259
264
  """
260
265
  if dtype is None:
261
266
  dtype = self._data.array.dtype
@@ -341,12 +346,15 @@ class BinaryCIFCategory(_HierarchicalContainer):
341
346
  into a :class:`BinaryCIFColumn`).
342
347
  By default, an empty category is created.
343
348
  Each column must have the same length.
349
+ row_count : int, optional
350
+ The number of rows in the category.
344
351
 
345
352
  Attributes
346
353
  ----------
347
354
  row_count : int
348
355
  The number of rows in the category, i.e. the length of each
349
356
  column.
357
+ By default, the row count is determined when the first column is added.
350
358
 
351
359
  Examples
352
360
  --------
@@ -526,6 +534,19 @@ class BinaryCIFFile(File, _HierarchicalContainer):
526
534
  object, use the high-level :func:`get_structure()` or
527
535
  :func:`set_structure()` function respectively.
528
536
 
537
+ Parameters
538
+ ----------
539
+ blocks : dict (str -> BinaryCIFBlock), optional
540
+ The initial blocks of the file.
541
+ Maps the block names to the corresponding :class:`BinaryCIFBlock` objects.
542
+ By default no initial blocks are added.
543
+
544
+ Attributes
545
+ ----------
546
+ block : BinaryCIFBlock
547
+ The sole block of the file.
548
+ If the file contains multiple blocks, an exception is raised.
549
+
529
550
  Notes
530
551
  -----
531
552
  The content of *BinaryCIF* files are lazily deserialized:
@@ -534,12 +555,6 @@ class BinaryCIFFile(File, _HierarchicalContainer):
534
555
  The decoded :class:`BinaryCIFBlock`/:class:`BinaryCIFCategory`
535
556
  objects are cached for subsequent accesses.
536
557
 
537
- Attributes
538
- ----------
539
- block : BinaryCIFBlock
540
- The sole block of the file.
541
- If the file contains multiple blocks, an exception is raised.
542
-
543
558
  Examples
544
559
  --------
545
560
  Read a *BinaryCIF* file and access its content:
@@ -149,7 +149,7 @@ class CIFColumn:
149
149
  mask = CIFData(mask, np.uint8)
150
150
  if len(mask) != len(data):
151
151
  raise IndexError(
152
- f"Data has length {len(data)}, " f"but mask has length {len(mask)}"
152
+ f"Data has length {len(data)}, but mask has length {len(mask)}"
153
153
  )
154
154
  self._data = data
155
155
  self._mask = mask
@@ -215,6 +215,11 @@ class CIFColumn:
215
215
  ``MaskValue.INAPPLICABLE`` or ``MaskValue.MISSING``.
216
216
  By default, masked elements are converted to ``'.'`` or
217
217
  ``'?'`` depending on the :class:`MaskValue`.
218
+
219
+ Returns
220
+ -------
221
+ array : ndarray
222
+ The column data as array.
218
223
  """
219
224
  if self._mask is None:
220
225
  return self._data.array.astype(dtype, copy=False)
@@ -721,6 +726,19 @@ class CIFFile(_Component, File, MutableMapping):
721
726
  use the high-level :func:`get_structure()` or
722
727
  :func:`set_structure()` function respectively.
723
728
 
729
+ Parameters
730
+ ----------
731
+ blocks : dict (str -> CIFBlock), optional
732
+ The initial blocks of the file.
733
+ Maps the block names to the corresponding :class:`CIFBlock` objects.
734
+ By default no initial blocks are added.
735
+
736
+ Attributes
737
+ ----------
738
+ block : CIFBlock
739
+ The sole block of the file.
740
+ If the file contains multiple blocks, an exception is raised.
741
+
724
742
  Notes
725
743
  -----
726
744
  The content of CIF files are lazily deserialized:
@@ -731,12 +749,6 @@ class CIFFile(_Component, File, MutableMapping):
731
749
  The deserialized :class:`CIFBlock`/:class:`CIFCategory` objects
732
750
  are cached for subsequent accesses.
733
751
 
734
- Attributes
735
- ----------
736
- block : CIFBlock
737
- The sole block of the file.
738
- If the file contains multiple blocks, an exception is raised.
739
-
740
752
  Examples
741
753
  --------
742
754
  Read a CIF file and access its content:
@@ -884,6 +896,7 @@ class CIFFile(_Component, File, MutableMapping):
884
896
  block = CIFBlock.deserialize(block)
885
897
  except Exception:
886
898
  raise DeserializationError(f"Failed to deserialize block '{key}'")
899
+ block.name = key
887
900
  # Update with deserialized object
888
901
  self._blocks[key] = block
889
902
  return block
@@ -120,6 +120,12 @@ class _HierarchicalContainer(_Component, MutableMapping, metaclass=ABCMeta):
120
120
  A component is only deserialized from the serialized data, if it
121
121
  is accessed.
122
122
  The deserialized component is then cached in the container.
123
+
124
+ Parameters
125
+ ----------
126
+ elements : dict, optional
127
+ The initial elements of the container.
128
+ By default no initial elements are added.
123
129
  """
124
130
 
125
131
  def __init__(self, elements=None):
@@ -3,6 +3,7 @@ __name__ = "biotite.structure.io.pdbx"
3
3
  __author__ = "Patrick Kunzmann"
4
4
 
5
5
  import itertools
6
+ import warnings
6
7
  import msgpack
7
8
  import numpy as np
8
9
  import biotite.structure.io.pdbx.bcif as bcif
@@ -17,7 +18,7 @@ from biotite.structure.io.pdbx.encoding import (
17
18
  )
18
19
 
19
20
 
20
- def compress(data, float_tolerance=1e-6):
21
+ def compress(data, float_tolerance=None, rtol=1e-6, atol=1e-4):
21
22
  """
22
23
  Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
23
24
  different data encodings for each data array and selecting the one, which results in
@@ -27,6 +28,14 @@ def compress(data, float_tolerance=1e-6):
27
28
  ----------
28
29
  data : BinaryCIFFile or BinaryCIFBlock or BinaryCIFCategory or BinaryCIFColumn or BinaryCIFData
29
30
  The data to compress.
31
+ float_tolerance : float, optional
32
+ The relative error that is accepted when compressing floating point numbers.
33
+ DEPRECATED: Use `rtol` instead.
34
+ rtol, atol : float, optional
35
+ The compression factor of floating point numbers is chosen such that
36
+ either the relative (`rtol`) or absolute (`atol`) tolerance is fulfilled
37
+ for each value, i.e. the difference between the compressed and uncompressed
38
+ value is smaller than the tolerance.
30
39
 
31
40
  Returns
32
41
  -------
@@ -35,8 +44,6 @@ def compress(data, float_tolerance=1e-6):
35
44
  If no improved compression is found for a :class:`BinaryCIFData` array,
36
45
  the input data is kept.
37
46
  Hence, the return value is no deep copy of the input data.
38
- float_tolerance : float, optional
39
- The relative error that is accepted when compressing floating point numbers.
40
47
 
41
48
  Examples
42
49
  --------
@@ -58,55 +65,70 @@ def compress(data, float_tolerance=1e-6):
58
65
  >>> print(f"{len(compressed_file.read()) // 1000} KB")
59
66
  111 KB
60
67
  """
68
+ if float_tolerance is not None:
69
+ warnings.warn(
70
+ "The 'float_tolerance' parameter is deprecated, use 'rtol' instead",
71
+ DeprecationWarning,
72
+ )
73
+
61
74
  match type(data):
62
75
  case bcif.BinaryCIFFile:
63
- return _compress_file(data, float_tolerance)
76
+ return _compress_file(data, rtol, atol)
64
77
  case bcif.BinaryCIFBlock:
65
- return _compress_block(data, float_tolerance)
78
+ return _compress_block(data, rtol, atol)
66
79
  case bcif.BinaryCIFCategory:
67
- return _compress_category(data, float_tolerance)
80
+ return _compress_category(data, rtol, atol)
68
81
  case bcif.BinaryCIFColumn:
69
- return _compress_column(data, float_tolerance)
82
+ return _compress_column(data, rtol, atol)
70
83
  case bcif.BinaryCIFData:
71
- return _compress_data(data, float_tolerance)
84
+ return _compress_data(data, rtol, atol)
72
85
  case _:
73
86
  raise TypeError(f"Unsupported type {type(data).__name__}")
74
87
 
75
88
 
76
- def _compress_file(bcif_file, float_tolerance):
89
+ def _compress_file(bcif_file, rtol, atol):
77
90
  compressed_file = bcif.BinaryCIFFile()
78
91
  for block_name, bcif_block in bcif_file.items():
79
- compressed_block = _compress_block(bcif_block, float_tolerance)
92
+ try:
93
+ compressed_block = _compress_block(bcif_block, rtol, atol)
94
+ except Exception:
95
+ raise ValueError(f"Failed to compress block '{block_name}'")
80
96
  compressed_file[block_name] = compressed_block
81
97
  return compressed_file
82
98
 
83
99
 
84
- def _compress_block(bcif_block, float_tolerance):
100
+ def _compress_block(bcif_block, rtol, atol):
85
101
  compressed_block = bcif.BinaryCIFBlock()
86
102
  for category_name, bcif_category in bcif_block.items():
87
- compressed_category = _compress_category(bcif_category, float_tolerance)
103
+ try:
104
+ compressed_category = _compress_category(bcif_category, rtol, atol)
105
+ except Exception:
106
+ raise ValueError(f"Failed to compress category '{category_name}'")
88
107
  compressed_block[category_name] = compressed_category
89
108
  return compressed_block
90
109
 
91
110
 
92
- def _compress_category(bcif_category, float_tolerance):
111
+ def _compress_category(bcif_category, rtol, atol):
93
112
  compressed_category = bcif.BinaryCIFCategory()
94
113
  for column_name, bcif_column in bcif_category.items():
95
- compressed_column = _compress_column(bcif_column, float_tolerance)
114
+ try:
115
+ compressed_column = _compress_column(bcif_column, rtol, atol)
116
+ except Exception:
117
+ raise ValueError(f"Failed to compress column '{column_name}'")
96
118
  compressed_category[column_name] = compressed_column
97
119
  return compressed_category
98
120
 
99
121
 
100
- def _compress_column(bcif_column, float_tolerance):
101
- data = _compress_data(bcif_column.data, float_tolerance)
122
+ def _compress_column(bcif_column, rtol, atol):
123
+ data = _compress_data(bcif_column.data, rtol, atol)
102
124
  if bcif_column.mask is not None:
103
- mask = _compress_data(bcif_column.mask, float_tolerance)
125
+ mask = _compress_data(bcif_column.mask, rtol, atol)
104
126
  else:
105
127
  mask = None
106
128
  return bcif.BinaryCIFColumn(data, mask)
107
129
 
108
130
 
109
- def _compress_data(bcif_data, float_tolerance):
131
+ def _compress_data(bcif_data, rtol, atol):
110
132
  array = bcif_data.array
111
133
  if len(array) == 1:
112
134
  # No need to compress a single value -> Use default uncompressed encoding
@@ -123,16 +145,28 @@ def _compress_data(bcif_data, float_tolerance):
123
145
  return bcif.BinaryCIFData(array, [encoding])
124
146
 
125
147
  elif np.issubdtype(array.dtype, np.floating):
148
+ if not np.isfinite(array).all():
149
+ # NaN/inf values cannot be represented by integers
150
+ # -> do not use integer encoding
151
+ return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
126
152
  to_integer_encoding = FixedPointEncoding(
127
- 10 ** _get_decimal_places(array, float_tolerance)
153
+ 10 ** _get_decimal_places(array, rtol, atol)
128
154
  )
129
- integer_array = to_integer_encoding.encode(array)
130
- best_encoding, size_compressed = _find_best_integer_compression(integer_array)
131
- if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
132
- return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
133
- else:
134
- # The float array is smaller -> encode it directly as bytes
155
+ try:
156
+ integer_array = to_integer_encoding.encode(array)
157
+ except ValueError:
158
+ # With the given tolerances integer underflow/overflow would occur
159
+ # -> do not use integer encoding
135
160
  return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
161
+ else:
162
+ best_encoding, size_compressed = _find_best_integer_compression(
163
+ integer_array
164
+ )
165
+ if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
166
+ return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
167
+ else:
168
+ # The float array is smaller -> encode it directly as bytes
169
+ return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
136
170
 
137
171
  elif np.issubdtype(array.dtype, np.integer):
138
172
  array = _to_smallest_integer_type(array)
@@ -273,7 +307,7 @@ def _data_size_in_file(data):
273
307
  return len(bytes_in_file)
274
308
 
275
309
 
276
- def _get_decimal_places(array, tol):
310
+ def _get_decimal_places(array, rtol, atol):
277
311
  """
278
312
  Get the number of decimal places in a floating point array.
279
313
 
@@ -281,21 +315,24 @@ def _get_decimal_places(array, tol):
281
315
  ----------
282
316
  array : numpy.ndarray
283
317
  The array to analyze.
284
- tol : float, optional
285
- The relative tolerance allowed when the values are cut off after the returned
286
- number of decimal places.
318
+ rtol, atol : float, optional
319
+ The relative and absolute tolerance allowed when the values are cut off after
320
+ the returned number of decimal places.
287
321
 
288
322
  Returns
289
323
  -------
290
324
  decimals : int
291
325
  The number of decimal places.
292
326
  """
293
- # Decimals of NaN or infinite values do not make sense
294
- # and 0 would give NaN when rounding on decimals
295
- array = array[np.isfinite(array) & (array != 0)]
296
- for decimals in itertools.count(start=-_order_magnitude(array)):
327
+ if rtol <= 0 and atol <= 0:
328
+ raise ValueError("At least one of 'rtol' and 'atol' must be greater than 0")
329
+ # 0 would give NaN when rounding on decimals
330
+ array = array[array != 0]
331
+ for decimals in itertools.count(start=min(0, -_order_magnitude(array))):
297
332
  error = np.abs(np.round(array, decimals) - array)
298
- if np.all(error < tol * np.abs(array)):
333
+ if decimals == 100:
334
+ raise
335
+ if np.all((error < rtol * np.abs(array)) | (error < atol)):
299
336
  return decimals
300
337
 
301
338