biotite 0.40.0__cp311-cp311-win_amd64.whl → 0.41.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show
  1. biotite/__init__.py +1 -1
  2. biotite/database/pubchem/download.py +23 -23
  3. biotite/database/pubchem/query.py +7 -7
  4. biotite/file.py +17 -9
  5. biotite/sequence/align/banded.c +119 -119
  6. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  7. biotite/sequence/align/cigar.py +60 -15
  8. biotite/sequence/align/kmeralphabet.c +119 -119
  9. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  10. biotite/sequence/align/kmersimilarity.c +119 -119
  11. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  12. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  13. biotite/sequence/align/kmertable.cpp +119 -119
  14. biotite/sequence/align/localgapped.c +119 -119
  15. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  16. biotite/sequence/align/localungapped.c +119 -119
  17. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  18. biotite/sequence/align/multiple.c +119 -119
  19. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  20. biotite/sequence/align/pairwise.c +119 -119
  21. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  22. biotite/sequence/align/permutation.c +119 -119
  23. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  24. biotite/sequence/align/selector.c +119 -119
  25. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  26. biotite/sequence/align/tracetable.c +119 -119
  27. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  28. biotite/sequence/annotation.py +2 -2
  29. biotite/sequence/codec.c +119 -119
  30. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  31. biotite/sequence/io/fasta/convert.py +27 -24
  32. biotite/sequence/phylo/nj.c +119 -119
  33. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  34. biotite/sequence/phylo/tree.c +119 -119
  35. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/upgma.c +119 -119
  37. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  38. biotite/structure/__init__.py +2 -0
  39. biotite/structure/bonds.c +1124 -915
  40. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  41. biotite/structure/celllist.c +119 -119
  42. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  43. biotite/structure/charges.c +119 -119
  44. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  45. biotite/structure/dotbracket.py +2 -0
  46. biotite/structure/info/atoms.py +6 -1
  47. biotite/structure/info/bonds.py +1 -1
  48. biotite/structure/info/ccd/amino_acids.txt +17 -0
  49. biotite/structure/info/ccd/carbohydrates.txt +2 -0
  50. biotite/structure/info/ccd/components.bcif +0 -0
  51. biotite/structure/info/ccd/nucleotides.txt +1 -0
  52. biotite/structure/info/misc.py +69 -5
  53. biotite/structure/integrity.py +19 -70
  54. biotite/structure/io/ctab.py +12 -106
  55. biotite/structure/io/general.py +157 -165
  56. biotite/structure/io/gro/file.py +16 -16
  57. biotite/structure/io/mmtf/convertarray.c +119 -119
  58. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  59. biotite/structure/io/mmtf/convertfile.c +119 -119
  60. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  61. biotite/structure/io/mmtf/decode.c +119 -119
  62. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  63. biotite/structure/io/mmtf/encode.c +119 -119
  64. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  65. biotite/structure/io/mol/__init__.py +4 -2
  66. biotite/structure/io/mol/convert.py +71 -7
  67. biotite/structure/io/mol/ctab.py +414 -0
  68. biotite/structure/io/mol/header.py +116 -0
  69. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  70. biotite/structure/io/mol/sdf.py +909 -0
  71. biotite/structure/io/pdb/file.py +84 -31
  72. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  73. biotite/structure/io/pdbx/__init__.py +0 -1
  74. biotite/structure/io/pdbx/bcif.py +2 -3
  75. biotite/structure/io/pdbx/cif.py +9 -5
  76. biotite/structure/io/pdbx/component.py +4 -1
  77. biotite/structure/io/pdbx/convert.py +203 -79
  78. biotite/structure/io/pdbx/encoding.c +119 -119
  79. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  80. biotite/structure/repair.py +253 -0
  81. biotite/structure/sasa.c +119 -119
  82. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  83. biotite/structure/sequence.py +112 -0
  84. biotite/structure/superimpose.py +472 -13
  85. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
  86. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
  87. biotite/structure/io/pdbx/error.py +0 -14
  88. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  89. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
  90. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from ...atoms import AtomArray, AtomArrayStack, repeat
12
12
  from ...bonds import BondList, connect_via_residue_names
13
13
  from ...box import vectors_from_unitcell, unitcell_from_vectors
14
14
  from ....file import TextFile, InvalidFileError
15
- from ..general import _guess_element as guess_element
15
+ from ...repair import infer_elements
16
16
  from ...error import BadStructureError
17
17
  from ...filter import (
18
18
  filter_first_altloc,
@@ -23,6 +23,9 @@ from ...util import matrix_rotate
23
23
  from .hybrid36 import encode_hybrid36, decode_hybrid36, max_hybrid36_number
24
24
 
25
25
 
26
+ _PDB_MAX_ATOMS = 99999
27
+ _PDB_MAX_RESIDUES = 9999
28
+
26
29
  # slice objects for readability
27
30
  # ATOM/HETATM
28
31
  _record = slice(0, 6)
@@ -460,15 +463,14 @@ class PDBFile(TextFile):
460
463
 
461
464
  # Replace empty strings for elements with guessed types
462
465
  # This is used e.g. for PDB files created by Gromacs
463
- if "" in array.element:
464
- rep_num = 0
465
- for idx in range(len(array.element)):
466
- if not array.element[idx]:
467
- atom_name = array.atom_name[idx]
468
- array.element[idx] = guess_element(atom_name)
469
- rep_num += 1
466
+ empty_element_mask = array.element == ""
467
+ if empty_element_mask.any():
470
468
  warnings.warn(
471
- "{} elements were guessed from atom_name.".format(rep_num)
469
+ f"{np.count_nonzero(empty_element_mask)} elements "
470
+ "were guessed from atom name"
471
+ )
472
+ array.element[empty_element_mask] = infer_elements(
473
+ array.atom_name[empty_element_mask]
472
474
  )
473
475
 
474
476
  # Fill in coordinates
@@ -574,6 +576,8 @@ class PDBFile(TextFile):
574
576
  records are also written for all non-water hetero residues
575
577
  and all inter-residue connections.
576
578
  """
579
+ _check_pdb_compatibility(array, hybrid36)
580
+
577
581
  natoms = array.array_length()
578
582
  annot_categories = array.get_annotation_categories()
579
583
  record = np.char.array(np.where(array.hetero, "HETATM", "ATOM"))
@@ -599,25 +603,6 @@ class PDBFile(TextFile):
599
603
  else:
600
604
  charge = np.char.array(np.full(natoms, " ", dtype="U2"))
601
605
 
602
- # Do checks on atom array (stack)
603
- if hybrid36:
604
- max_atoms = max_hybrid36_number(5)
605
- max_residues = max_hybrid36_number(4)
606
- else:
607
- max_atoms, max_residues = 99999, 9999
608
- if array.array_length() > max_atoms:
609
- warnings.warn(f"More then {max_atoms:,} atoms per model")
610
- if (array.res_id > max_residues).any():
611
- warnings.warn(f"Residue IDs exceed {max_residues:,}")
612
- if np.isnan(array.coord).any():
613
- raise BadStructureError("Coordinates contain 'NaN' values")
614
- if any([len(name) > 1 for name in array.chain_id]):
615
- raise BadStructureError("Some chain IDs exceed 1 character")
616
- if any([len(name) > 3 for name in array.res_name]):
617
- raise BadStructureError("Some residue names exceed 3 characters")
618
- if any([len(name) > 4 for name in array.atom_name]):
619
- raise BadStructureError("Some atom names exceed 4 characters")
620
-
621
606
  if hybrid36:
622
607
  pdb_atom_id = np.char.array(
623
608
  [encode_hybrid36(i, 5) for i in atom_id]
@@ -630,14 +615,14 @@ class PDBFile(TextFile):
630
615
  # but negative IDs are also possible
631
616
  pdb_atom_id = np.char.array(np.where(
632
617
  atom_id > 0,
633
- ((atom_id - 1) % 99999) + 1,
618
+ ((atom_id - 1) % _PDB_MAX_ATOMS) + 1,
634
619
  atom_id
635
620
  ).astype(str))
636
621
  # Residue IDs are supported up to 9999,
637
622
  # but negative IDs are also possible
638
623
  pdb_res_id = np.char.array(np.where(
639
624
  array.res_id > 0,
640
- ((array.res_id - 1) % 9999) + 1,
625
+ ((array.res_id - 1) % _PDB_MAX_RESIDUES) + 1,
641
626
  array.res_id
642
627
  ).astype(str))
643
628
 
@@ -1184,4 +1169,72 @@ def _apply_transformations(structure, rotations, translations):
1184
1169
  coord += translation
1185
1170
  assembly_coord[i] = coord
1186
1171
 
1187
- return repeat(structure, assembly_coord)
1172
+ return repeat(structure, assembly_coord)
1173
+
1174
+
1175
+ def _check_pdb_compatibility(array, hybrid36):
1176
+ annot_categories = array.get_annotation_categories()
1177
+
1178
+ if hybrid36:
1179
+ max_atoms = max_hybrid36_number(5)
1180
+ max_residues = max_hybrid36_number(4)
1181
+ else:
1182
+ max_atoms, max_residues = _PDB_MAX_ATOMS, _PDB_MAX_RESIDUES
1183
+ if "atom_id" in annot_categories:
1184
+ max_atom_id = np.max(array.atom_id)
1185
+ else:
1186
+ max_atom_id = array.array_length()
1187
+
1188
+ if max_atom_id > max_atoms:
1189
+ warnings.warn(f"Atom IDs exceed {max_atoms:,}, will be wrapped")
1190
+ if (array.res_id > max_residues).any():
1191
+ warnings.warn(f"Residue IDs exceed {max_residues:,}, will be wrapped")
1192
+ if np.isnan(array.coord).any():
1193
+ raise BadStructureError("Coordinates contain 'NaN' values")
1194
+ if any([len(name) > 1 for name in array.chain_id]):
1195
+ raise BadStructureError("Some chain IDs exceed 1 character")
1196
+ if any([len(name) > 3 for name in array.res_name]):
1197
+ raise BadStructureError("Some residue names exceed 3 characters")
1198
+ if any([len(name) > 4 for name in array.atom_name]):
1199
+ raise BadStructureError("Some atom names exceed 4 characters")
1200
+ for i, coord_name in enumerate(["x", "y", "z"]):
1201
+ n_coord_digits = _number_of_integer_digits(array.coord[..., i])
1202
+ if n_coord_digits > 4:
1203
+ raise BadStructureError(
1204
+ f"4 pre-decimal columns for {coord_name}-coordinates are "
1205
+ f"available, but array would require {n_coord_digits}"
1206
+ )
1207
+ if "b_factor" in annot_categories:
1208
+ n_b_factor_digits = _number_of_integer_digits(array.b_factor)
1209
+ if n_b_factor_digits > 3:
1210
+ raise BadStructureError(
1211
+ "3 pre-decimal columns for B-factor are available, "
1212
+ f"but array would require {n_b_factor_digits}"
1213
+ )
1214
+ if "occupancy" in annot_categories:
1215
+ n_occupancy_digits = _number_of_integer_digits(array.occupancy)
1216
+ if n_occupancy_digits > 3:
1217
+ raise BadStructureError(
1218
+ "3 pre-decimal columns for occupancy are available, "
1219
+ f"but array would require {n_occupancy_digits}"
1220
+ )
1221
+ if "charge" in annot_categories:
1222
+ # The sign can be omitted is it is put into the adjacent column
1223
+ n_charge_digits = _number_of_integer_digits(np.abs(array.charge))
1224
+ if n_charge_digits > 1:
1225
+ raise BadStructureError(
1226
+ "1 column for charge is available, "
1227
+ f"but array would require {n_charge_digits}"
1228
+ )
1229
+
1230
+
1231
+ def _number_of_integer_digits(values):
1232
+ """
1233
+ Get the maximum number of characters needed to represent the
1234
+ pre-decimal positions of the given numeric values.
1235
+ """
1236
+ values = values.astype(int, copy=False)
1237
+ n_digits = 0
1238
+ n_digits = max(n_digits, len(str(np.min(values))))
1239
+ n_digits = max(n_digits, len(str(np.max(values))))
1240
+ return n_digits
@@ -20,5 +20,4 @@ from .bcif import *
20
20
  from .cif import *
21
21
  from .component import *
22
22
  from .encoding import *
23
- from .error import *
24
23
  from .legacy import *
@@ -12,9 +12,8 @@ import numpy as np
12
12
  import msgpack
13
13
  from .component import _Component, _HierarchicalContainer, MaskValue
14
14
  from .encoding import decode_stepwise, encode_stepwise, deserialize_encoding, \
15
- create_uncompressed_encoding, ByteArrayEncoding
16
- from .error import SerializationError
17
- from ....file import File, is_binary, is_open_compatible
15
+ create_uncompressed_encoding
16
+ from ....file import File, is_binary, is_open_compatible, SerializationError
18
17
 
19
18
 
20
19
  class BinaryCIFData(_Component):
@@ -11,8 +11,8 @@ import shlex
11
11
  from collections.abc import MutableMapping, Sequence
12
12
  import numpy as np
13
13
  from .component import _Component, MaskValue
14
- from .error import DeserializationError, SerializationError
15
- from ....file import File, is_open_compatible, is_text
14
+ from ....file import File, is_open_compatible, is_text, DeserializationError, \
15
+ SerializationError
16
16
 
17
17
 
18
18
  UNICODE_CHAR_SIZE = 4
@@ -402,7 +402,9 @@ class CIFCategory(_Component, MutableMapping):
402
402
  f"while the first column has row_count {self._row_count}"
403
403
  )
404
404
 
405
- if self._row_count == 1:
405
+ if self._row_count == 0:
406
+ raise ValueError("At least one row is required")
407
+ elif self._row_count == 1:
406
408
  lines = self._serialize_single()
407
409
  else:
408
410
  lines = self._serialize_looped()
@@ -766,7 +768,7 @@ class CIFFile(_Component, File, MutableMapping):
766
768
 
767
769
  @property
768
770
  def lines(self):
769
- return "\n".join(self.serialize())
771
+ return self.serialize().splitlines()
770
772
 
771
773
  @property
772
774
  def block(self):
@@ -978,7 +980,7 @@ def _to_single(lines, is_looped):
978
980
  processed_lines[out_i - 1] += " " + shlex.quote(multi_line_str)
979
981
  in_i = j + 1
980
982
 
981
- elif not is_looped and lines[in_i][0] in ["'", '"']:
983
+ elif not is_looped and lines[in_i][0] != "_":
982
984
  # Singleline value in the line after the corresponding key
983
985
  processed_lines[out_i - 1] += " " + lines[in_i]
984
986
  in_i += 1
@@ -1025,4 +1027,6 @@ def _multiline(value):
1025
1027
  def _arrayfy(data):
1026
1028
  if not isinstance(data, (Sequence, np.ndarray)) or isinstance(data, str):
1027
1029
  data = [data]
1030
+ elif len(data) == 0:
1031
+ raise ValueError("Array must contain at least one element")
1028
1032
  return np.asarray(data)
@@ -14,7 +14,7 @@ __all__ = ["MaskValue"]
14
14
  from enum import IntEnum
15
15
  from abc import ABCMeta, abstractmethod
16
16
  from collections.abc import MutableMapping
17
- from .error import SerializationError, DeserializationError
17
+ from ....file import SerializationError, DeserializationError
18
18
 
19
19
 
20
20
  class MaskValue(IntEnum):
@@ -105,6 +105,9 @@ class _Component(metaclass=ABCMeta):
105
105
  """
106
106
  raise NotImplementedError()
107
107
 
108
+ def __str__(self):
109
+ return str(self.serialize())
110
+
108
111
 
109
112
  class _HierarchicalContainer(_Component, MutableMapping,
110
113
  metaclass=ABCMeta):