biotite 1.0.0__cp311-cp311-macosx_11_0_arm64.whl → 1.1.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (92) hide show
  1. biotite/application/dssp/app.py +13 -3
  2. biotite/application/localapp.py +34 -0
  3. biotite/application/muscle/app3.py +2 -15
  4. biotite/application/muscle/app5.py +2 -2
  5. biotite/application/util.py +1 -1
  6. biotite/application/viennarna/rnaplot.py +6 -2
  7. biotite/database/rcsb/query.py +6 -6
  8. biotite/database/uniprot/check.py +20 -15
  9. biotite/database/uniprot/download.py +1 -1
  10. biotite/database/uniprot/query.py +1 -1
  11. biotite/sequence/align/alignment.py +16 -3
  12. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  13. biotite/sequence/align/banded.pyx +5 -5
  14. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  15. biotite/sequence/align/kmeralphabet.pyx +17 -0
  16. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  17. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  18. biotite/sequence/align/kmertable.pyx +52 -42
  19. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  20. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  21. biotite/sequence/align/matrix.py +273 -55
  22. biotite/sequence/align/matrix_data/3Di.mat +24 -0
  23. biotite/sequence/align/matrix_data/PB.license +21 -0
  24. biotite/sequence/align/matrix_data/PB.mat +18 -0
  25. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  26. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  27. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  28. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  29. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  30. biotite/sequence/alphabet.py +3 -0
  31. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  32. biotite/sequence/graphics/color_schemes/3di_flower.json +48 -0
  33. biotite/sequence/graphics/color_schemes/pb_flower.json +2 -1
  34. biotite/sequence/graphics/colorschemes.py +44 -11
  35. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  36. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  37. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  38. biotite/sequence/profile.py +86 -4
  39. biotite/sequence/seqtypes.py +124 -3
  40. biotite/setup_ccd.py +197 -0
  41. biotite/structure/__init__.py +4 -3
  42. biotite/structure/alphabet/__init__.py +25 -0
  43. biotite/structure/alphabet/encoder.py +332 -0
  44. biotite/structure/alphabet/encoder_weights_3di.kerasify +0 -0
  45. biotite/structure/alphabet/i3d.py +110 -0
  46. biotite/structure/alphabet/layers.py +86 -0
  47. biotite/structure/alphabet/pb.license +21 -0
  48. biotite/structure/alphabet/pb.py +171 -0
  49. biotite/structure/alphabet/unkerasify.py +122 -0
  50. biotite/structure/atoms.py +156 -43
  51. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  52. biotite/structure/bonds.pyx +72 -21
  53. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  54. biotite/structure/charges.cpython-311-darwin.so +0 -0
  55. biotite/structure/filter.py +1 -1
  56. biotite/structure/geometry.py +60 -113
  57. biotite/structure/info/__init__.py +1 -0
  58. biotite/structure/info/atoms.py +13 -13
  59. biotite/structure/info/bonds.py +12 -6
  60. biotite/structure/info/ccd.py +125 -32
  61. biotite/structure/info/{ccd/components.bcif → components.bcif} +0 -0
  62. biotite/structure/info/groups.py +63 -17
  63. biotite/structure/info/masses.py +9 -6
  64. biotite/structure/info/misc.py +15 -21
  65. biotite/structure/info/standardize.py +3 -2
  66. biotite/structure/io/mol/sdf.py +41 -40
  67. biotite/structure/io/pdb/convert.py +2 -0
  68. biotite/structure/io/pdb/file.py +74 -3
  69. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  70. biotite/structure/io/pdbqt/file.py +32 -32
  71. biotite/structure/io/pdbx/__init__.py +1 -0
  72. biotite/structure/io/pdbx/bcif.py +32 -8
  73. biotite/structure/io/pdbx/cif.py +148 -107
  74. biotite/structure/io/pdbx/component.py +9 -4
  75. biotite/structure/io/pdbx/compress.py +321 -0
  76. biotite/structure/io/pdbx/convert.py +227 -68
  77. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  78. biotite/structure/io/pdbx/encoding.pyx +98 -17
  79. biotite/structure/io/trajfile.py +16 -16
  80. biotite/structure/molecules.py +141 -141
  81. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  82. biotite/structure/segments.py +1 -2
  83. biotite/structure/util.py +73 -1
  84. biotite/version.py +2 -2
  85. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/METADATA +4 -1
  86. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/RECORD +88 -78
  87. biotite/structure/info/ccd/README.rst +0 -8
  88. biotite/structure/info/ccd/amino_acids.txt +0 -1663
  89. biotite/structure/info/ccd/carbohydrates.txt +0 -1135
  90. biotite/structure/info/ccd/nucleotides.txt +0 -798
  91. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/WHEEL +0 -0
  92. {biotite-1.0.0.dist-info → biotite-1.1.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -287,7 +287,8 @@ class FixedPointEncoding(Encoding):
287
287
  The data type of the array to be encoded.
288
288
  Either a NumPy dtype or a *BinaryCIF* type code is accepted.
289
289
  The dtype must be a float type.
290
- If omitted, 32-bit floats are assumed.
290
+ If omitted, the data type is taken from the data the
291
+ first time :meth:`encode()` is called.
291
292
 
292
293
  Attributes
293
294
  ----------
@@ -304,7 +305,7 @@ class FixedPointEncoding(Encoding):
304
305
  [987 654]
305
306
  """
306
307
  factor: ...
307
- src_type: ... = TypeCode.FLOAT32
308
+ src_type: ... = None
308
309
 
309
310
  def __post_init__(self):
310
311
  if self.src_type is not None:
@@ -315,6 +316,14 @@ class FixedPointEncoding(Encoding):
315
316
  )
316
317
 
317
318
  def encode(self, data):
319
+ # If not given in constructor, it is determined from the data
320
+ if self.src_type is None:
321
+ self.src_type = TypeCode.from_dtype(data.dtype)
322
+ if self.src_type not in (TypeCode.FLOAT32, TypeCode.FLOAT64):
323
+ raise ValueError(
324
+ "Only floating point types are supported"
325
+ )
326
+
318
327
  # Round to avoid wrong values due to floating point inaccuracies
319
328
  return np.round(data * self.factor).astype(np.int32)
320
329
 
@@ -340,7 +349,8 @@ class IntervalQuantizationEncoding(Encoding):
340
349
  The data type of the array to be encoded.
341
350
  Either a NumPy dtype or a *BinaryCIF* type code is accepted.
342
351
  The dtype must be a float type.
343
- If omitted, 32-bit floats are assumed.
352
+ If omitted, the data type is taken from the data the
353
+ first time :meth:`encode()` is called.
344
354
 
345
355
  Attributes
346
356
  ----------
@@ -367,13 +377,17 @@ class IntervalQuantizationEncoding(Encoding):
367
377
  min: ...
368
378
  max: ...
369
379
  num_steps: ...
370
- src_type: ... = TypeCode.FLOAT32
380
+ src_type: ... = None
371
381
 
372
382
  def __post_init__(self):
373
383
  if self.src_type is not None:
374
384
  self.src_type = TypeCode.from_dtype(self.src_type)
375
385
 
376
386
  def encode(self, data):
387
+ # If not given in constructor, it is determined from the data
388
+ if self.src_type is None:
389
+ self.src_type = TypeCode.from_dtype(data.dtype)
390
+
377
391
  steps = np.linspace(
378
392
  self.min, self.max, self.num_steps, dtype=data.dtype
379
393
  )
@@ -524,7 +538,8 @@ class DeltaEncoding(Encoding):
524
538
  first time :meth:`encode()` is called.
525
539
  origin : int, optional
526
540
  The starting value from which the differences are calculated.
527
- If omitted, the origin is set to 0.
541
+ If omitted, the value is taken from the first array element the
542
+ first time :meth:`encode()` is called.
528
543
 
529
544
  Attributes
530
545
  ----------
@@ -535,11 +550,14 @@ class DeltaEncoding(Encoding):
535
550
  --------
536
551
 
537
552
  >>> data = np.array([1, 1, 2, 3, 5, 8])
538
- >>> print(DeltaEncoding().encode(data))
539
- [1 0 1 1 2 3]
553
+ >>> encoding = DeltaEncoding()
554
+ >>> print(encoding.encode(data))
555
+ [0 0 1 1 2 3]
556
+ >>> print(encoding.origin)
557
+ 1
540
558
  """
541
559
  src_type: ... = None
542
- origin: ... = 0
560
+ origin: ... = None
543
561
 
544
562
  def __post_init__(self):
545
563
  if self.src_type is not None:
@@ -549,6 +567,8 @@ class DeltaEncoding(Encoding):
549
567
  # If not given in constructor, it is determined from the data
550
568
  if self.src_type is None:
551
569
  self.src_type = TypeCode.from_dtype(data.dtype)
570
+ if self.origin is None:
571
+ self.origin = data[0]
552
572
 
553
573
  data = data - self.origin
554
574
  return np.diff(data, prepend=0).astype(np.int32, copy=False)
@@ -582,7 +602,8 @@ class IntegerPackingEncoding(Encoding):
582
602
  is_unsigned : bool, optional
583
603
  Whether the values should be packed into signed or unsigned
584
604
  integers.
585
- If omitted, the values are packed into signed integers.
605
+ If omitted, it is determined the first time :meth:`encode()` is called:
606
+ unsigned integers are used if the data contains no negative values.
586
607
 
587
608
  Attributes
588
609
  ----------
@@ -601,7 +622,7 @@ class IntegerPackingEncoding(Encoding):
601
622
  """
602
623
  byte_count: ...
603
624
  src_size: ... = None
604
- is_unsigned: ... = False
625
+ is_unsigned: ... = None
605
626
 
606
627
  def encode(self, data):
607
628
  if self.src_size is None:
@@ -610,6 +631,9 @@ class IntegerPackingEncoding(Encoding):
610
631
  raise IndexError(
611
632
  "Given source size does not match actual data size"
612
633
  )
634
+ if self.is_unsigned is None:
635
+ # Only positive values -> use unsigned integers
636
+ self.is_unsigned = data.min().item() >= 0
613
637
 
614
638
  data = data.astype(np.int32, copy=False)
615
639
  return self._encode(
@@ -672,7 +696,7 @@ class IntegerPackingEncoding(Encoding):
672
696
  # Get length of output array
673
697
  # by summing up required length of each element
674
698
  cdef int number
675
- cdef int length = 0
699
+ cdef long length = 0
676
700
  for i in range(data.shape[0]):
677
701
  number = data[i]
678
702
  if number < 0:
@@ -750,7 +774,7 @@ class StringArrayEncoding(Encoding):
750
774
  If omitted, the unique strings are determined from the data the
751
775
  first time :meth:`encode()` is called.
752
776
  data_encoding : list of Encoding, optional
753
- The encodings that are applied to the indiy array.
777
+ The encodings that are applied to the index array.
754
778
  If omitted, the array is directly encoded into bytes without
755
779
  further compression.
756
780
  offset_encoding : list of Encoding, optional
@@ -837,8 +861,11 @@ class StringArrayEncoding(Encoding):
837
861
  raise TypeError("Data must be of string type")
838
862
 
839
863
  if self.strings is None:
840
- # 'unique()' already sorts the strings
841
- self.strings = np.unique(data)
864
+ # 'unique()' already sorts the strings, but this is not necessarily
865
+ # desired, as this makes efficient encoding of the indices more difficult
866
+ # -> Bring into the original order
867
+ _, unique_indices = np.unique(data, return_index=True)
868
+ self.strings = data[np.sort(unique_indices)]
842
869
  check_present = False
843
870
  else:
844
871
  check_present = True
@@ -888,6 +915,19 @@ _encoding_classes_kinds = {
888
915
 
889
916
 
890
917
  def deserialize_encoding(content):
918
+ """
919
+ Create a :class:`Encoding` by deserializing the given *BinaryCIF* content.
920
+
921
+ Parameters
922
+ ----------
923
+ content : dict
924
+ The encoding represenet as *BinaryCIF* dictionary.
925
+
926
+ Returns
927
+ -------
928
+ encoding : Encoding
929
+ The deserialized encoding.
930
+ """
891
931
  try:
892
932
  encoding_class = _encoding_classes[content["kind"]]
893
933
  except KeyError:
@@ -898,28 +938,69 @@ def deserialize_encoding(content):
898
938
 
899
939
 
900
940
  def create_uncompressed_encoding(array):
901
- dtype = array.dtype
941
+ """
942
+ Create a simple encoding for the given array that does not compress the data.
902
943
 
903
- if np.issubdtype(dtype, np.str_):
944
+ Parameters
945
+ ----------
946
+ array : ndarray
947
+ The array to create the encoding for.
948
+
949
+ Returns
950
+ -------
951
+ encoding : list of Encoding
952
+ The encoding for the data.
953
+ """
954
+ if np.issubdtype(array.dtype, np.str_):
904
955
  return [StringArrayEncoding()]
905
956
  else:
906
957
  return [ByteArrayEncoding()]
907
958
 
908
959
 
909
960
  def encode_stepwise(data, encoding):
961
+ """
962
+ Apply a list of encodings stepwise to the given data.
963
+
964
+ Parameters
965
+ ----------
966
+ data : ndarray
967
+ The data to be encoded.
968
+ encoding : list of Encoding
969
+ The encodings to be applied.
970
+
971
+ Returns
972
+ -------
973
+ encoded_data : ndarray or bytes
974
+ The encoded data.
975
+ """
910
976
  for encoding in encoding:
911
977
  data = encoding.encode(data)
912
978
  return data
913
979
 
914
980
 
915
981
  def decode_stepwise(data, encoding):
982
+ """
983
+ Decode the given data by stepwise applying a list of encodings in reverse order.
984
+
985
+ Parameters
986
+ ----------
987
+ data : ndarray or bytes
988
+ The data to be decoded.
989
+ encoding : list of Encoding
990
+ The encodings to be applied.
991
+
992
+ Returns
993
+ -------
994
+ decoded_data : ndarray
995
+ The decoded data.
996
+ """
916
997
  for enc in reversed(encoding):
917
998
  data = enc.decode(data)
918
999
  return data
919
1000
 
920
1001
 
921
1002
  def _camel_to_snake_case(attribute_name):
922
- return re.sub(CAMEL_CASE_PATTERN, "_", attribute_name).lower()
1003
+ return CAMEL_CASE_PATTERN.sub("_", attribute_name).lower()
923
1004
 
924
1005
 
925
1006
  def _snake_to_camel_case(attribute_name):
@@ -54,9 +54,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
54
54
 
55
55
  Parameters
56
56
  ----------
57
- file_name : str
57
+ file_name : str or Path
58
58
  The path of the file to be read.
59
- A file-like-object cannot be used.
59
+ Any other file-like object cannot be used.
60
60
  start : int, optional
61
61
  The frame index, where file parsing is started. If no value
62
62
  is given, parsing starts at the first frame.
@@ -101,7 +101,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
101
101
  chunk_size = ((chunk_size // step) + 1) * step
102
102
 
103
103
  traj_type = cls.traj_type()
104
- with traj_type(file_name, "r") as f:
104
+ with traj_type(str(file_name), "r") as f:
105
105
  if start is None:
106
106
  start = 0
107
107
  # Discard atoms before start
@@ -153,9 +153,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
153
153
 
154
154
  Parameters
155
155
  ----------
156
- file_name : str
156
+ file_name : str or Path
157
157
  The path of the file to be read.
158
- A file-like-object cannot be used.
158
+ Any other file-like object cannot be used.
159
159
  start : int, optional
160
160
  The frame index, where file parsing is started. If no value
161
161
  is given, parsing starts at the first frame.
@@ -196,7 +196,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
196
196
  The `step` parameter does currently not work for *DCD* files.
197
197
  """
198
198
  traj_type = cls.traj_type()
199
- with traj_type(file_name, "r") as f:
199
+ with traj_type(str(file_name), "r") as f:
200
200
  if start is None:
201
201
  start = 0
202
202
  # Discard atoms before start
@@ -280,9 +280,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
280
280
 
281
281
  Parameters
282
282
  ----------
283
- file_name : str
283
+ file_name : str or Path
284
284
  The path of the file to be read.
285
- A file-like-object cannot be used.
285
+ Any other file-like object cannot be used.
286
286
  template : AtomArray or AtomArrayStack
287
287
  The template array or stack, where the atom annotation data
288
288
  is taken from.
@@ -354,13 +354,13 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
354
354
 
355
355
  Parameters
356
356
  ----------
357
- file_name : str
358
- The path of the file to be written to.
359
- A file-like-object cannot be used.
357
+ file_name : str or Path
358
+ The path of the file to be written to.
359
+ Any other file-like object cannot be used.
360
360
  """
361
361
  traj_type = self.traj_type()
362
362
  param = self.prepare_write_values(self._coord, self._box, self._time)
363
- with traj_type(file_name, "w") as f:
363
+ with traj_type(str(file_name), "w") as f:
364
364
  f.write(**param)
365
365
 
366
366
  @classmethod
@@ -378,9 +378,9 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
378
378
 
379
379
  Parameters
380
380
  ----------
381
- file_name : str
382
- The path of the file to be written to.
383
- A file-like-object cannot be used.
381
+ file_name : str or Path
382
+ The path of the file to be written to.
383
+ Any other file-like object cannot be used.
384
384
  coord : generator or array-like of ndarray, shape=(n,3), dtype=float
385
385
  The atom coordinates for each frame.
386
386
  box : generator or array-like of ndarray, shape=(3,3), dtype=float, optional
@@ -398,7 +398,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
398
398
  time = itertools.repeat(None)
399
399
 
400
400
  traj_type = cls.traj_type()
401
- with traj_type(file_name, "w") as f:
401
+ with traj_type(str(file_name), "w") as f:
402
402
  for c, b, t in zip(coord, box, time):
403
403
  if c.ndim != 2:
404
404
  raise IndexError(