biotite 0.38.0__cp311-cp311-win_amd64.whl → 0.40.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (124) hide show
  1. biotite/__init__.py +3 -3
  2. biotite/application/application.py +33 -28
  3. biotite/application/dssp/app.py +18 -18
  4. biotite/application/sra/__init__.py +5 -0
  5. biotite/application/sra/app.py +337 -55
  6. biotite/database/entrez/__init__.py +2 -1
  7. biotite/database/entrez/check.py +14 -3
  8. biotite/database/entrez/download.py +20 -13
  9. biotite/database/entrez/key.py +44 -0
  10. biotite/database/entrez/query.py +38 -34
  11. biotite/database/pubchem/query.py +44 -44
  12. biotite/database/rcsb/download.py +19 -14
  13. biotite/database/rcsb/query.py +46 -46
  14. biotite/sequence/align/__init__.py +5 -1
  15. biotite/sequence/align/banded.c +1408 -1025
  16. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  17. biotite/sequence/align/buckets.py +69 -0
  18. biotite/sequence/align/cigar.py +389 -0
  19. biotite/sequence/align/kmeralphabet.c +3220 -2850
  20. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  21. biotite/sequence/align/kmersimilarity.c +713 -663
  22. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  23. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  24. biotite/sequence/align/kmertable.cpp +68398 -0
  25. biotite/sequence/align/localgapped.c +1507 -1074
  26. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  27. biotite/sequence/align/localungapped.c +1143 -833
  28. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  29. biotite/sequence/align/multiple.c +1569 -1092
  30. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  31. biotite/sequence/align/pairwise.c +1612 -1212
  32. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  33. biotite/sequence/align/permutation.c +33259 -0
  34. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  35. biotite/sequence/align/primes.txt +821 -0
  36. biotite/sequence/align/{kmertable.c → selector.c} +9129 -16497
  37. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  38. biotite/sequence/align/tracetable.c +685 -646
  39. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  40. biotite/sequence/codec.c +1159 -841
  41. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  42. biotite/sequence/graphics/alignment.py +212 -2
  43. biotite/sequence/io/genbank/annotation.py +11 -11
  44. biotite/sequence/phylo/nj.c +684 -636
  45. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  46. biotite/sequence/phylo/tree.c +970 -673
  47. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  48. biotite/sequence/phylo/upgma.c +672 -626
  49. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  50. biotite/structure/__init__.py +1 -1
  51. biotite/structure/atoms.py +1 -1
  52. biotite/structure/basepairs.py +7 -12
  53. biotite/structure/bonds.c +3861 -3749
  54. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  55. biotite/structure/celllist.c +727 -707
  56. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  57. biotite/structure/charges.c +1561 -1560
  58. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  59. biotite/structure/filter.py +30 -37
  60. biotite/structure/info/__init__.py +5 -8
  61. biotite/structure/info/atoms.py +25 -67
  62. biotite/structure/info/bonds.py +46 -100
  63. biotite/structure/info/ccd/README.rst +8 -0
  64. biotite/structure/info/ccd/amino_acids.txt +1646 -0
  65. biotite/structure/info/ccd/carbohydrates.txt +1133 -0
  66. biotite/structure/info/ccd/components.bcif +0 -0
  67. biotite/structure/info/ccd/nucleotides.txt +797 -0
  68. biotite/structure/info/ccd.py +95 -0
  69. biotite/structure/info/groups.py +90 -0
  70. biotite/structure/info/masses.py +21 -20
  71. biotite/structure/info/misc.py +11 -22
  72. biotite/structure/info/standardize.py +17 -12
  73. biotite/structure/io/__init__.py +2 -4
  74. biotite/structure/io/ctab.py +1 -1
  75. biotite/structure/io/general.py +37 -43
  76. biotite/structure/io/mmtf/__init__.py +3 -0
  77. biotite/structure/io/mmtf/convertarray.c +528 -365
  78. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  79. biotite/structure/io/mmtf/convertfile.c +725 -676
  80. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  81. biotite/structure/io/mmtf/decode.c +1070 -754
  82. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  83. biotite/structure/io/mmtf/encode.c +727 -677
  84. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  85. biotite/structure/io/mmtf/file.py +34 -26
  86. biotite/structure/io/npz/__init__.py +3 -0
  87. biotite/structure/io/npz/file.py +21 -18
  88. biotite/structure/io/pdb/__init__.py +3 -3
  89. biotite/structure/io/pdb/file.py +72 -70
  90. biotite/structure/io/pdb/hybrid36.c +540 -478
  91. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  92. biotite/structure/io/pdbqt/file.py +82 -68
  93. biotite/structure/io/pdbx/__init__.py +13 -6
  94. biotite/structure/io/pdbx/bcif.py +649 -0
  95. biotite/structure/io/pdbx/cif.py +1028 -0
  96. biotite/structure/io/pdbx/component.py +243 -0
  97. biotite/structure/io/pdbx/convert.py +707 -359
  98. biotite/structure/io/pdbx/encoding.c +112813 -0
  99. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  100. biotite/structure/io/pdbx/error.py +14 -0
  101. biotite/structure/io/pdbx/legacy.py +267 -0
  102. biotite/structure/molecules.py +151 -151
  103. biotite/structure/residues.py +40 -40
  104. biotite/structure/sasa.c +713 -644
  105. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  106. biotite/structure/superimpose.py +158 -115
  107. biotite/visualize.py +9 -11
  108. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
  109. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/RECORD +112 -102
  110. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
  111. biotite/structure/info/amino_acids.json +0 -1556
  112. biotite/structure/info/amino_acids.py +0 -42
  113. biotite/structure/info/carbohydrates.json +0 -1122
  114. biotite/structure/info/carbohydrates.py +0 -39
  115. biotite/structure/info/intra_bonds.msgpack +0 -0
  116. biotite/structure/info/link_types.msgpack +0 -1
  117. biotite/structure/info/nucleotides.json +0 -772
  118. biotite/structure/info/nucleotides.py +0 -39
  119. biotite/structure/info/residue_masses.msgpack +0 -0
  120. biotite/structure/info/residue_names.msgpack +0 -3
  121. biotite/structure/info/residues.msgpack +0 -0
  122. biotite/structure/io/pdbx/file.py +0 -652
  123. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
  124. {biotite-0.38.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0
Binary file
@@ -5,9 +5,9 @@
5
5
  __name__ = "biotite.sequence.graphics"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["SymbolPlotter", "LetterPlotter", "LetterSimilarityPlotter",
8
- "LetterTypePlotter",
8
+ "LetterTypePlotter","ArrayPlotter",
9
9
  "plot_alignment", "plot_alignment_similarity_based",
10
- "plot_alignment_type_based"]
10
+ "plot_alignment_type_based","plot_alignment_array"]
11
11
 
12
12
  import abc
13
13
  import numpy as np
@@ -345,6 +345,116 @@ class LetterTypePlotter(LetterPlotter):
345
345
  return self._colors[code]
346
346
 
347
347
 
348
+ class ArrayPlotter(LetterPlotter):
349
+ '''
350
+ This :class:`SymbolPlotter` quantitatively decorates sequence alignments with molecular
351
+ recognition data obtained from e.g. microarrays. Symbols are visualized as characters
352
+ on a colored background box. The color of a given box represents the recognition
353
+ signal. The intensity of the color is proportional to the strength of the
354
+ recognition.
355
+
356
+ Parameters
357
+ ----------
358
+ axes : Axes
359
+ A Matplotlib axes, that is used as plotting area.
360
+ fl_score : numpy.ndarray
361
+ The ndarray to store recognition values corresponding to the score residues.
362
+ By default, the normalized score is 1 for maximum recognition
363
+ and 0 for non-recognition (no color).
364
+ color_symbols : bool, optional
365
+ If true, the symbols themselves are colored.
366
+ If false, the symbols are black, and the boxes behind the
367
+ symbols are colored.
368
+ font_size : float, optional
369
+ Font size of the sequence symbols.
370
+ font_param : dict, optional
371
+ Additional parameters that are given to the
372
+ :class:`matplotlib.Text` instance of each symbol.
373
+
374
+ '''
375
+ def __init__(self, axes, fl_score, color_symbols=False,
376
+ font_size=None, font_param=None):
377
+
378
+ super().__init__(axes, color_symbols, font_size, font_param)
379
+ self.fl_score = fl_score
380
+ self._cmap = self._generate_colormap(colors["dimorange"],
381
+ self._color_symbols)
382
+
383
+
384
+ def get_color(self, alignment, column_i, seq_i):
385
+ index1 = alignment.trace[column_i, seq_i]
386
+ if index1 == -1:
387
+ spot_signal = 0
388
+ else:
389
+ spot_signal = self._get_signal(self.fl_score, column_i, seq_i)
390
+ return self._cmap(spot_signal)
391
+
392
+
393
+ def _get_signal(self, fl_score, column_i, seq_i):
394
+ if fl_score is None:
395
+ signal = 0.0
396
+ else:
397
+ signal = fl_score[column_i, seq_i]
398
+ return signal
399
+
400
+ def get_cmap(self):
401
+ return self._cmap
402
+
403
+
404
+ def plot_symbol(self, bbox, alignment, column_i, seq_i):
405
+ from matplotlib.patches import Rectangle
406
+
407
+ trace = alignment.trace
408
+
409
+ if trace[column_i, seq_i] != -1:
410
+ key1 = alignment.sequences[1][trace[column_i, 1]]
411
+ key2 = alignment.sequences[0][trace[column_i, 0]]
412
+ if key1 == key2:
413
+ if seq_i == 1:
414
+ symbol = "*"
415
+ else:
416
+ symbol = alignment.sequences[seq_i][trace[column_i, seq_i]]
417
+ else:
418
+ symbol = alignment.sequences[seq_i][trace[column_i, seq_i]]
419
+ else:
420
+ symbol = "-"
421
+ color = self.get_color(alignment, column_i, seq_i)
422
+ box = Rectangle(bbox.p0, bbox.width, bbox.height)
423
+ self.axes.add_patch(box)
424
+ text = self.axes.text(
425
+ bbox.x0 + bbox.width/2, bbox.y0 + bbox.height/2,
426
+ symbol, color="black", ha="center", va="center",
427
+ size=self._font_size, **self._font_param)
428
+ text.set_clip_on(True)
429
+
430
+ if self._color_symbols:
431
+ box.set_color("None")
432
+ text.set_color(color)
433
+ else:
434
+ box.set_color(color)
435
+
436
+ @staticmethod
437
+ def _generate_colormap(color, to_black):
438
+ from matplotlib.colors import ListedColormap, to_rgb
439
+
440
+ color = to_rgb(color)
441
+ if to_black:
442
+ cmap_val = np.stack(
443
+ [
444
+ np.interp(np.linspace(0, 1, 100), [0, 1], [color[i], 0])
445
+ for i in range(len(color))
446
+ ]
447
+ ).transpose()
448
+ else:
449
+ cmap_val = np.stack(
450
+ [
451
+ np.interp(np.linspace(0, 1, 100), [0, 1], [1, color[i]])
452
+ for i in range(len(color))
453
+ ]
454
+ ).transpose()
455
+ return ListedColormap(cmap_val)
456
+
457
+
348
458
  def plot_alignment(axes, alignment, symbol_plotter, symbols_per_line=50,
349
459
  show_numbers=False, number_size=None, number_functions=None,
350
460
  labels=None, label_size=None,
@@ -800,6 +910,106 @@ def plot_alignment_type_based(axes, alignment, symbols_per_line=50,
800
910
  spacing=spacing, symbol_spacing=symbol_spacing
801
911
  )
802
912
 
913
+ def plot_alignment_array(axes, alignment, fl_score, symbols_per_line=50,
914
+ show_numbers=False, number_size=None,
915
+ number_functions=None, labels=None, label_size=None,
916
+ show_line_position=False, spacing=1, color=None,
917
+ cmap=None, symbol_spacing=None,
918
+ symbol_size=None, symbol_param=None):
919
+
920
+ '''
921
+ Plot a pairwise sequence alignment using an :class:`ArrayPlotter`
922
+ instance.
923
+
924
+ Highlights sequence recognition regions at the positions of the respective
925
+ score residue per alignment column.
926
+
927
+ Parameters
928
+ ----------
929
+ axes : Axes
930
+ A Matplotlib axes, that is used as plotting area.
931
+ alignment : Alignment
932
+ The pairwise sequence alignment to be plotted.
933
+ fl_score : ndarray
934
+ The array to map fluorescence values to score residues.
935
+ By default the normalized score is 1 for maximum recognition
936
+ and 0 for non-recognition (no color).
937
+ symbol_plotter : SymbolPlotter
938
+ Instance of ArrayPlotter. Defines how the symbols are drawn
939
+ in the alignment.
940
+ symbols_per_line : int, optional
941
+ The amount of alignment columns that are displayed per line.
942
+ show_numbers : bool, optional
943
+ If true, the sequence position of the symbols in the last
944
+ alignment column of a line is shown on the right side of the
945
+ plot.
946
+ If the last symbol is a gap, the position of the last actual
947
+ symbol before this gap is taken.
948
+ If the first symbol did not occur up to this point,
949
+ no number is shown for this line.
950
+ By default the first symbol of a sequence has the position 1,
951
+ but this behavior can be changed using the `number_functions`
952
+ parameter.
953
+ number_size : float, optional
954
+ The font size of the position numbers
955
+ number_functions : list of (None or Callable(int -> int)), optional
956
+ By default the position of the first symbol in a sequence is 1,
957
+ i.e. the sequence position is the sequence index incremented by
958
+ 1.
959
+ The behavior can be changed with this parameter:
960
+ If supplied, the length of the list must match the number of
961
+ sequences in the alignment.
962
+ Every entry is a function that maps a sequence index (*int*) to
963
+ a sequence position (*int*) for the respective sequence.
964
+ A `None` entry means, that the default numbering is applied
965
+ for the sequence.
966
+ labels : list of str, optional
967
+ The sequence labels.
968
+ Must be the same size and order as the sequences in the
969
+ alignment.
970
+ label_size : float, optional
971
+ Font size of the labels
972
+ show_line_position : bool, optional
973
+ If true the position within a line is plotted below the
974
+ alignment.
975
+ spacing : float, optional
976
+ The spacing between the alignment lines. 1.0 means that the size
977
+ is equal to the size of a symbol box.
978
+ color : tuple or str, optional
979
+ A *Matplotlib* compatible color.
980
+ cmap : Colormap or str, optional
981
+ The boxes are
982
+ colored based on the normalized intensity value on the
983
+ given *Matplotlib* Colormap.
984
+ symbol_size : float, optional
985
+ Font size of the sequence symbols.
986
+ symbol_param : dict
987
+ Additional parameters that are given to the
988
+ :class:`matplotlib.Text` instance of each symbol.
989
+ symbol_spacing : int, optional
990
+ A space is placed between each group of this number of
991
+ symbols.
992
+
993
+ Notes
994
+ -----
995
+ A '*' represents a sequence match on the alignment
996
+ A '-' represents a sequence gap on the alignment
997
+
998
+ '''
999
+ symbol_plotter = ArrayPlotter(
1000
+ axes, fl_score = fl_score, font_size = symbol_size, font_param = symbol_param,
1001
+ )
1002
+
1003
+ plot_alignment(
1004
+ axes=axes, alignment=alignment, symbol_plotter=symbol_plotter,
1005
+ symbols_per_line=symbols_per_line,
1006
+ show_numbers=show_numbers, number_size=number_size,
1007
+ number_functions=number_functions,
1008
+ labels=labels, label_size=label_size,
1009
+ show_line_position=show_line_position,
1010
+ spacing=spacing, symbol_spacing=symbol_spacing
1011
+ )
1012
+
803
1013
 
804
1014
  def _get_last_valid_index(alignment, column_i, seq_i):
805
1015
  """
@@ -25,7 +25,7 @@ def get_annotation(gb_file, include_only=None):
25
25
  """
26
26
  Get the sequence annotation from the *FEATURES* field of a
27
27
  GenBank file.
28
-
28
+
29
29
  Parameters
30
30
  ----------
31
31
  gb_file : GenBankFile
@@ -33,7 +33,7 @@ def get_annotation(gb_file, include_only=None):
33
33
  include_only : iterable object of str, optional
34
34
  List of names of feature keys, which should be included
35
35
  in the annotation. By default all features are included.
36
-
36
+
37
37
  Returns
38
38
  -------
39
39
  annotation : Annotation
@@ -45,11 +45,11 @@ def get_annotation(gb_file, include_only=None):
45
45
  if len(fields) > 1:
46
46
  raise InvalidFileError("File has multiple 'FEATURES' fields")
47
47
  lines, _ = fields[0]
48
-
48
+
49
49
 
50
50
  ### Parse all lines to create an index of features,
51
51
  # i.e. pairs of the feature key
52
- # and the text belonging to the respective feature
52
+ # and the text belonging to the respective feature
53
53
  feature_list = []
54
54
  feature_key = None
55
55
  feature_value = ""
@@ -65,7 +65,7 @@ def get_annotation(gb_file, include_only=None):
65
65
  feature_value += line[_QUAL_START:] + " "
66
66
  # Store last feature key and value (loop already exited)
67
67
  feature_list.append((feature_key, feature_value))
68
-
68
+
69
69
 
70
70
  ### Process only relevant features and put them into an Annotation
71
71
  annotation = Annotation()
@@ -74,7 +74,7 @@ def get_annotation(gb_file, include_only=None):
74
74
  for key, val in feature_list:
75
75
  if include_only is None or key in include_only:
76
76
  qual_dict = {}
77
-
77
+
78
78
  # Split feature definition into parts
79
79
  # e.g.
80
80
  #
@@ -138,9 +138,9 @@ def get_annotation(gb_file, include_only=None):
138
138
  _set_qual(qual_dict, qual_key, qual_val)
139
139
  qual_key = None
140
140
  qual_val = None
141
-
141
+
142
142
  annotation.add_feature(Feature(key, locs, qual_dict))
143
-
143
+
144
144
  return annotation
145
145
 
146
146
 
@@ -149,11 +149,11 @@ def _parse_locs(loc_str):
149
149
  if loc_str.startswith(("join", "order")):
150
150
  str_list = loc_str[loc_str.index("(")+1:loc_str.rindex(")")].split(",")
151
151
  for s in str_list:
152
- locs.extend(_parse_locs(s))
152
+ locs.extend(_parse_locs(s.strip()))
153
153
  elif loc_str.startswith("complement"):
154
154
  compl_str = loc_str[loc_str.index("(")+1:loc_str.rindex(")")]
155
155
  compl_locs = [
156
- Location(loc.first, loc.last, Location.Strand.REVERSE, loc.defect)
156
+ Location(loc.first, loc.last, Location.Strand.REVERSE, loc.defect)
157
157
  for loc in _parse_locs(compl_str)
158
158
  ]
159
159
  locs.extend(compl_locs)
@@ -219,7 +219,7 @@ def _set_qual(qual_dict, key, val):
219
219
  def set_annotation(gb_file, annotation):
220
220
  """
221
221
  Set the *FEATURES* field of a GenBank file with an annotation.
222
-
222
+
223
223
  Parameters
224
224
  ----------
225
225
  gb_file : GenBankFile