biotite 0.41.1__cp310-cp310-win_amd64.whl → 1.0.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +36 -10
  3. biotite/application/application.py +22 -11
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +16 -5
  52. biotite/sequence/align/__init__.py +160 -6
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +35 -35
  67. biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +112 -126
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +64 -64
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +226 -240
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +88 -100
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp310-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +21 -7
  130. biotite/structure/info/groups.py +10 -15
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -52
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
  186. biotite/structure/io/ctab.py +0 -72
  187. biotite/structure/io/mmtf/__init__.py +0 -21
  188. biotite/structure/io/mmtf/assembly.py +0 -214
  189. biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
  190. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  191. biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
  192. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  193. biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
  194. biotite/structure/io/mmtf/decode.pyx +0 -152
  195. biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
  196. biotite/structure/io/mmtf/encode.pyx +0 -183
  197. biotite/structure/io/mmtf/file.py +0 -233
  198. biotite/structure/io/npz/__init__.py +0 -20
  199. biotite/structure/io/npz/file.py +0 -152
  200. biotite/structure/io/pdbx/legacy.py +0 -267
  201. biotite/structure/io/tng/__init__.py +0 -13
  202. biotite/structure/io/tng/file.py +0 -46
  203. biotite/temp.py +0 -86
  204. biotite-0.41.1.dist-info/RECORD +0 -340
  205. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -4,58 +4,62 @@
4
4
 
5
5
  __name__ = "biotite.sequence"
6
6
  __author__ = "Patrick Kunzmann"
7
- __all__ = ["Alphabet", "LetterAlphabet", "AlphabetMapper", "AlphabetError",
8
- "common_alphabet"]
7
+ __all__ = [
8
+ "Alphabet",
9
+ "LetterAlphabet",
10
+ "AlphabetMapper",
11
+ "AlphabetError",
12
+ "common_alphabet",
13
+ ]
9
14
 
10
- import copy
11
- from numbers import Integral
12
15
  import string
16
+ from numbers import Integral
13
17
  import numpy as np
14
- from .codec import encode_chars, decode_to_chars, map_sequence_code
18
+ from biotite.sequence.codec import decode_to_chars, encode_chars, map_sequence_code
15
19
 
16
20
 
17
21
  class Alphabet(object):
18
22
  """
19
23
  This class defines the allowed symbols for a :class:`Sequence` and
20
24
  handles the encoding/decoding between symbols and symbol codes.
21
-
25
+
22
26
  An :class:`Alphabet` is created with the list of symbols, that can
23
27
  be used in this context.
24
28
  In most cases a symbol will be simply a letter, hence a string of
25
29
  length 1. But in principle every hashable Python object can serve
26
30
  as symbol.
27
-
31
+
28
32
  The encoding of a symbol into a symbol code is
29
33
  done in the following way: Find the first index in the symbol list,
30
34
  where the list element equals the symbol. This index is the
31
35
  symbol code. If the symbol is not found in the list, an
32
36
  :class:`AlphabetError` is raised.
33
-
37
+
34
38
  Internally, a dictionary is used for encoding, with symbols as keys
35
39
  and symbol codes as values. Therefore, every symbol must be
36
40
  hashable. For decoding the symbol list is indexed with the symbol
37
41
  code.
38
-
42
+
39
43
  If an alphabet *1* contains the same symbols and the same
40
44
  symbol-code-mappings like another alphabet *2*, but alphabet *1*
41
45
  introduces also new symbols, then alphabet *1* *extends* alphabet
42
46
  *2*.
43
47
  Per definition, every alphabet also extends itself.
44
-
48
+
45
49
  Objects of this class are immutable.
46
-
50
+
47
51
  Parameters
48
52
  ----------
49
53
  symbols : iterable object
50
54
  The symbols, that are allowed in this alphabet. The
51
55
  corresponding code for a symbol, is the index of that symbol
52
56
  in this list.
53
-
57
+
54
58
  Examples
55
59
  --------
56
60
  Create an Alphabet containing DNA letters and encode/decode a
57
61
  letter/code:
58
-
62
+
59
63
  >>> alph = Alphabet(["A","C","G","T"])
60
64
  >>> print(alph.encode("G"))
61
65
  2
@@ -66,9 +70,9 @@ class Alphabet(object):
66
70
  ... except Exception as e:
67
71
  ... print(e)
68
72
  Symbol 'foo' is not in the alphabet
69
-
73
+
70
74
  Create an Alphabet of arbitrary objects:
71
-
75
+
72
76
  >>> alph = Alphabet(["foo", 42, (1,2,3), 5, 3.141])
73
77
  >>> print(alph.encode((1,2,3)))
74
78
  2
@@ -77,53 +81,53 @@ class Alphabet(object):
77
81
 
78
82
  On the subject of alphabet extension:
79
83
  An alphabet always extends itself.
80
-
84
+
81
85
  >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","G","T"]))
82
86
  True
83
87
 
84
88
  An alphabet extends an alphabet when it contains additional symbols...
85
-
89
+
86
90
  >>> Alphabet(["A","C","G","T","U"]).extends(Alphabet(["A","C","G","T"]))
87
91
  True
88
-
92
+
89
93
  ...but not vice versa
90
-
94
+
91
95
  >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","G","T","U"]))
92
96
  False
93
-
97
+
94
98
  Two alphabets with same symbols but different symbol-code-mappings
95
-
96
- >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","T","G"]))
99
+
100
+ >>> Alphabet(["A","C","G","T"]).extends(Alphabet(["A","C","T","G"]))
97
101
  False
98
102
  """
99
-
103
+
100
104
  def __init__(self, symbols):
101
105
  if len(symbols) == 0:
102
106
  raise ValueError("Symbol list is empty")
103
- self._symbols = copy.deepcopy(list(symbols))
107
+ self._symbols = tuple(symbols)
104
108
  self._symbol_dict = {}
105
109
  for i, symbol in enumerate(symbols):
106
110
  self._symbol_dict[symbol] = i
107
111
 
108
112
  def __repr__(self):
109
113
  """Represent Alphabet as a string for debugging."""
110
- return f'Alphabet({self._symbols})'
114
+ return f"Alphabet({self._symbols})"
111
115
 
112
116
  def get_symbols(self):
113
117
  """
114
118
  Get the symbols in the alphabet.
115
-
119
+
116
120
  Returns
117
121
  -------
118
- symbols : list
119
- Copy of the internal list of symbols.
122
+ symbols : tuple
123
+ The symbols.
120
124
  """
121
- return copy.deepcopy(self._symbols)
122
-
125
+ return self._symbols
126
+
123
127
  def extends(self, alphabet):
124
128
  """
125
129
  Check, if this alphabet extends another alphabet.
126
-
130
+
127
131
  Parameters
128
132
  ----------
129
133
  alphabet : Alphabet
@@ -139,23 +143,22 @@ class Alphabet(object):
139
143
  elif len(alphabet) > len(self):
140
144
  return False
141
145
  else:
142
- return alphabet.get_symbols() \
143
- == self.get_symbols()[:len(alphabet)]
144
-
146
+ return alphabet.get_symbols() == self.get_symbols()[: len(alphabet)]
147
+
145
148
  def encode(self, symbol):
146
149
  """
147
150
  Use the alphabet to encode a symbol.
148
-
151
+
149
152
  Parameters
150
153
  ----------
151
154
  symbol : object
152
155
  The object to encode into a symbol code.
153
-
156
+
154
157
  Returns
155
158
  -------
156
159
  code : int
157
160
  The symbol code of `symbol`.
158
-
161
+
159
162
  Raises
160
163
  ------
161
164
  AlphabetError
@@ -164,24 +167,22 @@ class Alphabet(object):
164
167
  try:
165
168
  return self._symbol_dict[symbol]
166
169
  except KeyError:
167
- raise AlphabetError(
168
- f"Symbol {repr(symbol)} is not in the alphabet"
169
- )
170
-
170
+ raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")
171
+
171
172
  def decode(self, code):
172
173
  """
173
174
  Use the alphabet to decode a symbol code.
174
-
175
+
175
176
  Parameters
176
177
  ----------
177
178
  code : int
178
179
  The symbol code to be decoded.
179
-
180
+
180
181
  Returns
181
182
  -------
182
183
  symbol : object
183
184
  The symbol corresponding to `code`.
184
-
185
+
185
186
  Raises
186
187
  ------
187
188
  AlphabetError
@@ -190,41 +191,41 @@ class Alphabet(object):
190
191
  if code < 0 or code >= len(self._symbols):
191
192
  raise AlphabetError(f"'{code:d}' is not a valid code")
192
193
  return self._symbols[code]
193
-
194
+
194
195
  def encode_multiple(self, symbols, dtype=np.int64):
195
196
  """
196
197
  Encode a list of symbols.
197
-
198
+
198
199
  Parameters
199
200
  ----------
200
201
  symbols : array-like
201
202
  The symbols to encode.
202
203
  dtype : dtype, optional
203
204
  The dtype of the output ndarray. (Default: `int64`)
204
-
205
+
205
206
  Returns
206
207
  -------
207
208
  code : ndarray
208
209
  The sequence code.
209
210
  """
210
211
  return np.array([self.encode(e) for e in symbols], dtype=dtype)
211
-
212
+
212
213
  def decode_multiple(self, code):
213
214
  """
214
215
  Decode a sequence code into a list of symbols.
215
-
216
+
216
217
  Parameters
217
218
  ----------
218
219
  code : ndarray
219
220
  The sequence code to decode.
220
-
221
+
221
222
  Returns
222
223
  -------
223
224
  symbols : list
224
225
  The decoded list of symbols.
225
226
  """
226
227
  return [self.decode(c) for c in code]
227
-
228
+
228
229
  def is_letter_alphabet(self):
229
230
  """
230
231
  Check whether the symbols in this alphabet are single printable
@@ -238,30 +239,33 @@ class Alphabet(object):
238
239
  have length 1 and are printable.
239
240
  """
240
241
  for symbol in self:
241
- if not isinstance(symbol, (str, bytes)) \
242
- or len(symbol) > 1:
243
- return False
242
+ if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
243
+ return False
244
244
  if isinstance(symbol, str):
245
245
  symbol = symbol.encode("ASCII")
246
- if symbol not in LetterAlphabet.PRINATBLES:
246
+ if symbol not in LetterAlphabet.PRINTABLES:
247
247
  return False
248
248
  return True
249
-
249
+
250
250
  def __str__(self):
251
251
  return str(self.get_symbols())
252
-
252
+
253
253
  def __len__(self):
254
254
  return len(self.get_symbols())
255
-
255
+
256
256
  def __iter__(self):
257
257
  return self.get_symbols().__iter__()
258
-
258
+
259
259
  def __contains__(self, symbol):
260
260
  return symbol in self.get_symbols()
261
-
261
+
262
262
  def __hash__(self):
263
- return hash(tuple(self._symbols))
264
-
263
+ symbols = self.get_symbols()
264
+ if isinstance(symbols, tuple):
265
+ return hash(symbols)
266
+ else:
267
+ return hash(tuple(symbols))
268
+
265
269
  def __eq__(self, item):
266
270
  if item is self:
267
271
  return True
@@ -291,9 +295,10 @@ class LetterAlphabet(Alphabet):
291
295
  corresponding code for a symbol, is the index of that symbol
292
296
  in this list.
293
297
  """
294
-
295
- PRINATBLES = (string.digits + string.ascii_letters + string.punctuation) \
296
- .encode("ASCII")
298
+
299
+ PRINTABLES = (string.digits + string.ascii_letters + string.punctuation).encode(
300
+ "ASCII"
301
+ )
297
302
 
298
303
  def __init__(self, symbols):
299
304
  if len(symbols) == 0:
@@ -304,7 +309,7 @@ class LetterAlphabet(Alphabet):
304
309
  raise ValueError(f"Symbol '{symbol}' is not a single letter")
305
310
  if isinstance(symbol, str):
306
311
  symbol = symbol.encode("ASCII")
307
- if symbol not in LetterAlphabet.PRINATBLES:
312
+ if symbol not in LetterAlphabet.PRINTABLES:
308
313
  raise ValueError(
309
314
  f"Symbol {repr(symbol)} is not printable or whitespace"
310
315
  )
@@ -312,57 +317,43 @@ class LetterAlphabet(Alphabet):
312
317
  # Direct 'astype' conversion is not allowed by numpy
313
318
  # -> frombuffer()
314
319
  self._symbols = np.frombuffer(
315
- np.array(self._symbols, dtype="|S1"),
316
- dtype=np.ubyte
320
+ np.array(self._symbols, dtype="|S1"), dtype=np.ubyte
317
321
  )
318
322
 
319
323
  def __repr__(self):
320
324
  """Represent LetterAlphabet as a string for debugging."""
321
- return f'LetterAlphabet({self.get_symbols()})'
322
-
325
+ return f"LetterAlphabet({self.get_symbols()})"
326
+
323
327
  def extends(self, alphabet):
324
328
  if alphabet is self:
325
329
  return True
326
- elif type(alphabet) == LetterAlphabet:
330
+ elif isinstance(alphabet, LetterAlphabet):
327
331
  if len(alphabet._symbols) > len(self._symbols):
328
332
  return False
329
- return np.all(
330
- alphabet._symbols == self._symbols[:len(alphabet._symbols)]
331
- )
333
+ return np.all(alphabet._symbols == self._symbols[: len(alphabet._symbols)])
332
334
  else:
333
335
  return super().extends(alphabet)
334
336
 
335
337
  def get_symbols(self):
336
- """
337
- Get the symbols in the alphabet.
338
-
339
- Returns
340
- -------
341
- symbols : list
342
- Copy of the internal list of symbols.
343
- """
344
- return [symbol.decode("ASCII") for symbol
345
- in self._symbols_as_bytes()]
346
-
338
+ return tuple([symbol.decode("ASCII") for symbol in self._symbols_as_bytes()])
339
+
347
340
  def encode(self, symbol):
348
341
  if not isinstance(symbol, (str, bytes)) or len(symbol) > 1:
349
342
  raise AlphabetError(f"Symbol '{symbol}' is not a single letter")
350
343
  indices = np.where(self._symbols == ord(symbol))[0]
351
344
  if len(indices) == 0:
352
- raise AlphabetError(
353
- f"Symbol {repr(symbol)} is not in the alphabet"
354
- )
355
- return indices[0]
356
-
345
+ raise AlphabetError(f"Symbol {repr(symbol)} is not in the alphabet")
346
+ return indices[0].item()
347
+
357
348
  def decode(self, code, as_bytes=False):
358
349
  if code < 0 or code >= len(self._symbols):
359
350
  raise AlphabetError(f"'{code:d}' is not a valid code")
360
351
  return chr(self._symbols[code])
361
-
352
+
362
353
  def encode_multiple(self, symbols, dtype=None):
363
354
  """
364
355
  Encode multiple symbols.
365
-
356
+
366
357
  Parameters
367
358
  ----------
368
359
  symbols : iterable object or str or bytes
@@ -371,7 +362,7 @@ class LetterAlphabet(Alphabet):
371
362
  containing the symbols is provided, instead of e.g. a list.
372
363
  dtype : dtype, optional
373
364
  For compatibility with superclass. The value is ignored
374
-
365
+
375
366
  Returns
376
367
  -------
377
368
  code : ndarray
@@ -382,20 +373,17 @@ class LetterAlphabet(Alphabet):
382
373
  elif isinstance(symbols, bytes):
383
374
  symbols = np.frombuffer(symbols, dtype=np.ubyte)
384
375
  elif isinstance(symbols, np.ndarray):
385
- symbols = np.frombuffer(
386
- symbols.astype(dtype="|S1"), dtype=np.ubyte
387
- )
376
+ symbols = np.frombuffer(symbols.astype(dtype="|S1"), dtype=np.ubyte)
388
377
  else:
389
378
  symbols = np.frombuffer(
390
- np.array(list(symbols), dtype="|S1"),
391
- dtype=np.ubyte
379
+ np.array(list(symbols), dtype="|S1"), dtype=np.ubyte
392
380
  )
393
381
  return encode_chars(alphabet=self._symbols, symbols=symbols)
394
-
382
+
395
383
  def decode_multiple(self, code, as_bytes=False):
396
384
  """
397
385
  Decode a sequence code into a list of symbols.
398
-
386
+
399
387
  Parameters
400
388
  ----------
401
389
  code : ndarray, dtype=uint8
@@ -421,20 +409,19 @@ class LetterAlphabet(Alphabet):
421
409
  if not as_bytes:
422
410
  symbols = symbols.astype("U1")
423
411
  return symbols
424
-
412
+
425
413
  def __contains__(self, symbol):
426
414
  if not isinstance(symbol, (str, bytes)):
427
415
  return False
428
416
  return ord(symbol) in self._symbols
429
-
417
+
430
418
  def __len__(self):
431
419
  return len(self._symbols)
432
-
420
+
433
421
  def _symbols_as_bytes(self):
434
422
  "Properly convert from dtype 'np.ubyte' to '|S1'"
435
423
  return np.frombuffer(self._symbols, dtype="|S1")
436
424
 
437
-
438
425
 
439
426
  class AlphabetMapper(object):
440
427
  """
@@ -445,7 +432,7 @@ class AlphabetMapper(object):
445
432
  alphabet so that the symbol itself is preserved.
446
433
  This class works for single symbol codes or an entire sequence code
447
434
  likewise.
448
-
435
+
449
436
  Parameters
450
437
  ----------
451
438
  source_alphabet, target_alphabet : Alphabet
@@ -454,7 +441,7 @@ class AlphabetMapper(object):
454
441
  The target alphabet must contain at least all symbols of the
455
442
  source alphabet, but it is not required that the shared symbols
456
443
  are in the same order.
457
-
444
+
458
445
  Examples
459
446
  --------
460
447
 
@@ -470,56 +457,54 @@ class AlphabetMapper(object):
470
457
  >>> in_sequence = GeneralSequence(source_alph, "GCCTAT")
471
458
  >>> print(in_sequence.code)
472
459
  [2 1 1 3 0 3]
473
- >>> print(in_sequence)
460
+ >>> print("".join(in_sequence.symbols))
474
461
  GCCTAT
475
462
  >>> out_sequence = GeneralSequence(target_alph)
476
463
  >>> out_sequence.code = mapper[in_sequence.code]
477
464
  >>> print(out_sequence.code)
478
465
  [3 4 4 0 2 0]
479
- >>> print(out_sequence)
466
+ >>> print("".join(out_sequence.symbols))
480
467
  GCCTAT
481
468
  """
482
-
469
+
483
470
  def __init__(self, source_alphabet, target_alphabet):
484
471
  if target_alphabet.extends(source_alphabet):
485
472
  self._necessary_mapping = False
486
473
  else:
487
474
  self._necessary_mapping = True
488
475
  self._mapper = np.zeros(
489
- len(source_alphabet),
490
- dtype=AlphabetMapper._dtype(len(target_alphabet))
476
+ len(source_alphabet), dtype=AlphabetMapper._dtype(len(target_alphabet))
491
477
  )
492
478
  for old_code in range(len(source_alphabet)):
493
479
  symbol = source_alphabet.decode(old_code)
494
480
  new_code = target_alphabet.encode(symbol)
495
481
  self._mapper[old_code] = new_code
496
-
482
+
497
483
  def __getitem__(self, code):
498
484
  if isinstance(code, Integral):
499
485
  if self._necessary_mapping:
500
486
  return self._mapper[code]
501
487
  else:
502
488
  return code
503
- if not isinstance(code, np.ndarray) \
504
- or code.dtype not in (np.uint8, np.uint16, np.uint32, np.uint64):
505
- code = np.array(code, dtype=np.uint64)
489
+ if not isinstance(code, np.ndarray) or code.dtype not in (
490
+ np.uint8,
491
+ np.uint16,
492
+ np.uint32,
493
+ np.uint64,
494
+ ):
495
+ code = np.array(code, dtype=np.uint64)
506
496
  if self._necessary_mapping:
507
497
  mapped_code = np.empty(len(code), dtype=self._mapper.dtype)
508
- map_sequence_code(
509
- self._mapper,
510
- code,
511
- mapped_code
512
- )
498
+ map_sequence_code(self._mapper, code, mapped_code)
513
499
  return mapped_code
514
500
  else:
515
501
  return code
516
502
 
517
-
518
503
  @staticmethod
519
504
  def _dtype(alphabet_size):
520
- _size_uint8 = np.iinfo(np.uint8 ).max +1
521
- _size_uint16 = np.iinfo(np.uint16).max +1
522
- _size_uint32 = np.iinfo(np.uint32).max +1
505
+ _size_uint8 = np.iinfo(np.uint8).max + 1
506
+ _size_uint16 = np.iinfo(np.uint16).max + 1
507
+ _size_uint32 = np.iinfo(np.uint32).max + 1
523
508
  if alphabet_size <= _size_uint8:
524
509
  return np.uint8
525
510
  elif alphabet_size <= _size_uint16:
@@ -535,6 +520,7 @@ class AlphabetError(Exception):
535
520
  This exception is raised, when a code or a symbol is not in an
536
521
  :class:`Alphabet`.
537
522
  """
523
+
538
524
  pass
539
525
 
540
526
 
@@ -552,7 +538,7 @@ def common_alphabet(alphabets):
552
538
  -------
553
539
  common_alphabet : Alphabet or None
554
540
  The alphabet from `alphabets` that extends all alphabets.
555
- ``None`` if no such common alphabet exists.
541
+ ``None`` if no such common alphabet exists.
556
542
  """
557
543
  common_alphabet = None
558
544
  for alphabet in alphabets:
@@ -563,4 +549,4 @@ def common_alphabet(alphabets):
563
549
  common_alphabet = alphabet
564
550
  else:
565
551
  return None
566
- return common_alphabet
552
+ return common_alphabet