biotite 0.41.1__cp310-cp310-win_amd64.whl → 1.0.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (205):
  1. biotite/__init__.py +2 -3
  2. biotite/application/__init__.py +36 -10
  3. biotite/application/application.py +22 -11
  4. biotite/application/autodock/__init__.py +1 -1
  5. biotite/application/autodock/app.py +74 -79
  6. biotite/application/blast/__init__.py +1 -1
  7. biotite/application/blast/alignment.py +19 -10
  8. biotite/application/blast/webapp.py +92 -85
  9. biotite/application/clustalo/__init__.py +1 -1
  10. biotite/application/clustalo/app.py +46 -61
  11. biotite/application/dssp/__init__.py +1 -1
  12. biotite/application/dssp/app.py +8 -11
  13. biotite/application/localapp.py +62 -60
  14. biotite/application/mafft/__init__.py +1 -1
  15. biotite/application/mafft/app.py +16 -22
  16. biotite/application/msaapp.py +78 -89
  17. biotite/application/muscle/__init__.py +1 -1
  18. biotite/application/muscle/app3.py +50 -64
  19. biotite/application/muscle/app5.py +23 -31
  20. biotite/application/sra/__init__.py +1 -1
  21. biotite/application/sra/app.py +64 -68
  22. biotite/application/tantan/__init__.py +1 -1
  23. biotite/application/tantan/app.py +22 -45
  24. biotite/application/util.py +7 -9
  25. biotite/application/viennarna/rnaalifold.py +34 -28
  26. biotite/application/viennarna/rnafold.py +24 -39
  27. biotite/application/viennarna/rnaplot.py +36 -21
  28. biotite/application/viennarna/util.py +17 -12
  29. biotite/application/webapp.py +13 -14
  30. biotite/copyable.py +13 -13
  31. biotite/database/__init__.py +1 -1
  32. biotite/database/entrez/__init__.py +1 -1
  33. biotite/database/entrez/check.py +2 -3
  34. biotite/database/entrez/dbnames.py +7 -5
  35. biotite/database/entrez/download.py +55 -49
  36. biotite/database/entrez/key.py +1 -1
  37. biotite/database/entrez/query.py +62 -23
  38. biotite/database/error.py +2 -1
  39. biotite/database/pubchem/__init__.py +1 -1
  40. biotite/database/pubchem/download.py +43 -45
  41. biotite/database/pubchem/error.py +2 -2
  42. biotite/database/pubchem/query.py +34 -31
  43. biotite/database/pubchem/throttle.py +3 -4
  44. biotite/database/rcsb/__init__.py +1 -1
  45. biotite/database/rcsb/download.py +44 -52
  46. biotite/database/rcsb/query.py +85 -80
  47. biotite/database/uniprot/check.py +6 -3
  48. biotite/database/uniprot/download.py +6 -11
  49. biotite/database/uniprot/query.py +115 -31
  50. biotite/file.py +12 -31
  51. biotite/sequence/__init__.py +16 -5
  52. biotite/sequence/align/__init__.py +160 -6
  53. biotite/sequence/align/alignment.py +99 -90
  54. biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
  55. biotite/sequence/align/buckets.py +12 -10
  56. biotite/sequence/align/cigar.py +43 -52
  57. biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
  58. biotite/sequence/align/kmeralphabet.pyx +55 -51
  59. biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
  60. biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
  61. biotite/sequence/align/kmertable.pyx +3 -2
  62. biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
  63. biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
  64. biotite/sequence/align/matrix.py +81 -82
  65. biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
  66. biotite/sequence/align/multiple.pyx +35 -35
  67. biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
  68. biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
  69. biotite/sequence/align/permutation.pyx +12 -4
  70. biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
  71. biotite/sequence/align/selector.pyx +52 -54
  72. biotite/sequence/align/statistics.py +32 -33
  73. biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
  74. biotite/sequence/alphabet.py +112 -126
  75. biotite/sequence/annotation.py +78 -77
  76. biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
  77. biotite/sequence/codon.py +90 -79
  78. biotite/sequence/graphics/__init__.py +1 -1
  79. biotite/sequence/graphics/alignment.py +184 -103
  80. biotite/sequence/graphics/colorschemes.py +10 -12
  81. biotite/sequence/graphics/dendrogram.py +79 -34
  82. biotite/sequence/graphics/features.py +133 -99
  83. biotite/sequence/graphics/logo.py +22 -28
  84. biotite/sequence/graphics/plasmid.py +229 -178
  85. biotite/sequence/io/fasta/__init__.py +1 -1
  86. biotite/sequence/io/fasta/convert.py +44 -33
  87. biotite/sequence/io/fasta/file.py +42 -55
  88. biotite/sequence/io/fastq/__init__.py +1 -1
  89. biotite/sequence/io/fastq/convert.py +11 -14
  90. biotite/sequence/io/fastq/file.py +68 -112
  91. biotite/sequence/io/genbank/__init__.py +2 -2
  92. biotite/sequence/io/genbank/annotation.py +12 -20
  93. biotite/sequence/io/genbank/file.py +74 -76
  94. biotite/sequence/io/genbank/metadata.py +74 -62
  95. biotite/sequence/io/genbank/sequence.py +13 -14
  96. biotite/sequence/io/general.py +39 -30
  97. biotite/sequence/io/gff/__init__.py +2 -2
  98. biotite/sequence/io/gff/convert.py +10 -15
  99. biotite/sequence/io/gff/file.py +81 -65
  100. biotite/sequence/phylo/__init__.py +1 -1
  101. biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
  102. biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
  103. biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
  104. biotite/sequence/profile.py +57 -28
  105. biotite/sequence/search.py +17 -15
  106. biotite/sequence/seqtypes.py +200 -164
  107. biotite/sequence/sequence.py +64 -64
  108. biotite/structure/__init__.py +3 -3
  109. biotite/structure/atoms.py +226 -240
  110. biotite/structure/basepairs.py +260 -271
  111. biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
  112. biotite/structure/bonds.pyx +88 -100
  113. biotite/structure/box.py +67 -71
  114. biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
  115. biotite/structure/chains.py +55 -39
  116. biotite/structure/charges.cp310-win_amd64.pyd +0 -0
  117. biotite/structure/compare.py +32 -32
  118. biotite/structure/density.py +13 -18
  119. biotite/structure/dotbracket.py +20 -22
  120. biotite/structure/error.py +10 -2
  121. biotite/structure/filter.py +82 -77
  122. biotite/structure/geometry.py +130 -119
  123. biotite/structure/graphics/atoms.py +60 -43
  124. biotite/structure/graphics/rna.py +81 -68
  125. biotite/structure/hbond.py +112 -93
  126. biotite/structure/info/__init__.py +0 -2
  127. biotite/structure/info/atoms.py +10 -11
  128. biotite/structure/info/bonds.py +41 -43
  129. biotite/structure/info/ccd.py +21 -7
  130. biotite/structure/info/groups.py +10 -15
  131. biotite/structure/info/masses.py +5 -10
  132. biotite/structure/info/misc.py +1 -1
  133. biotite/structure/info/radii.py +20 -20
  134. biotite/structure/info/standardize.py +15 -26
  135. biotite/structure/integrity.py +18 -71
  136. biotite/structure/io/__init__.py +3 -4
  137. biotite/structure/io/dcd/__init__.py +1 -1
  138. biotite/structure/io/dcd/file.py +22 -20
  139. biotite/structure/io/general.py +47 -61
  140. biotite/structure/io/gro/__init__.py +1 -1
  141. biotite/structure/io/gro/file.py +73 -72
  142. biotite/structure/io/mol/__init__.py +1 -1
  143. biotite/structure/io/mol/convert.py +8 -11
  144. biotite/structure/io/mol/ctab.py +37 -36
  145. biotite/structure/io/mol/header.py +14 -10
  146. biotite/structure/io/mol/mol.py +9 -53
  147. biotite/structure/io/mol/sdf.py +47 -50
  148. biotite/structure/io/netcdf/__init__.py +1 -1
  149. biotite/structure/io/netcdf/file.py +24 -23
  150. biotite/structure/io/pdb/__init__.py +1 -1
  151. biotite/structure/io/pdb/convert.py +32 -20
  152. biotite/structure/io/pdb/file.py +151 -172
  153. biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
  154. biotite/structure/io/pdbqt/__init__.py +1 -1
  155. biotite/structure/io/pdbqt/convert.py +17 -11
  156. biotite/structure/io/pdbqt/file.py +128 -80
  157. biotite/structure/io/pdbx/__init__.py +1 -2
  158. biotite/structure/io/pdbx/bcif.py +36 -52
  159. biotite/structure/io/pdbx/cif.py +64 -62
  160. biotite/structure/io/pdbx/component.py +10 -16
  161. biotite/structure/io/pdbx/convert.py +235 -246
  162. biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
  163. biotite/structure/io/trajfile.py +76 -93
  164. biotite/structure/io/trr/__init__.py +1 -1
  165. biotite/structure/io/trr/file.py +12 -15
  166. biotite/structure/io/xtc/__init__.py +1 -1
  167. biotite/structure/io/xtc/file.py +11 -14
  168. biotite/structure/mechanics.py +9 -11
  169. biotite/structure/molecules.py +3 -4
  170. biotite/structure/pseudoknots.py +53 -67
  171. biotite/structure/rdf.py +23 -21
  172. biotite/structure/repair.py +137 -86
  173. biotite/structure/residues.py +26 -16
  174. biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
  175. biotite/structure/{resutil.py → segments.py} +24 -23
  176. biotite/structure/sequence.py +10 -11
  177. biotite/structure/sse.py +100 -119
  178. biotite/structure/superimpose.py +39 -77
  179. biotite/structure/transform.py +97 -71
  180. biotite/structure/util.py +11 -13
  181. biotite/version.py +2 -2
  182. biotite/visualize.py +69 -55
  183. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/METADATA +6 -6
  184. biotite-1.0.0.dist-info/RECORD +322 -0
  185. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/WHEEL +1 -1
  186. biotite/structure/io/ctab.py +0 -72
  187. biotite/structure/io/mmtf/__init__.py +0 -21
  188. biotite/structure/io/mmtf/assembly.py +0 -214
  189. biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
  190. biotite/structure/io/mmtf/convertarray.pyx +0 -341
  191. biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
  192. biotite/structure/io/mmtf/convertfile.pyx +0 -501
  193. biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
  194. biotite/structure/io/mmtf/decode.pyx +0 -152
  195. biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
  196. biotite/structure/io/mmtf/encode.pyx +0 -183
  197. biotite/structure/io/mmtf/file.py +0 -233
  198. biotite/structure/io/npz/__init__.py +0 -20
  199. biotite/structure/io/npz/file.py +0 -152
  200. biotite/structure/io/pdbx/legacy.py +0 -267
  201. biotite/structure/io/tng/__init__.py +0 -13
  202. biotite/structure/io/tng/file.py +0 -46
  203. biotite/temp.py +0 -86
  204. biotite-0.41.1.dist-info/RECORD +0 -340
  205. {biotite-0.41.1.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -6,17 +6,16 @@ __name__ = "biotite.sequence"
6
6
  __author__ = "Patrick Kunzmann", "Thomas Nevolianis"
7
7
  __all__ = ["GeneralSequence", "NucleotideSequence", "ProteinSequence"]
8
8
 
9
- from .sequence import Sequence
10
- from .alphabet import LetterAlphabet, AlphabetError, AlphabetMapper
11
9
  import numpy as np
12
- import copy
10
+ from biotite.sequence.alphabet import AlphabetError, AlphabetMapper, LetterAlphabet
11
+ from biotite.sequence.sequence import Sequence
13
12
 
14
13
 
15
14
  class GeneralSequence(Sequence):
16
15
  """
17
16
  This class allows the creation of a sequence with custom
18
17
  :class:`Alphabet` without the need to subclass :class:`Sequence`.
19
-
18
+
20
19
  Parameters
21
20
  ----------
22
21
  alphabet : Alphabet
@@ -27,22 +26,24 @@ class GeneralSequence(Sequence):
27
26
  may also be a :class:`str` object.
28
27
  By default the sequence is empty.
29
28
  """
30
-
29
+
31
30
  def __init__(self, alphabet, sequence=()):
32
31
  self._alphabet = alphabet
33
32
  super().__init__(sequence)
34
33
 
35
34
  def __repr__(self):
36
35
  """Represent GeneralSequence as a string for debugging."""
37
- return f"GeneralSequence(Alphabet({self._alphabet}), " \
38
- f"[{', '.join([repr(symbol) for symbol in self.symbols])}])"
36
+ return (
37
+ f"GeneralSequence(Alphabet({self._alphabet}), "
38
+ f"[{', '.join([repr(symbol) for symbol in self.symbols])}])"
39
+ )
39
40
 
40
41
  def __copy_create__(self):
41
42
  return GeneralSequence(self._alphabet)
42
-
43
+
43
44
  def get_alphabet(self):
44
45
  return self._alphabet
45
-
46
+
46
47
  def as_type(self, sequence):
47
48
  """
48
49
  Convert the :class:`GeneralSequence` into a sequence of another
@@ -58,12 +59,12 @@ class GeneralSequence(Sequence):
58
59
  of this object.
59
60
  The alphabet must equal or extend the alphabet of this
60
61
  object.
61
-
62
+
62
63
  Returns
63
64
  -------
64
65
  sequence : Sequence
65
66
  The input `sequence` with replaced sequence code.
66
-
67
+
67
68
  Raises
68
69
  ------
69
70
  AlphabetError
@@ -78,16 +79,17 @@ class GeneralSequence(Sequence):
78
79
  sequence.code = self.code
79
80
  return sequence
80
81
 
82
+
81
83
  class NucleotideSequence(Sequence):
82
84
  """
83
85
  Representation of a nucleotide sequence (DNA or RNA).
84
-
86
+
85
87
  This class may have one of two different alphabets:
86
88
  :attr:`unambiguous_alphabet()` contains only the unambiguous DNA
87
89
  letters 'A', 'C', 'G' and 'T'.
88
- :attr:`ambiguous_alphabet()` uses an extended alphabet for ambiguous
90
+ :attr:`ambiguous_alphabet()` uses an extended alphabet for ambiguous
89
91
  letters.
90
-
92
+
91
93
  Parameters
92
94
  ----------
93
95
  sequence : iterable object, optional
@@ -100,35 +102,36 @@ class NucleotideSequence(Sequence):
100
102
  ambiguous letters in the sequence, the ambiguous alphabet
101
103
  is used.
102
104
  """
103
-
104
- alphabet_unamb = LetterAlphabet(["A","C","G","T"])
105
- alphabet_amb = LetterAlphabet(
106
- ["A","C","G","T","R","Y","W","S",
107
- "M","K","H","B","V","D","N"]
105
+
106
+ alphabet_unamb = LetterAlphabet(["A", "C", "G", "T"])
107
+ alphabet_amb = LetterAlphabet(
108
+ ["A", "C", "G", "T", "R", "Y", "W", "S", "M", "K", "H", "B", "V", "D", "N"]
108
109
  )
109
-
110
- compl_symbol_dict = {"A" : "T",
111
- "C" : "G",
112
- "G" : "C",
113
- "T" : "A",
114
- "M" : "K",
115
- "R" : "Y",
116
- "W" : "W",
117
- "S" : "S",
118
- "Y" : "R",
119
- "K" : "M",
120
- "V" : "B",
121
- "H" : "D",
122
- "D" : "H",
123
- "B" : "V",
124
- "N" : "N"}
110
+
111
+ compl_symbol_dict = {
112
+ "A": "T",
113
+ "C": "G",
114
+ "G": "C",
115
+ "T": "A",
116
+ "M": "K",
117
+ "R": "Y",
118
+ "W": "W",
119
+ "S": "S",
120
+ "Y": "R",
121
+ "K": "M",
122
+ "V": "B",
123
+ "H": "D",
124
+ "D": "H",
125
+ "B": "V",
126
+ "N": "N",
127
+ }
125
128
  # List comprehension does not work in this scope
126
129
  _compl_symbols = []
127
130
  for _symbol in alphabet_amb.get_symbols():
128
131
  _compl_symbols.append(compl_symbol_dict[_symbol])
129
132
  _compl_alphabet_unamb = LetterAlphabet(_compl_symbols)
130
133
  _compl_mapper = AlphabetMapper(_compl_alphabet_unamb, alphabet_amb)
131
-
134
+
132
135
  def __init__(self, sequence=[], ambiguous=None):
133
136
  if isinstance(sequence, str):
134
137
  sequence = sequence.upper()
@@ -164,28 +167,28 @@ class NucleotideSequence(Sequence):
164
167
  else:
165
168
  seq_copy = NucleotideSequence(ambiguous=False)
166
169
  return seq_copy
167
-
170
+
168
171
  def get_alphabet(self):
169
172
  return self._alphabet
170
-
173
+
171
174
  def complement(self):
172
175
  """
173
176
  Get the complement nucleotide sequence.
174
-
177
+
175
178
  Returns
176
179
  -------
177
180
  complement : NucleotideSequence
178
181
  The complement sequence.
179
-
182
+
180
183
  Examples
181
184
  --------
182
-
185
+
183
186
  >>> dna_seq = NucleotideSequence("ACGCTT")
184
187
  >>> print(dna_seq.complement())
185
188
  TGCGAA
186
189
  >>> print(dna_seq.reverse().complement())
187
190
  AAGCGT
188
-
191
+
189
192
  """
190
193
  # Interpreting the sequence code of this object in the
191
194
  # complementary alphabet gives the complementary symbols
@@ -194,18 +197,18 @@ class NucleotideSequence(Sequence):
194
197
  # alphabet into the original alphabet
195
198
  compl_code = NucleotideSequence._compl_mapper[self.code]
196
199
  return self.copy(compl_code)
197
-
200
+
198
201
  def translate(self, complete=False, codon_table=None, met_start=False):
199
202
  """
200
203
  Translate the nucleotide sequence into a protein sequence.
201
-
204
+
202
205
  If `complete` is true, the entire sequence is translated,
203
206
  beginning with the first codon and ending with the last codon,
204
207
  even if stop codons occur during the translation.
205
-
208
+
206
209
  Otherwise this method returns possible ORFs in the
207
210
  sequence, even if not stop codon occurs in an ORF.
208
-
211
+
209
212
  Parameters
210
213
  ----------
211
214
  complete : bool, optional
@@ -222,7 +225,7 @@ class NucleotideSequence(Sequence):
222
225
  Otherwise the translation starts with the amino acid
223
226
  the codon codes for. Only applies, if `complete` is false.
224
227
  (Default: False)
225
-
228
+
226
229
  Returns
227
230
  -------
228
231
  protein : ProteinSequence or list of ProteinSequence
@@ -233,15 +236,15 @@ class NucleotideSequence(Sequence):
233
236
  pos : list of tuple (int, int)
234
237
  Is only returned if `complete` is false. The list contains
235
238
  a tuple for each ORF.
236
- The first element of the tuple is the index of the
239
+ The first element of the tuple is the index of the
237
240
  :class:`NucleotideSequence`, where the translation starts.
238
241
  The second element is the exclusive stop index, it
239
242
  represents the first nucleotide in the
240
243
  :class:`NucleotideSequence` after a stop codon.
241
-
244
+
242
245
  Examples
243
246
  --------
244
-
247
+
245
248
  >>> dna_seq = NucleotideSequence("AATGATGCTATAGAT")
246
249
  >>> prot_seq = dna_seq.translate(complete=True)
247
250
  >>> print(prot_seq)
@@ -251,29 +254,32 @@ class NucleotideSequence(Sequence):
251
254
  ... print(seq)
252
255
  MML*
253
256
  ML*
254
-
257
+
255
258
  """
256
259
  if self._alphabet != NucleotideSequence.alphabet_unamb:
257
260
  raise AlphabetError("Translation requires unambiguous alphabet")
258
261
  # Determine codon_table
259
262
  if codon_table is None:
260
263
  # Import at this position to avoid circular import
261
- from .codon import CodonTable
264
+ from biotite.sequence.codon import CodonTable
265
+
262
266
  codon_table = CodonTable.default_table()
263
-
267
+
264
268
  if complete:
265
269
  if len(self) % 3 != 0:
266
- raise ValueError("Sequence length needs to be a multiple of 3 "
267
- "for complete translation")
270
+ raise ValueError(
271
+ "Sequence length needs to be a multiple of 3 "
272
+ "for complete translation"
273
+ )
268
274
  # Reshape code into (n,3), with n being the amount of codons
269
275
  codons = self.code.reshape(-1, 3)
270
276
  protein_seq = ProteinSequence()
271
277
  protein_seq.code = codon_table.map_codon_codes(codons)
272
278
  return protein_seq
273
-
279
+
274
280
  else:
275
281
  stop_code = ProteinSequence.alphabet.encode("*")
276
- met_code = ProteinSequence.alphabet.encode("M")
282
+ met_code = ProteinSequence.alphabet.encode("M")
277
283
  protein_seqs = []
278
284
  pos = []
279
285
  code = self.code
@@ -282,7 +288,7 @@ class NucleotideSequence(Sequence):
282
288
  # The frame length is always a multiple of 3
283
289
  # If there is a trailing partial codon, remove it
284
290
  frame_length = ((len(code) - shift) // 3) * 3
285
- frame = code[shift : shift+frame_length]
291
+ frame = code[shift : shift + frame_length]
286
292
  # Reshape frame into (n,3), with n being the amount of codons
287
293
  frame_codons = frame.reshape(-1, 3)
288
294
  # At first, translate frame completely
@@ -297,8 +303,7 @@ class NucleotideSequence(Sequence):
297
303
  stops = np.where(code_from_start == stop_code)[0]
298
304
  # Find first stop codon after start codon
299
305
  # Include stop -> stops[0] + 1
300
- stop_i = stops[0] + 1 if len(stops) > 0 \
301
- else len(code_from_start)
306
+ stop_i = stops[0] + 1 if len(stops) > 0 else len(code_from_start)
302
307
  code_from_start_to_stop = code_from_start[:stop_i]
303
308
  prot_seq = ProteinSequence()
304
309
  if met_start:
@@ -310,13 +315,13 @@ class NucleotideSequence(Sequence):
310
315
  protein_seqs.append(prot_seq)
311
316
  # Codon indices are transformed
312
317
  # to nucleotide sequence indices
313
- pos.append((shift + start_i*3, shift + (start_i+stop_i)*3))
318
+ pos.append((shift + start_i * 3, shift + (start_i + stop_i) * 3))
314
319
  # Sort by start position
315
320
  order = np.argsort([start for start, stop in pos])
316
321
  pos = [pos[i] for i in order]
317
322
  protein_seqs = [protein_seqs[i] for i in order]
318
323
  return protein_seqs, pos
319
-
324
+
320
325
  @staticmethod
321
326
  def unambiguous_alphabet():
322
327
  """
@@ -329,7 +334,7 @@ class NucleotideSequence(Sequence):
329
334
  The unambiguous nucleotide alphabet.
330
335
  """
331
336
  return NucleotideSequence.alphabet_unamb
332
-
337
+
333
338
  @staticmethod
334
339
  def ambiguous_alphabet():
335
340
  """
@@ -348,10 +353,10 @@ class NucleotideSequence(Sequence):
348
353
  class ProteinSequence(Sequence):
349
354
  """
350
355
  Representation of a protein sequence.
351
-
356
+
352
357
  Furthermore this class offers a conversion of amino acids from
353
358
  3-letter code into 1-letter code and vice versa.
354
-
359
+
355
360
  Parameters
356
361
  ----------
357
362
  sequence : iterable object, optional
@@ -359,7 +364,7 @@ class ProteinSequence(Sequence):
359
364
  string. May take upper or lower case letters. If a list is
360
365
  given, the list elements can be 1-letter or 3-letter amino acid
361
366
  representations. By default the sequence is empty.
362
-
367
+
363
368
  Notes
364
369
  -----
365
370
  The :class:`Alphabet` of this :class:`Sequence` class does not
@@ -370,106 +375,138 @@ class ProteinSequence(Sequence):
370
375
  """
371
376
 
372
377
  _codon_table = None
373
-
374
- alphabet = LetterAlphabet(["A","C","D","E","F","G","H","I","K","L",
375
- "M","N","P","Q","R","S","T","V","W","Y",
376
- "B","Z","X","*"])
378
+
379
+ alphabet = LetterAlphabet(
380
+ [
381
+ "A",
382
+ "C",
383
+ "D",
384
+ "E",
385
+ "F",
386
+ "G",
387
+ "H",
388
+ "I",
389
+ "K",
390
+ "L",
391
+ "M",
392
+ "N",
393
+ "P",
394
+ "Q",
395
+ "R",
396
+ "S",
397
+ "T",
398
+ "V",
399
+ "W",
400
+ "Y",
401
+ "B",
402
+ "Z",
403
+ "X",
404
+ "*",
405
+ ]
406
+ )
377
407
 
378
408
  # Masses are taken from
379
409
  # https://web.expasy.org/findmod/findmod_masses.html#AA
380
410
 
381
- _mol_weight_average = np.array([
382
- 71.0788, # A
383
- 103.1388, # C
384
- 115.0886, # D
385
- 129.1155, # E
386
- 147.1766, # F
387
- 57.0519, # G
388
- 137.1411, # H
389
- 113.1594, # I
390
- 128.1741, # K
391
- 113.1594, # L
392
- 131.1926, # M
393
- 114.1038, # N
394
- 97.1167, # P
395
- 128.1307, # Q
396
- 156.1875, # R
397
- 87.0782, # S
398
- 101.1051, # T
399
- 99.1326, # V
400
- 186.2132, # W
401
- 163.1760, # Y
402
- np.nan, # B
403
- np.nan, # Z
404
- np.nan, # X
405
- np.nan, # *
406
- ])
407
-
408
- _mol_weight_monoisotopic = np.array([
409
- 71.03711, # A
410
- 103.00919, # C
411
- 115.02694, # D
412
- 129.04259, # E
413
- 147.06841, # F
414
- 57.02146, # G
415
- 137.05891, # H
416
- 113.08406, # I
417
- 128.09496, # K
418
- 113.08406, # L
419
- 131.04049, # M
420
- 114.04293, # N
421
- 97.05276, # P
422
- 128.05858, # Q
423
- 156.10111, # R
424
- 87.03203, # S
425
- 101.04768, # T
426
- 99.06841, # V
427
- 186.07931, # W
428
- 163.06333, # Y
429
- np.nan, # B
430
- np.nan, # Z
431
- np.nan, # X
432
- np.nan, # *
433
- ])
434
-
435
- _dict_1to3 = {"A" : "ALA",
436
- "C" : "CYS",
437
- "D" : "ASP",
438
- "E" : "GLU",
439
- "F" : "PHE",
440
- "G" : "GLY",
441
- "H" : "HIS",
442
- "I" : "ILE",
443
- "K" : "LYS",
444
- "L" : "LEU",
445
- "M" : "MET",
446
- "N" : "ASN",
447
- "P" : "PRO",
448
- "Q" : "GLN",
449
- "R" : "ARG",
450
- "S" : "SER",
451
- "T" : "THR",
452
- "V" : "VAL",
453
- "W" : "TRP",
454
- "Y" : "TYR",
455
- "B" : "ASX",
456
- "Z" : "GLX",
457
- "X" : "UNK",
458
- "*" : " * "}
459
-
411
+ _mol_weight_average = np.array(
412
+ [
413
+ 71.0788, # A
414
+ 103.1388, # C
415
+ 115.0886, # D
416
+ 129.1155, # E
417
+ 147.1766, # F
418
+ 57.0519, # G
419
+ 137.1411, # H
420
+ 113.1594, # I
421
+ 128.1741, # K
422
+ 113.1594, # L
423
+ 131.1926, # M
424
+ 114.1038, # N
425
+ 97.1167, # P
426
+ 128.1307, # Q
427
+ 156.1875, # R
428
+ 87.0782, # S
429
+ 101.1051, # T
430
+ 99.1326, # V
431
+ 186.2132, # W
432
+ 163.1760, # Y
433
+ np.nan, # B
434
+ np.nan, # Z
435
+ np.nan, # X
436
+ np.nan, # *
437
+ ]
438
+ )
439
+
440
+ _mol_weight_monoisotopic = np.array(
441
+ [
442
+ 71.03711, # A
443
+ 103.00919, # C
444
+ 115.02694, # D
445
+ 129.04259, # E
446
+ 147.06841, # F
447
+ 57.02146, # G
448
+ 137.05891, # H
449
+ 113.08406, # I
450
+ 128.09496, # K
451
+ 113.08406, # L
452
+ 131.04049, # M
453
+ 114.04293, # N
454
+ 97.05276, # P
455
+ 128.05858, # Q
456
+ 156.10111, # R
457
+ 87.03203, # S
458
+ 101.04768, # T
459
+ 99.06841, # V
460
+ 186.07931, # W
461
+ 163.06333, # Y
462
+ np.nan, # B
463
+ np.nan, # Z
464
+ np.nan, # X
465
+ np.nan, # *
466
+ ]
467
+ )
468
+
469
+ _dict_1to3 = {
470
+ "A": "ALA",
471
+ "C": "CYS",
472
+ "D": "ASP",
473
+ "E": "GLU",
474
+ "F": "PHE",
475
+ "G": "GLY",
476
+ "H": "HIS",
477
+ "I": "ILE",
478
+ "K": "LYS",
479
+ "L": "LEU",
480
+ "M": "MET",
481
+ "N": "ASN",
482
+ "P": "PRO",
483
+ "Q": "GLN",
484
+ "R": "ARG",
485
+ "S": "SER",
486
+ "T": "THR",
487
+ "V": "VAL",
488
+ "W": "TRP",
489
+ "Y": "TYR",
490
+ "B": "ASX",
491
+ "Z": "GLX",
492
+ "X": "UNK",
493
+ "*": " * ",
494
+ }
495
+
460
496
  _dict_3to1 = {}
461
497
  for _key, _value in _dict_1to3.items():
462
498
  _dict_3to1[_value] = _key
463
499
  _dict_3to1["SEC"] = "C"
464
500
  _dict_3to1["MSE"] = "M"
465
-
501
+
466
502
  def __init__(self, sequence=()):
467
503
  dict_3to1 = ProteinSequence._dict_3to1
468
- alph = ProteinSequence.alphabet
469
504
  # Convert 3-letter codes to single letter codes,
470
505
  # if list contains 3-letter codes
471
- sequence = [dict_3to1[symbol.upper()] if len(symbol) == 3
472
- else symbol.upper() for symbol in sequence]
506
+ sequence = [
507
+ dict_3to1[symbol.upper()] if len(symbol) == 3 else symbol.upper()
508
+ for symbol in sequence
509
+ ]
473
510
  super().__init__(sequence)
474
511
 
475
512
  def __repr__(self):
@@ -478,11 +515,11 @@ class ProteinSequence(Sequence):
478
515
 
479
516
  def get_alphabet(self):
480
517
  return ProteinSequence.alphabet
481
-
518
+
482
519
  def remove_stops(self):
483
520
  """
484
521
  Remove *stop signals* from the sequence.
485
-
522
+
486
523
  Returns
487
524
  -------
488
525
  no_stop : ProteinSequence
@@ -493,34 +530,34 @@ class ProteinSequence(Sequence):
493
530
  seq_code = no_stop.code
494
531
  no_stop.code = seq_code[seq_code != stop_code]
495
532
  return no_stop
496
-
533
+
497
534
  @staticmethod
498
535
  def convert_letter_3to1(symbol):
499
536
  """
500
537
  Convert a 3-letter to a 1-letter amino acid representation.
501
-
538
+
502
539
  Parameters
503
540
  ----------
504
541
  symbol : string
505
542
  3-letter amino acid representation.
506
-
543
+
507
544
  Returns
508
545
  -------
509
546
  convert : string
510
547
  1-letter amino acid representation.
511
548
  """
512
549
  return ProteinSequence._dict_3to1[symbol.upper()]
513
-
550
+
514
551
  @staticmethod
515
552
  def convert_letter_1to3(symbol):
516
553
  """
517
554
  Convert a 1-letter to a 3-letter amino acid representation.
518
-
555
+
519
556
  Parameters
520
557
  ----------
521
558
  symbol : string
522
559
  1-letter amino acid representation.
523
-
560
+
524
561
  Returns
525
562
  -------
526
563
  convert : string
@@ -531,7 +568,7 @@ class ProteinSequence(Sequence):
531
568
  def get_molecular_weight(self, monoisotopic=False):
532
569
  """
533
570
  Calculate the molecular weight of this protein.
534
-
571
+
535
572
  Average protein molecular weight is calculated by the addition
536
573
  of average isotopic masses of the amino acids
537
574
  in the protein and the average isotopic mass of one water
@@ -550,7 +587,6 @@ class ProteinSequence(Sequence):
550
587
 
551
588
  if np.isnan(weight):
552
589
  raise ValueError(
553
- "Sequence contains ambiguous amino acids, "
554
- "cannot calculate weight"
590
+ "Sequence contains ambiguous amino acids, " "cannot calculate weight"
555
591
  )
556
592
  return weight