PyPI - biotite - Versions diffs - 0.41.2__cp310-cp310-win_amd64.whl → 1.0.0__cp310-cp310-win_amd64.whl - Mend

biotite 0.41.2__cp310-cp310-win_amd64.whl → 1.0.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (205) hide show

biotite/__init__.py +2 -3
biotite/application/__init__.py +1 -1
biotite/application/application.py +20 -10
biotite/application/autodock/__init__.py +1 -1
biotite/application/autodock/app.py +74 -79
biotite/application/blast/__init__.py +1 -1
biotite/application/blast/alignment.py +19 -10
biotite/application/blast/webapp.py +92 -85
biotite/application/clustalo/__init__.py +1 -1
biotite/application/clustalo/app.py +46 -61
biotite/application/dssp/__init__.py +1 -1
biotite/application/dssp/app.py +8 -11
biotite/application/localapp.py +62 -60
biotite/application/mafft/__init__.py +1 -1
biotite/application/mafft/app.py +16 -22
biotite/application/msaapp.py +78 -89
biotite/application/muscle/__init__.py +1 -1
biotite/application/muscle/app3.py +50 -64
biotite/application/muscle/app5.py +23 -31
biotite/application/sra/__init__.py +1 -1
biotite/application/sra/app.py +64 -68
biotite/application/tantan/__init__.py +1 -1
biotite/application/tantan/app.py +22 -45
biotite/application/util.py +7 -9
biotite/application/viennarna/rnaalifold.py +34 -28
biotite/application/viennarna/rnafold.py +24 -39
biotite/application/viennarna/rnaplot.py +36 -21
biotite/application/viennarna/util.py +17 -12
biotite/application/webapp.py +13 -14
biotite/copyable.py +13 -13
biotite/database/__init__.py +1 -1
biotite/database/entrez/__init__.py +1 -1
biotite/database/entrez/check.py +2 -3
biotite/database/entrez/dbnames.py +7 -5
biotite/database/entrez/download.py +55 -49
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +62 -23
biotite/database/error.py +2 -1
biotite/database/pubchem/__init__.py +1 -1
biotite/database/pubchem/download.py +43 -45
biotite/database/pubchem/error.py +2 -2
biotite/database/pubchem/query.py +34 -31
biotite/database/pubchem/throttle.py +3 -4
biotite/database/rcsb/__init__.py +1 -1
biotite/database/rcsb/download.py +44 -52
biotite/database/rcsb/query.py +85 -80
biotite/database/uniprot/check.py +6 -3
biotite/database/uniprot/download.py +6 -11
biotite/database/uniprot/query.py +115 -31
biotite/file.py +12 -31
biotite/sequence/__init__.py +3 -3
biotite/sequence/align/__init__.py +2 -2
biotite/sequence/align/alignment.py +99 -90
biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/buckets.py +12 -10
biotite/sequence/align/cigar.py +43 -52
biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +55 -51
biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +3 -2
biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/matrix.py +81 -82
biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/multiple.pyx +1 -1
biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/permutation.pyx +12 -4
biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +52 -54
biotite/sequence/align/statistics.py +32 -33
biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +51 -65
biotite/sequence/annotation.py +78 -77
biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
biotite/sequence/codon.py +90 -79
biotite/sequence/graphics/__init__.py +1 -1
biotite/sequence/graphics/alignment.py +184 -103
biotite/sequence/graphics/colorschemes.py +10 -12
biotite/sequence/graphics/dendrogram.py +79 -34
biotite/sequence/graphics/features.py +133 -99
biotite/sequence/graphics/logo.py +22 -28
biotite/sequence/graphics/plasmid.py +229 -178
biotite/sequence/io/fasta/__init__.py +1 -1
biotite/sequence/io/fasta/convert.py +44 -33
biotite/sequence/io/fasta/file.py +42 -55
biotite/sequence/io/fastq/__init__.py +1 -1
biotite/sequence/io/fastq/convert.py +11 -14
biotite/sequence/io/fastq/file.py +68 -112
biotite/sequence/io/genbank/__init__.py +2 -2
biotite/sequence/io/genbank/annotation.py +12 -20
biotite/sequence/io/genbank/file.py +74 -76
biotite/sequence/io/genbank/metadata.py +74 -62
biotite/sequence/io/genbank/sequence.py +13 -14
biotite/sequence/io/general.py +39 -30
biotite/sequence/io/gff/__init__.py +2 -2
biotite/sequence/io/gff/convert.py +10 -15
biotite/sequence/io/gff/file.py +81 -65
biotite/sequence/phylo/__init__.py +1 -1
biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
biotite/sequence/profile.py +57 -28
biotite/sequence/search.py +17 -15
biotite/sequence/seqtypes.py +200 -164
biotite/sequence/sequence.py +15 -17
biotite/structure/__init__.py +3 -3
biotite/structure/atoms.py +221 -235
biotite/structure/basepairs.py +260 -271
biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +29 -32
biotite/structure/box.py +67 -71
biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
biotite/structure/chains.py +55 -39
biotite/structure/charges.cp310-win_amd64.pyd +0 -0
biotite/structure/compare.py +32 -32
biotite/structure/density.py +13 -18
biotite/structure/dotbracket.py +20 -22
biotite/structure/error.py +10 -2
biotite/structure/filter.py +82 -77
biotite/structure/geometry.py +130 -119
biotite/structure/graphics/atoms.py +60 -43
biotite/structure/graphics/rna.py +81 -68
biotite/structure/hbond.py +112 -93
biotite/structure/info/__init__.py +0 -2
biotite/structure/info/atoms.py +10 -11
biotite/structure/info/bonds.py +41 -43
biotite/structure/info/ccd.py +4 -5
biotite/structure/info/groups.py +1 -3
biotite/structure/info/masses.py +5 -10
biotite/structure/info/misc.py +1 -1
biotite/structure/info/radii.py +20 -20
biotite/structure/info/standardize.py +15 -26
biotite/structure/integrity.py +18 -71
biotite/structure/io/__init__.py +3 -4
biotite/structure/io/dcd/__init__.py +1 -1
biotite/structure/io/dcd/file.py +22 -20
biotite/structure/io/general.py +47 -61
biotite/structure/io/gro/__init__.py +1 -1
biotite/structure/io/gro/file.py +73 -72
biotite/structure/io/mol/__init__.py +1 -1
biotite/structure/io/mol/convert.py +8 -11
biotite/structure/io/mol/ctab.py +37 -36
biotite/structure/io/mol/header.py +14 -10
biotite/structure/io/mol/mol.py +9 -53
biotite/structure/io/mol/sdf.py +47 -50
biotite/structure/io/netcdf/__init__.py +1 -1
biotite/structure/io/netcdf/file.py +24 -23
biotite/structure/io/pdb/__init__.py +1 -1
biotite/structure/io/pdb/convert.py +32 -20
biotite/structure/io/pdb/file.py +151 -172
biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/__init__.py +1 -1
biotite/structure/io/pdbqt/convert.py +17 -11
biotite/structure/io/pdbqt/file.py +128 -80
biotite/structure/io/pdbx/__init__.py +1 -2
biotite/structure/io/pdbx/bcif.py +36 -44
biotite/structure/io/pdbx/cif.py +64 -62
biotite/structure/io/pdbx/component.py +10 -16
biotite/structure/io/pdbx/convert.py +235 -246
biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
biotite/structure/io/trajfile.py +76 -93
biotite/structure/io/trr/__init__.py +1 -1
biotite/structure/io/trr/file.py +12 -15
biotite/structure/io/xtc/__init__.py +1 -1
biotite/structure/io/xtc/file.py +11 -14
biotite/structure/mechanics.py +9 -11
biotite/structure/molecules.py +3 -4
biotite/structure/pseudoknots.py +53 -67
biotite/structure/rdf.py +23 -21
biotite/structure/repair.py +137 -86
biotite/structure/residues.py +26 -16
biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
biotite/structure/{resutil.py → segments.py} +24 -23
biotite/structure/sequence.py +10 -11
biotite/structure/sse.py +100 -119
biotite/structure/superimpose.py +39 -77
biotite/structure/transform.py +97 -71
biotite/structure/util.py +11 -13
biotite/version.py +2 -2
biotite/visualize.py +69 -55
{biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
biotite-1.0.0.dist-info/RECORD +322 -0
biotite/structure/io/ctab.py +0 -72
biotite/structure/io/mmtf/__init__.py +0 -21
biotite/structure/io/mmtf/assembly.py +0 -214
biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertarray.pyx +0 -341
biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/convertfile.pyx +0 -501
biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/decode.pyx +0 -152
biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
biotite/structure/io/mmtf/encode.pyx +0 -183
biotite/structure/io/mmtf/file.py +0 -233
biotite/structure/io/npz/__init__.py +0 -20
biotite/structure/io/npz/file.py +0 -152
biotite/structure/io/pdbx/legacy.py +0 -267
biotite/structure/io/tng/__init__.py +0 -13
biotite/structure/io/tng/file.py +0 -46
biotite/temp.py +0 -86
biotite-0.41.2.dist-info/RECORD +0 -340
{biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
{biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/seqtypes.py CHANGED Viewed

@@ -6,17 +6,16 @@ __name__ = "biotite.sequence"
 __author__ = "Patrick Kunzmann", "Thomas Nevolianis"
 __all__ = ["GeneralSequence", "NucleotideSequence", "ProteinSequence"]
-from .sequence import Sequence
-from .alphabet import LetterAlphabet, AlphabetError, AlphabetMapper
 import numpy as np
-import copy
+from biotite.sequence.alphabet import AlphabetError, AlphabetMapper, LetterAlphabet
+from biotite.sequence.sequence import Sequence
 class GeneralSequence(Sequence):
     """
     This class allows the creation of a sequence with custom
     :class:`Alphabet` without the need to subclass :class:`Sequence`.
     Parameters
     ----------
     alphabet : Alphabet
@@ -27,22 +26,24 @@ class GeneralSequence(Sequence):
         may also be a :class:`str` object.
         By default the sequence is empty.
     """
     def __init__(self, alphabet, sequence=()):
         self._alphabet = alphabet
         super().__init__(sequence)
     def __repr__(self):
         """Represent GeneralSequence as a string for debugging."""
-        return f"GeneralSequence(Alphabet({self._alphabet}), " \
-               f"[{', '.join([repr(symbol) for symbol in self.symbols])}])"
+        return (
+            f"GeneralSequence(Alphabet({self._alphabet}), "
+            f"[{', '.join([repr(symbol) for symbol in self.symbols])}])"
+        )
     def __copy_create__(self):
         return GeneralSequence(self._alphabet)
     def get_alphabet(self):
         return self._alphabet
     def as_type(self, sequence):
         """
         Convert the :class:`GeneralSequence` into a sequence of another
@@ -58,12 +59,12 @@ class GeneralSequence(Sequence):
             of this object.
             The alphabet must equal or extend the alphabet of this
             object.
         Returns
         -------
         sequence : Sequence
             The input `sequence` with replaced sequence code.
         Raises
         ------
         AlphabetError
@@ -78,16 +79,17 @@ class GeneralSequence(Sequence):
         sequence.code = self.code
         return sequence
 class NucleotideSequence(Sequence):
     """
     Representation of a nucleotide sequence (DNA or RNA).
     This class may have one of two different alphabets:
     :attr:`unambiguous_alphabet()` contains only the unambiguous DNA
     letters 'A', 'C', 'G' and 'T'.
-    :attr:`ambiguous_alphabet()` uses an extended alphabet for ambiguous
+    :attr:`ambiguous_alphabet()` uses an extended alphabet for ambiguous
     letters.
     Parameters
     ----------
     sequence : iterable object, optional
@@ -100,35 +102,36 @@ class NucleotideSequence(Sequence):
         ambiguous letters in the sequence, the ambiguous alphabet
         is used.
     """
-    alphabet_unamb = LetterAlphabet(["A","C","G","T"])
-    alphabet_amb   = LetterAlphabet(
-        ["A","C","G","T","R","Y","W","S",
-         "M","K","H","B","V","D","N"]
+    alphabet_unamb = LetterAlphabet(["A", "C", "G", "T"])
+    alphabet_amb = LetterAlphabet(
+        ["A", "C", "G", "T", "R", "Y", "W", "S", "M", "K", "H", "B", "V", "D", "N"]
     )
-    compl_symbol_dict = {"A" : "T",
-                         "C" : "G",
-                         "G" : "C",
-                         "T" : "A",
-                         "M" : "K",
-                         "R" : "Y",
-                         "W" : "W",
-                         "S" : "S",
-                         "Y" : "R",
-                         "K" : "M",
-                         "V" : "B",
-                         "H" : "D",
-                         "D" : "H",
-                         "B" : "V",
-                         "N" : "N"}
+    compl_symbol_dict = {
+        "A": "T",
+        "C": "G",
+        "G": "C",
+        "T": "A",
+        "M": "K",
+        "R": "Y",
+        "W": "W",
+        "S": "S",
+        "Y": "R",
+        "K": "M",
+        "V": "B",
+        "H": "D",
+        "D": "H",
+        "B": "V",
+        "N": "N",
+    }
     # List comprehension does not work in this scope
     _compl_symbols = []
     for _symbol in alphabet_amb.get_symbols():
         _compl_symbols.append(compl_symbol_dict[_symbol])
     _compl_alphabet_unamb = LetterAlphabet(_compl_symbols)
     _compl_mapper = AlphabetMapper(_compl_alphabet_unamb, alphabet_amb)
     def __init__(self, sequence=[], ambiguous=None):
         if isinstance(sequence, str):
             sequence = sequence.upper()
@@ -164,28 +167,28 @@ class NucleotideSequence(Sequence):
         else:
             seq_copy = NucleotideSequence(ambiguous=False)
         return seq_copy
     def get_alphabet(self):
         return self._alphabet
     def complement(self):
         """
         Get the complement nucleotide sequence.
         Returns
         -------
         complement : NucleotideSequence
             The complement sequence.
         Examples
         --------
         >>> dna_seq = NucleotideSequence("ACGCTT")
         >>> print(dna_seq.complement())
         TGCGAA
         >>> print(dna_seq.reverse().complement())
         AAGCGT
         """
         # Interpreting the sequence code of this object in the
         # complementary alphabet gives the complementary symbols
@@ -194,18 +197,18 @@ class NucleotideSequence(Sequence):
         # alphabet into the original alphabet
         compl_code = NucleotideSequence._compl_mapper[self.code]
         return self.copy(compl_code)
     def translate(self, complete=False, codon_table=None, met_start=False):
         """
         Translate the nucleotide sequence into a protein sequence.
         If `complete` is true, the entire sequence is translated,
         beginning with the first codon and ending with the last codon,
         even if stop codons occur during the translation.
         Otherwise this method returns possible ORFs in the
         sequence, even if no stop codon occurs in an ORF.
         Parameters
         ----------
         complete : bool, optional
@@ -222,7 +225,7 @@ class NucleotideSequence(Sequence):
             Otherwise the translation starts with the amino acid
             the codon codes for. Only applies, if `complete` is false.
             (Default: False)
         Returns
         -------
         protein : ProteinSequence or list of ProteinSequence
@@ -233,15 +236,15 @@ class NucleotideSequence(Sequence):
         pos : list of tuple (int, int)
             Is only returned if `complete` is false. The list contains
             a tuple for each ORF.
-            The first element of the tuple is the index of the
+            The first element of the tuple is the index of the
             :class:`NucleotideSequence`, where the translation starts.
             The second element is the exclusive stop index, it
             represents the first nucleotide in the
             :class:`NucleotideSequence` after a stop codon.
         Examples
         --------
         >>> dna_seq = NucleotideSequence("AATGATGCTATAGAT")
         >>> prot_seq = dna_seq.translate(complete=True)
         >>> print(prot_seq)
@@ -251,29 +254,32 @@ class NucleotideSequence(Sequence):
         ...    print(seq)
         MML*
         ML*
         """
         if self._alphabet != NucleotideSequence.alphabet_unamb:
             raise AlphabetError("Translation requires unambiguous alphabet")
         # Determine codon_table
         if codon_table is None:
             # Import at this position to avoid circular import
-            from .codon import CodonTable
+            from biotite.sequence.codon import CodonTable
             codon_table = CodonTable.default_table()
         if complete:
             if len(self) % 3 != 0:
-                raise ValueError("Sequence length needs to be a multiple of 3 "
-                                 "for complete translation")
+                raise ValueError(
+                    "Sequence length needs to be a multiple of 3 "
+                    "for complete translation"
+                )
             # Reshape code into (n,3), with n being the amount of codons
             codons = self.code.reshape(-1, 3)
             protein_seq = ProteinSequence()
             protein_seq.code = codon_table.map_codon_codes(codons)
             return protein_seq
         else:
             stop_code = ProteinSequence.alphabet.encode("*")
-            met_code  = ProteinSequence.alphabet.encode("M")
+            met_code = ProteinSequence.alphabet.encode("M")
             protein_seqs = []
             pos = []
             code = self.code
@@ -282,7 +288,7 @@ class NucleotideSequence(Sequence):
                 # The frame length is always a multiple of 3
                 # If there is a trailing partial codon, remove it
                 frame_length = ((len(code) - shift) // 3) * 3
-                frame = code[shift : shift+frame_length]
+                frame = code[shift : shift + frame_length]
                 # Reshape frame into (n,3), with n being the amount of codons
                 frame_codons = frame.reshape(-1, 3)
                 # At first, translate frame completely
@@ -297,8 +303,7 @@ class NucleotideSequence(Sequence):
                     stops = np.where(code_from_start == stop_code)[0]
                     # Find first stop codon after start codon
                     # Include stop -> stops[0] + 1
-                    stop_i = stops[0] + 1 if len(stops) > 0 \
-                             else len(code_from_start)
+                    stop_i = stops[0] + 1 if len(stops) > 0 else len(code_from_start)
                     code_from_start_to_stop = code_from_start[:stop_i]
                     prot_seq = ProteinSequence()
                     if met_start:
@@ -310,13 +315,13 @@ class NucleotideSequence(Sequence):
                     protein_seqs.append(prot_seq)
                     # Codon indices are transformed
                     # to nucleotide sequence indices
-                    pos.append((shift + start_i*3, shift + (start_i+stop_i)*3))
+                    pos.append((shift + start_i * 3, shift + (start_i + stop_i) * 3))
             # Sort by start position
             order = np.argsort([start for start, stop in pos])
             pos = [pos[i] for i in order]
             protein_seqs = [protein_seqs[i] for i in order]
             return protein_seqs, pos
     @staticmethod
     def unambiguous_alphabet():
         """
@@ -329,7 +334,7 @@ class NucleotideSequence(Sequence):
             The unambiguous nucleotide alphabet.
         """
         return NucleotideSequence.alphabet_unamb
     @staticmethod
     def ambiguous_alphabet():
         """
@@ -348,10 +353,10 @@ class NucleotideSequence(Sequence):
 class ProteinSequence(Sequence):
     """
     Representation of a protein sequence.
     Furthermore this class offers a conversion of amino acids from
     3-letter code into 1-letter code and vice versa.
     Parameters
     ----------
     sequence : iterable object, optional
@@ -359,7 +364,7 @@ class ProteinSequence(Sequence):
         string. May take upper or lower case letters. If a list is
         given, the list elements can be 1-letter or 3-letter amino acid
         representations. By default the sequence is empty.
     Notes
     -----
     The :class:`Alphabet` of this :class:`Sequence` class does not
@@ -370,106 +375,138 @@ class ProteinSequence(Sequence):
     """
     _codon_table = None
-    alphabet = LetterAlphabet(["A","C","D","E","F","G","H","I","K","L",
-                               "M","N","P","Q","R","S","T","V","W","Y",
-                               "B","Z","X","*"])
+    alphabet = LetterAlphabet(
+        [
+            "A",
+            "C",
+            "D",
+            "E",
+            "F",
+            "G",
+            "H",
+            "I",
+            "K",
+            "L",
+            "M",
+            "N",
+            "P",
+            "Q",
+            "R",
+            "S",
+            "T",
+            "V",
+            "W",
+            "Y",
+            "B",
+            "Z",
+            "X",
+            "*",
+        ]
+    )
     # Masses are taken from
     # https://web.expasy.org/findmod/findmod_masses.html#AA
-    _mol_weight_average = np.array([
-         71.0788,  # A
-        103.1388,  # C
-        115.0886,  # D
-        129.1155,  # E
-        147.1766,  # F
-         57.0519,  # G
-        137.1411,  # H
-        113.1594,  # I
-        128.1741,  # K
-        113.1594,  # L
-        131.1926,  # M
-        114.1038,  # N
-         97.1167,  # P
-        128.1307,  # Q
-        156.1875,  # R
-         87.0782,  # S
-        101.1051,  # T
-         99.1326,  # V
-        186.2132,  # W
-        163.1760,  # Y
-          np.nan,  # B
-          np.nan,  # Z
-          np.nan,  # X
-          np.nan,  # *
-    ])
-    _mol_weight_monoisotopic = np.array([
-         71.03711,  # A
-        103.00919,  # C
-        115.02694,  # D
-        129.04259,  # E
-        147.06841,  # F
-         57.02146,  # G
-        137.05891,  # H
-        113.08406,  # I
-        128.09496,  # K
-        113.08406,  # L
-        131.04049,  # M
-        114.04293,  # N
-         97.05276,  # P
-        128.05858,  # Q
-        156.10111,  # R
-         87.03203,  # S
-        101.04768,  # T
-         99.06841,  # V
-        186.07931,  # W
-        163.06333,  # Y
-        np.nan,  # B
-        np.nan,  # Z
-        np.nan,  # X
-        np.nan,  # *
-    ])
-    _dict_1to3 = {"A" : "ALA",
-                  "C" : "CYS",
-                  "D" : "ASP",
-                  "E" : "GLU",
-                  "F" : "PHE",
-                  "G" : "GLY",
-                  "H" : "HIS",
-                  "I" : "ILE",
-                  "K" : "LYS",
-                  "L" : "LEU",
-                  "M" : "MET",
-                  "N" : "ASN",
-                  "P" : "PRO",
-                  "Q" : "GLN",
-                  "R" : "ARG",
-                  "S" : "SER",
-                  "T" : "THR",
-                  "V" : "VAL",
-                  "W" : "TRP",
-                  "Y" : "TYR",
-                  "B" : "ASX",
-                  "Z" : "GLX",
-                  "X" : "UNK",
-                  "*" : " * "}
+    _mol_weight_average = np.array(
+        [
+            71.0788,  # A
+            103.1388,  # C
+            115.0886,  # D
+            129.1155,  # E
+            147.1766,  # F
+            57.0519,  # G
+            137.1411,  # H
+            113.1594,  # I
+            128.1741,  # K
+            113.1594,  # L
+            131.1926,  # M
+            114.1038,  # N
+            97.1167,  # P
+            128.1307,  # Q
+            156.1875,  # R
+            87.0782,  # S
+            101.1051,  # T
+            99.1326,  # V
+            186.2132,  # W
+            163.1760,  # Y
+            np.nan,  # B
+            np.nan,  # Z
+            np.nan,  # X
+            np.nan,  # *
+        ]
+    )
+    _mol_weight_monoisotopic = np.array(
+        [
+            71.03711,  # A
+            103.00919,  # C
+            115.02694,  # D
+            129.04259,  # E
+            147.06841,  # F
+            57.02146,  # G
+            137.05891,  # H
+            113.08406,  # I
+            128.09496,  # K
+            113.08406,  # L
+            131.04049,  # M
+            114.04293,  # N
+            97.05276,  # P
+            128.05858,  # Q
+            156.10111,  # R
+            87.03203,  # S
+            101.04768,  # T
+            99.06841,  # V
+            186.07931,  # W
+            163.06333,  # Y
+            np.nan,  # B
+            np.nan,  # Z
+            np.nan,  # X
+            np.nan,  # *
+        ]
+    )
+    _dict_1to3 = {
+        "A": "ALA",
+        "C": "CYS",
+        "D": "ASP",
+        "E": "GLU",
+        "F": "PHE",
+        "G": "GLY",
+        "H": "HIS",
+        "I": "ILE",
+        "K": "LYS",
+        "L": "LEU",
+        "M": "MET",
+        "N": "ASN",
+        "P": "PRO",
+        "Q": "GLN",
+        "R": "ARG",
+        "S": "SER",
+        "T": "THR",
+        "V": "VAL",
+        "W": "TRP",
+        "Y": "TYR",
+        "B": "ASX",
+        "Z": "GLX",
+        "X": "UNK",
+        "*": " * ",
+    }
     _dict_3to1 = {}
     for _key, _value in _dict_1to3.items():
         _dict_3to1[_value] = _key
     _dict_3to1["SEC"] = "C"
     _dict_3to1["MSE"] = "M"
     def __init__(self, sequence=()):
         dict_3to1 = ProteinSequence._dict_3to1
-        alph = ProteinSequence.alphabet
         # Convert 3-letter codes to single letter codes,
         # if list contains 3-letter codes
-        sequence = [dict_3to1[symbol.upper()] if len(symbol) == 3
-                    else symbol.upper() for symbol in sequence]
+        sequence = [
+            dict_3to1[symbol.upper()] if len(symbol) == 3 else symbol.upper()
+            for symbol in sequence
+        ]
         super().__init__(sequence)
     def __repr__(self):
@@ -478,11 +515,11 @@ class ProteinSequence(Sequence):
     def get_alphabet(self):
         return ProteinSequence.alphabet
     def remove_stops(self):
         """
         Remove *stop signals* from the sequence.
         Returns
         -------
         no_stop : ProteinSequence
@@ -493,34 +530,34 @@ class ProteinSequence(Sequence):
         seq_code = no_stop.code
         no_stop.code = seq_code[seq_code != stop_code]
         return no_stop
     @staticmethod
     def convert_letter_3to1(symbol):
         """
         Convert a 3-letter to a 1-letter amino acid representation.
         Parameters
         ----------
         symbol : string
             3-letter amino acid representation.
         Returns
         -------
         convert : string
             1-letter amino acid representation.
         """
         return ProteinSequence._dict_3to1[symbol.upper()]
     @staticmethod
     def convert_letter_1to3(symbol):
         """
         Convert a 1-letter to a 3-letter amino acid representation.
         Parameters
         ----------
         symbol : string
             1-letter amino acid representation.
         Returns
         -------
         convert : string
@@ -531,7 +568,7 @@ class ProteinSequence(Sequence):
     def get_molecular_weight(self, monoisotopic=False):
         """
         Calculate the molecular weight of this protein.
         Average protein molecular weight is calculated by the addition
         of average isotopic masses of the amino acids
         in the protein and the average isotopic mass of one water
@@ -550,7 +587,6 @@ class ProteinSequence(Sequence):
         if np.isnan(weight):
             raise ValueError(
-                "Sequence contains ambiguous amino acids, "
-                "cannot calculate weight"
+                "Sequence contains ambiguous amino acids, " "cannot calculate weight"
             )
         return weight