PyPI - biotite - Versions diffs - 1.1.0__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl - Mend

biotite 1.1.0__cp312-cp312-win_amd64.whl → 1.2.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (155)

biotite/application/application.py +3 -3
biotite/application/autodock/app.py +1 -1
biotite/application/blast/webapp.py +1 -1
biotite/application/clustalo/app.py +1 -1
biotite/application/localapp.py +2 -2
biotite/application/msaapp.py +10 -10
biotite/application/muscle/app3.py +3 -3
biotite/application/muscle/app5.py +3 -3
biotite/application/sra/app.py +0 -5
biotite/application/util.py +21 -1
biotite/application/viennarna/rnaalifold.py +8 -8
biotite/application/viennarna/rnaplot.py +3 -1
biotite/application/viennarna/util.py +1 -1
biotite/application/webapp.py +1 -1
biotite/database/afdb/__init__.py +12 -0
biotite/database/afdb/download.py +191 -0
biotite/database/entrez/dbnames.py +10 -0
biotite/database/entrez/download.py +9 -10
biotite/database/entrez/key.py +1 -1
biotite/database/entrez/query.py +5 -4
biotite/database/pubchem/download.py +6 -6
biotite/database/pubchem/error.py +10 -0
biotite/database/pubchem/query.py +12 -23
biotite/database/rcsb/download.py +3 -2
biotite/database/rcsb/query.py +2 -3
biotite/database/uniprot/check.py +2 -2
biotite/database/uniprot/download.py +2 -5
biotite/database/uniprot/query.py +3 -4
biotite/file.py +14 -2
biotite/interface/__init__.py +19 -0
biotite/interface/openmm/__init__.py +16 -0
biotite/interface/openmm/state.py +93 -0
biotite/interface/openmm/system.py +227 -0
biotite/interface/pymol/__init__.py +198 -0
biotite/interface/pymol/cgo.py +346 -0
biotite/interface/pymol/convert.py +185 -0
biotite/interface/pymol/display.py +267 -0
biotite/interface/pymol/object.py +1226 -0
biotite/interface/pymol/shapes.py +178 -0
biotite/interface/pymol/startup.py +169 -0
biotite/interface/rdkit/__init__.py +15 -0
biotite/interface/rdkit/mol.py +490 -0
biotite/interface/version.py +71 -0
biotite/interface/warning.py +19 -0
biotite/sequence/align/__init__.py +0 -4
biotite/sequence/align/alignment.py +33 -11
biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/banded.pyx +21 -21
biotite/sequence/align/cigar.py +2 -2
biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmeralphabet.pyx +2 -2
biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/kmertable.pyx +6 -6
biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localgapped.pyx +47 -47
biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/localungapped.pyx +10 -10
biotite/sequence/align/matrix.py +12 -3
biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/pairwise.pyx +35 -35
biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
biotite/sequence/align/selector.pyx +2 -2
biotite/sequence/align/statistics.py +1 -1
biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
biotite/sequence/alphabet.py +2 -2
biotite/sequence/annotation.py +19 -13
biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
biotite/sequence/codon.py +1 -2
biotite/sequence/graphics/alignment.py +25 -39
biotite/sequence/graphics/dendrogram.py +4 -2
biotite/sequence/graphics/features.py +2 -2
biotite/sequence/graphics/logo.py +10 -12
biotite/sequence/io/fasta/convert.py +1 -2
biotite/sequence/io/fasta/file.py +1 -1
biotite/sequence/io/fastq/file.py +3 -3
biotite/sequence/io/genbank/file.py +3 -3
biotite/sequence/io/genbank/sequence.py +2 -0
biotite/sequence/io/gff/convert.py +1 -1
biotite/sequence/io/gff/file.py +1 -2
biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
biotite/sequence/profile.py +19 -25
biotite/sequence/search.py +0 -1
biotite/sequence/seqtypes.py +12 -5
biotite/sequence/sequence.py +1 -2
biotite/structure/__init__.py +2 -0
biotite/structure/alphabet/i3d.py +1 -2
biotite/structure/alphabet/pb.py +1 -2
biotite/structure/alphabet/unkerasify.py +8 -2
biotite/structure/atoms.py +35 -27
biotite/structure/basepairs.py +26 -26
biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
biotite/structure/bonds.pyx +8 -5
biotite/structure/box.py +19 -21
biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
biotite/structure/celllist.pyx +83 -67
biotite/structure/chains.py +5 -37
biotite/structure/charges.cp312-win_amd64.pyd +0 -0
biotite/structure/compare.py +420 -13
biotite/structure/density.py +1 -1
biotite/structure/dotbracket.py +27 -28
biotite/structure/filter.py +8 -8
biotite/structure/geometry.py +15 -15
biotite/structure/hbond.py +17 -19
biotite/structure/info/atoms.py +11 -2
biotite/structure/info/ccd.py +0 -2
biotite/structure/info/components.bcif +0 -0
biotite/structure/info/groups.py +0 -3
biotite/structure/info/misc.py +0 -1
biotite/structure/info/radii.py +92 -22
biotite/structure/info/standardize.py +1 -2
biotite/structure/integrity.py +4 -6
biotite/structure/io/general.py +2 -2
biotite/structure/io/gro/file.py +8 -9
biotite/structure/io/mol/convert.py +1 -1
biotite/structure/io/mol/ctab.py +33 -28
biotite/structure/io/mol/mol.py +1 -1
biotite/structure/io/mol/sdf.py +39 -13
biotite/structure/io/pdb/convert.py +2 -3
biotite/structure/io/pdb/file.py +11 -22
biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
biotite/structure/io/pdbqt/file.py +4 -4
biotite/structure/io/pdbx/bcif.py +22 -7
biotite/structure/io/pdbx/cif.py +20 -7
biotite/structure/io/pdbx/component.py +6 -0
biotite/structure/io/pdbx/compress.py +2 -2
biotite/structure/io/pdbx/convert.py +222 -33
biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
biotite/structure/io/trajfile.py +9 -6
biotite/structure/io/util.py +38 -0
biotite/structure/mechanics.py +0 -1
biotite/structure/molecules.py +0 -15
biotite/structure/pseudoknots.py +7 -13
biotite/structure/repair.py +2 -4
biotite/structure/residues.py +13 -24
biotite/structure/rings.py +335 -0
biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
biotite/structure/sasa.pyx +2 -1
biotite/structure/segments.py +68 -9
biotite/structure/sequence.py +0 -1
biotite/structure/sse.py +0 -2
biotite/structure/superimpose.py +74 -62
biotite/structure/tm.py +581 -0
biotite/structure/transform.py +12 -25
biotite/structure/util.py +3 -3
biotite/version.py +9 -4
biotite/visualize.py +111 -1
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/METADATA +5 -3
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/RECORD +155 -135
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
{biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0

biotite/sequence/graphics/logo.py CHANGED Viewed

@@ -9,7 +9,7 @@ __all__ = ["plot_sequence_logo"]
 import numpy as np
 from biotite.sequence.alphabet import LetterAlphabet
 from biotite.sequence.graphics.colorschemes import get_color_scheme
-from biotite.visualize import set_font_size_in_coord
+from biotite.visualize import plot_scaled_text
 def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
@@ -29,7 +29,7 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
     ----------
     axes : Axes
         The axes to draw the logo one.
-    profile: SequenceProfile
+    profile : SequenceProfile
         The logo is created based on this profile.
     scheme : str or list of (tuple or str)
         Either a valid color scheme name
@@ -38,7 +38,8 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
         The list length must be at least as long as the
         length of the alphabet used by the `profile`.
     **kwargs
-        Additional `text parameters <https://matplotlib.org/api/text_api.html#matplotlib.text.Text>`_.
+        Additional parameters for the :class:`matplotlib.font_manager.FontProperties`
+        of the text or the created :class:`matplotlib.patches.PathPatch`.
     References
     ----------
@@ -69,23 +70,20 @@ def plot_sequence_logo(axes, profile, scheme=None, **kwargs):
         index_order = np.argsort(symbols_heights)
         start_height = 0
         for j in index_order[i]:
-            # Stack the symbols at position on top of the preceeding one
+            # Stack the symbols at position on top of the preceding one
             height = symbols_heights[i, j]
             if height > 0:
                 symbol = alphabet.decode(j)
-                text = axes.text(
+                plot_scaled_text(
+                    axes,
+                    symbol,
                     i + 0.5,
                     start_height,
-                    symbol,
-                    ha="left",
-                    va="bottom",
+                    width=1,
+                    height=height,
                     color=colors[j],
-                    # Best results are obtained with this font size
-                    size=1,
                     **kwargs,
                 )
-                text.set_clip_on(True)
-                set_font_size_in_coord(text, width=1, height=height)
                 start_height += height
     axes.set_xlim(0.5, len(profile.symbols) + 0.5)

biotite/sequence/io/fasta/convert.py CHANGED Viewed

@@ -275,8 +275,7 @@ def _process_nucleotide_sequence(x):
 def _convert_to_string(sequence, as_rna):
     if not isinstance(sequence.get_alphabet(), LetterAlphabet):
         raise ValueError(
-            "Only sequences using single letter alphabets "
-            "can be stored in a FASTA file"
+            "Only sequences using single letter alphabets can be stored in a FASTA file"
         )
     if isinstance(sequence, NucleotideSequence) and as_rna:
         return str(sequence).replace("T", "U")

biotite/sequence/io/fasta/file.py CHANGED Viewed

@@ -102,7 +102,7 @@ class FastaFile(TextFile, MutableMapping):
         if not isinstance(header, str):
             raise IndexError("'FastaFile' only supports header strings as keys")
         if not isinstance(seq_str, str):
-            raise TypeError("'FastaFile' only supports sequence strings " "as values")
+            raise TypeError("'FastaFile' only supports sequence strings as values")
         # Create lines for new header and sequence (with line breaks)
         new_lines = [">" + header.replace("\n", "").strip()] + wrap_string(
             seq_str, width=self._chars_per_line

biotite/sequence/io/fastq/file.py CHANGED Viewed

@@ -302,10 +302,10 @@ class FastqFile(TextFile, MutableMapping):
                 else:  # score_len > seq_len
                     raise InvalidFileError(
                         f"The amount of scores is not equal to the sequence "
-                        f"length for the sequence in line {seq_start_i+1} "
+                        f"length for the sequence in line {seq_start_i + 1} "
                     )
             else:
-                raise InvalidFileError(f"Line {i+1} in FASTQ file is invalid")
+                raise InvalidFileError(f"Line {i + 1} in FASTQ file is invalid")
         # At the end of the file, the last sequence or score block
         # must have properly ended
         if in_sequence or in_scores:
@@ -392,7 +392,7 @@ class FastqFile(TextFile, MutableMapping):
                     yield identifier, ("".join(seq_str_list), scores)
                 else:  # score_len > seq_len
                     raise InvalidFileError(
-                        "The amount of scores is not equal to the sequence " "length"
+                        "The amount of scores is not equal to the sequence length"
                     )
             else:

biotite/sequence/io/genbank/file.py CHANGED Viewed

@@ -80,7 +80,7 @@ class GenBankFile(TextFile):
     >>> print(content)
     ['One line', 'A second line']
     >>> print(subfields)
-    OrderedDict([('SUBFIELD1', ['Single Line']), ('SUBFIELD2', ['Two', 'lines'])])
+    OrderedDict({'SUBFIELD1': ['Single Line'], 'SUBFIELD2': ['Two', 'lines']})
     Adding an additional field:
@@ -391,7 +391,7 @@ class GenBankFile(TextFile):
             The field name.
         content : list of str
             The content lines.
-        subfield_dict : dict of str -> str, optional
+        subfields : dict of str -> str, optional
             The subfields of the field.
             The dictionary maps subfield names to the content lines of
             the respective subfield.
@@ -432,7 +432,7 @@ class GenBankFile(TextFile):
             The field name.
         content : list of str
             The content lines.
-        subfield_dict : dict of str -> str, optional
+        subfields : dict of str -> str, optional
             The subfields of the field.
             The dictionary maps subfield names to the content lines of
             the respective subfield.

biotite/sequence/io/genbank/sequence.py CHANGED Viewed

@@ -82,6 +82,8 @@ def get_annotated_sequence(gb_file, format="gb", include_only=None):
     ----------
     gb_file : GenBankFile
         The GenBank file to read the fields from.
+    format : {'gb', 'gp'}
+        Whether the file is a *GenBank* or *GenPept* file.
     include_only : iterable object of str, optional
         List of names of feature keys, which should included
         in the annotation. By default all features are included.

biotite/sequence/io/gff/convert.py CHANGED Viewed

@@ -84,7 +84,7 @@ def set_annotation(gff_file, annotation, seqid=None, source=None, is_stranded=Tr
     for feature in sorted(annotation):
         if len(feature.locs) > 1 and "ID" not in feature.qual:
             raise ValueError(
-                "The 'Id' qualifier is required " "for features with multiple locations"
+                "The 'Id' qualifier is required for features with multiple locations"
             )
         ## seqid ##
         if seqid is not None and " " in seqid:

biotite/sequence/io/gff/file.py CHANGED Viewed

@@ -303,8 +303,7 @@ class GFFFile(TextFile):
     def __getitem__(self, index):
         if (index >= 0 and index >= len(self)) or (index < 0 and -index > len(self)):
             raise IndexError(
-                f"Index {index} is out of range for GFFFile with "
-                f"{len(self)} entries"
+                f"Index {index} is out of range for GFFFile with {len(self)} entries"
             )
         line_index = self._entries[index]

biotite/sequence/phylo/nj.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/phylo/tree.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/phylo/upgma.cp312-win_amd64.pyd CHANGED Viewed

Binary file

biotite/sequence/profile.py CHANGED Viewed

@@ -95,7 +95,7 @@ class SequenceProfile(object):
     gaps : ndarray, dtype=int, shape=n
         Array which indicates the number of gaps at each position.
     alphabet : Alphabet, length=k
-        Alphabet of sequences of sequence profile
+        Alphabet of sequences of sequence profile.
     Attributes
     ----------
@@ -264,15 +264,14 @@ class SequenceProfile(object):
         alphabet : bool
             This alphabet will be used when creating the SequenceProfile
             object. If no alphabet is selected, the alphabet for this
-            SequenceProfile
+            :class:`SequenceProfile`.
             object will be calculated from the sequences of object
             Alignment.
-            (Default: None).
         Returns
         -------
         profile: SequenceProfile
-            The created SequenceProfile object
+            The created :class:`SequenceProfile` object.
         """
         sequences = get_codes(alignment)
         if alphabet is None:
@@ -306,13 +305,12 @@ class SequenceProfile(object):
             If true, returns consensus sequence as GeneralSequence
             object.
             Otherwise, the consensus sequence object type is chosen
-            based on the alphabet of this SequenceProfile object
-            (Default: False).
+            based on the alphabet of this SequenceProfile object.
         Returns
         -------
         consensus: Sequence
-            The calculated consensus sequence
+            The calculated consensus sequence.
         """
         # https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes
         if as_general:
@@ -420,14 +418,13 @@ class SequenceProfile(object):
         Parameters
         ----------
-        pseudocount: int, optional
+        pseudocount : int, optional
             Amount added to the number of observed cases in order to
             change the expected probability of the PPM.
-            (Default: 0)
         Returns
         -------
-        ppm: ndarray, dtype=float, shape=(n,k)
+        ppm : ndarray, dtype=float, shape=(n,k)
             The calculated the position probability matrix.
         """
         if pseudocount < 0:
@@ -456,17 +453,16 @@ class SequenceProfile(object):
         Parameters
         ----------
-        pseudocount: int, optional
-            Amount added to the number of observed cases in order to change
-            the expected probability of the PPM.
-            (Default: 0)
-        background_frequencies: ndarray, shape=(k,), dtype=float, optional
+        background_frequencies : ndarray, shape=(k,), dtype=float, optional
             The background frequencies for each symbol in the alphabet.
             By default, a uniform distribution is assumed.
+        pseudocount : int, optional
+            Amount added to the number of observed cases in order to change
+            the expected probability of the PPM.
         Returns
         -------
-        pwm: ndarray, dtype=float, shape=(n,k)
+        pwm : ndarray, dtype=float, shape=(n,k)
             The calculated the position weight matrix.
         """
         if background_frequencies is None:
@@ -490,14 +486,13 @@ class SequenceProfile(object):
         ----------
         sequence : Sequence
            The input sequence.
-        pseudocount: int, optional
+        pseudocount : int, optional
             Amount added to the number of observed cases in order to change
             the expected probability of the PPM.
-            (Default: 0)
         Returns
         -------
-        probability: float
+        probability : float
            The calculated probability for the input sequence based on
            the PPM.
         """
@@ -526,17 +521,16 @@ class SequenceProfile(object):
         ----------
         sequence : Sequence
            The input sequence.
-        pseudocount: int, optional
-            Amount added to the number of observed cases in order to change
-            the expected probability of the PPM.
-            (Default: 0)
-        background_frequencies: ndarray, shape=(k,), dtype=float, optional
+        background_frequencies : ndarray, shape=(k,), dtype=float, optional
             The background frequencies for each symbol in the alphabet.
             By default a uniform distribution is assumed.
+        pseudocount : int, optional
+            Amount added to the number of observed cases in order to change
+            the expected probability of the PPM.
         Returns
         -------
-        score: float
+        score : float
            The calculated score for the input sequence based on
            the PWM.
         """

biotite/sequence/search.py CHANGED Viewed

@@ -39,7 +39,6 @@ def find_subsequence(sequence, query):
     >>> sub_seq = NucleotideSequence("TGA")
     >>> print(find_subsequence(main_seq, sub_seq))
     [2 6]
     """
     if not sequence.get_alphabet().extends(query.get_alphabet()):
         raise ValueError("The sequences alphabets are not equal")

biotite/sequence/seqtypes.py CHANGED Viewed

@@ -200,7 +200,6 @@ class NucleotideSequence(Sequence):
         TGCGAA
         >>> print(dna_seq.reverse().complement())
         AAGCGT
         """
         # Interpreting the sequence code of this object in the
         # complementary alphabet gives the complementary symbols
@@ -226,7 +225,7 @@ class NucleotideSequence(Sequence):
         complete : bool, optional
             If true, the complete sequence is translated. In this case
             the sequence length must be a multiple of 3.
-            Otherwise all ORFs are translated. (Default: False)
+            Otherwise all ORFs are translated.
         codon_table : CodonTable, optional
             The codon table to be used. By default the default table
             will be used
@@ -236,7 +235,6 @@ class NucleotideSequence(Sequence):
             even if the start codon codes for another amino acid.
             Otherwise the translation starts with the amino acid
             the codon codes for. Only applies, if `complete` is false.
-            (Default: False)
         Returns
         -------
@@ -266,7 +264,6 @@ class NucleotideSequence(Sequence):
         ...    print(seq)
         MML*
         ML*
         """
         if self._alphabet != NucleotideSequence.alphabet_unamb:
             raise AlphabetError("Translation requires unambiguous alphabet")
@@ -586,6 +583,11 @@ class ProteinSequence(Sequence):
         in the protein and the average isotopic mass of one water
         molecule.
+        Parameters
+        ----------
+        monoisotopic : bool
+            Use the mass of the most common isotope.
         Returns
         -------
         weight : float
@@ -599,7 +601,7 @@ class ProteinSequence(Sequence):
         if np.isnan(weight):
             raise ValueError(
-                "Sequence contains ambiguous amino acids, " "cannot calculate weight"
+                "Sequence contains ambiguous amino acids, cannot calculate weight"
             )
         return weight
@@ -700,6 +702,11 @@ class PurePositionalSequence(Sequence):
     This class is similar to :class:`PositionalSequence`, but the symbols are not
     derived from an original sequence, but are the pure position.
     Hence, there is no meaningful string representation of the sequence and its symbols.
+    Parameters
+    ----------
+    length : int
+        The length of the sequence.
     """
     def __init__(self, length):

biotite/sequence/sequence.py CHANGED Viewed

@@ -139,7 +139,6 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
     >>> dna_seq_concat = dna_seq + dna_seq_rev
     >>> print(dna_seq_concat)
     ACGTAATGCA
     """
     def __init__(self, sequence=()):
@@ -354,7 +353,7 @@ class Sequence(Copyable, metaclass=abc.ABCMeta):
         Parameters
         ----------
-        alpahabet_size : int
+        alphabet_size : int
             The size of the alphabet.
         Returns

biotite/structure/__init__.py CHANGED Viewed

@@ -125,9 +125,11 @@ from .pseudoknots import *
 from .rdf import *
 from .repair import *
 from .residues import *
+from .rings import *
 from .sasa import *
 from .sequence import *
 from .sse import *
 from .superimpose import *
+from .tm import *
 from .transform import *
 # util and segments are used internally

biotite/structure/alphabet/i3d.py CHANGED Viewed

@@ -31,7 +31,7 @@ class I3DSequence(Sequence):
         May take upper or lower case letters.
         By default the sequence is empty.
-    See also
+    See Also
     --------
     to_3di : Create 3Di sequences from a structure.
@@ -39,7 +39,6 @@ class I3DSequence(Sequence):
     ----------
     .. footbibliography::
     """
     alphabet = LetterAlphabet("acdefghiklmnpqrstvwy")

biotite/structure/alphabet/pb.py CHANGED Viewed

@@ -52,7 +52,7 @@ class ProteinBlocksSequence(Sequence):
         May take upper or lower case letters.
         By default the sequence is empty.
-    See also
+    See Also
     --------
     to_protein_blocks : Create *Protein Blocks* sequences from a structure.
@@ -60,7 +60,6 @@ class ProteinBlocksSequence(Sequence):
     ----------
     .. footbibliography::
     """
     alphabet = LetterAlphabet("abcdefghijklmnopz")

biotite/structure/alphabet/unkerasify.py CHANGED Viewed

@@ -41,7 +41,13 @@ class ActivationType(enum.IntEnum):
 class KerasifyParser:
-    """An incomplete parser for model files serialized with `kerasify`.
+    """
+    An incomplete parser for model files serialized with `kerasify`.
+    Parameters
+    ----------
+    file : file-like
+        The ``.kerasify`` file to parse.
     Notes
     -----
@@ -65,7 +71,7 @@ class KerasifyParser:
             (w1,) = self._get("I")
             (b0,) = self._get("I")
             weights = (
-                np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
+                np.frombuffer(self._read(f"={w0 * w1}f"), dtype="f4")
                 .reshape(w0, w1)
                 .copy()
             )

biotite/structure/atoms.py CHANGED Viewed

@@ -35,6 +35,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
     :class:`AtomArrayStack`.
     It implements functionality for annotation arrays and also
     rudimentarily for coordinates.
+    Parameters
+    ----------
+    length : int
+        The amount of atoms in the structure.
     """
     def __init__(self, length):
@@ -96,11 +101,11 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
             The annotation category to be added.
         dtype : type or str
             A type instance or a valid *NumPy* *dtype* string.
-            Defines the type of the annotation
+            Defines the type of the annotation.
         See Also
         --------
-        set_annotation
+        set_annotation : Assign directly a value to an annotation.
         Notes
         -----
@@ -171,7 +176,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         array = np.asarray(array)
         if len(array) != self._array_length:
             raise IndexError(
-                f"Expected array length {self._array_length}, " f"but got {len(array)}"
+                f"Expected array length {self._array_length}, but got {len(array)}"
             )
         if category in self._annot:
             # If the annotation already exists, find the compatible dtype
@@ -244,7 +249,7 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
         ----------
         item : AtomArray or AtomArrayStack
             The object to compare the annotation arrays with.
-        equal_nan: bool
+        equal_nan : bool
             Whether to count `nan` values as equal. Default: True.
         Returns
@@ -323,17 +328,16 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
             if isinstance(self, AtomArray):
                 if value.ndim != 2:
                     raise ValueError(
-                        "A 2-dimensional ndarray is expected " "for an AtomArray"
+                        "A 2-dimensional ndarray is expected for an AtomArray"
                     )
             elif isinstance(self, AtomArrayStack):
                 if value.ndim != 3:
                     raise ValueError(
-                        "A 3-dimensional ndarray is expected " "for an AtomArrayStack"
+                        "A 3-dimensional ndarray is expected for an AtomArrayStack"
                     )
             if value.shape[-2] != self._array_length:
                 raise ValueError(
-                    f"Expected array length {self._array_length}, "
-                    f"but got {len(value)}"
+                    f"Expected array length {self._array_length}, but got {len(value)}"
                 )
             if value.shape[-1] != 3:
                 raise TypeError("Expected 3 coordinates for each atom")
@@ -358,13 +362,12 @@ class _AtomArrayBase(Copyable, metaclass=abc.ABCMeta):
                 if isinstance(self, AtomArray):
                     if value.ndim != 2:
                         raise ValueError(
-                            "A 2-dimensional ndarray is expected " "for an AtomArray"
+                            "A 2-dimensional ndarray is expected for an AtomArray"
                         )
                 else:  # AtomArrayStack
                     if value.ndim != 3:
                         raise ValueError(
-                            "A 3-dimensional ndarray is expected "
-                            "for an AtomArrayStack"
+                            "A 3-dimensional ndarray is expected for an AtomArrayStack"
                         )
                 if value.shape[-2:] != (3, 3):
                     raise TypeError("Box must be a 3x3 matrix (three vectors)")
@@ -448,9 +451,9 @@ class Atom(Copyable):
     Parameters
     ----------
-    coord: list or ndarray
+    coord : list or ndarray
         The x, y and z coordinates.
-    kwargs
+    **kwargs
         Atom annotations as key value pair.
     Attributes
@@ -472,7 +475,6 @@ class Atom(Copyable):
     CA
     >>> print(atom.coord)
     [1. 2. 3.]
     """
     def __init__(self, coord, **kwargs):
@@ -632,6 +634,10 @@ class AtomArray(_AtomArrayBase):
         The single value in the tuple is
         the length of the atom array.
+    See Also
+    --------
+    AtomArrayStack : Representation of multiple structure models.
     Examples
     --------
     Creating an atom array from atoms:
@@ -700,10 +706,6 @@ class AtomArray(_AtomArrayBase):
             Shape of the array.
             The single value in the tuple is
             the :func:`array_length()`.
-        See Also
-        --------
-        array_length
         """
         return (self.array_length(),)
@@ -895,9 +897,9 @@ class AtomArrayStack(_AtomArrayBase):
         The numbers correspond to the stack depth
         and array length, respectively.
-    See also
+    See Also
     --------
-    AtomArray
+    AtomArray : Representation of a single structure model.
     Examples
     --------
@@ -1195,9 +1197,18 @@ def array(atoms):
                 f"annotation categories as the atom at index 0"
             )
     array = AtomArray(len(atoms))
     # Add all (also optional) annotation categories
     for name in names:
-        array.add_annotation(name, dtype=type(atoms[0]._annot[name]))
+        value = atoms[0]._annot[name]
+        if isinstance(value, str):
+            # Find maximum string length across all atoms for this annotation
+            max_len = max(len(str(atom._annot[name])) for atom in atoms)
+            dtype = f"<U{max_len}"
+        else:
+            dtype = type(value)
+        array.add_annotation(name, dtype=dtype)
     # Add all atoms to AtomArray
     for i in range(len(atoms)):
         for name in names:
@@ -1443,8 +1454,7 @@ def repeat(atoms, coord):
     if isinstance(atoms, AtomArray):
         if coord.ndim != 3:
             raise ValueError(
-                f"Expected 3 dimensions for the coordinate array, "
-                f"but got {coord.ndim}"
+                f"Expected 3 dimensions for the coordinate array, but got {coord.ndim}"
             )
         repeated = AtomArray(new_length)
         repeated.coord = coord.reshape((new_length, 3))
@@ -1452,16 +1462,14 @@ def repeat(atoms, coord):
     elif isinstance(atoms, AtomArrayStack):
         if coord.ndim != 4:
             raise ValueError(
-                f"Expected 4 dimensions for the coordinate array, "
-                f"but got {coord.ndim}"
+                f"Expected 4 dimensions for the coordinate array, but got {coord.ndim}"
             )
         repeated = AtomArrayStack(atoms.stack_depth(), new_length)
         repeated.coord = coord.reshape((atoms.stack_depth(), new_length, 3))
     else:
         raise TypeError(
-            f"Expected 'AtomArray' or 'AtomArrayStack', "
-            f"but got {type(atoms).__name__}"
+            f"Expected 'AtomArray' or 'AtomArrayStack', but got {type(atoms).__name__}"
         )
     for category in atoms.get_annotation_categories():