PyPI - biotite - Versions diffs - 0.41.1__cp310-cp310-macosx_10_16_arm64.whl - Mend

biotite 0.41.1__cp310-cp310-macosx_10_16_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biotite might be problematic. Click here for more details.

Files changed (340) hide show

biotite/__init__.py +19 -0
biotite/application/__init__.py +43 -0
biotite/application/application.py +265 -0
biotite/application/autodock/__init__.py +12 -0
biotite/application/autodock/app.py +505 -0
biotite/application/blast/__init__.py +14 -0
biotite/application/blast/alignment.py +83 -0
biotite/application/blast/webapp.py +421 -0
biotite/application/clustalo/__init__.py +12 -0
biotite/application/clustalo/app.py +238 -0
biotite/application/dssp/__init__.py +12 -0
biotite/application/dssp/app.py +152 -0
biotite/application/localapp.py +306 -0
biotite/application/mafft/__init__.py +12 -0
biotite/application/mafft/app.py +122 -0
biotite/application/msaapp.py +374 -0
biotite/application/muscle/__init__.py +13 -0
biotite/application/muscle/app3.py +254 -0
biotite/application/muscle/app5.py +171 -0
biotite/application/sra/__init__.py +18 -0
biotite/application/sra/app.py +456 -0
biotite/application/tantan/__init__.py +12 -0
biotite/application/tantan/app.py +222 -0
biotite/application/util.py +59 -0
biotite/application/viennarna/__init__.py +18 -0
biotite/application/viennarna/rnaalifold.py +304 -0
biotite/application/viennarna/rnafold.py +269 -0
biotite/application/viennarna/rnaplot.py +187 -0
biotite/application/viennarna/util.py +72 -0
biotite/application/webapp.py +77 -0
biotite/copyable.py +71 -0
biotite/database/__init__.py +23 -0
biotite/database/entrez/__init__.py +15 -0
biotite/database/entrez/check.py +61 -0
biotite/database/entrez/dbnames.py +89 -0
biotite/database/entrez/download.py +223 -0
biotite/database/entrez/key.py +44 -0
biotite/database/entrez/query.py +223 -0
biotite/database/error.py +15 -0
biotite/database/pubchem/__init__.py +21 -0
biotite/database/pubchem/download.py +260 -0
biotite/database/pubchem/error.py +20 -0
biotite/database/pubchem/query.py +827 -0
biotite/database/pubchem/throttle.py +99 -0
biotite/database/rcsb/__init__.py +13 -0
biotite/database/rcsb/download.py +167 -0
biotite/database/rcsb/query.py +959 -0
biotite/database/uniprot/__init__.py +13 -0
biotite/database/uniprot/check.py +32 -0
biotite/database/uniprot/download.py +134 -0
biotite/database/uniprot/query.py +209 -0
biotite/file.py +251 -0
biotite/sequence/__init__.py +73 -0
biotite/sequence/align/__init__.py +49 -0
biotite/sequence/align/alignment.py +658 -0
biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
biotite/sequence/align/banded.pyx +652 -0
biotite/sequence/align/buckets.py +69 -0
biotite/sequence/align/cigar.py +434 -0
biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmeralphabet.pyx +574 -0
biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmersimilarity.pyx +233 -0
biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
biotite/sequence/align/kmertable.pyx +3400 -0
biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
biotite/sequence/align/localgapped.pyx +892 -0
biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
biotite/sequence/align/localungapped.pyx +279 -0
biotite/sequence/align/matrix.py +405 -0
biotite/sequence/align/matrix_data/BLOSUM100.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM30.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM35.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM40.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM45.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM50_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM50_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM55.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM60.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM62_13p.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_14.3.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM62_5.0.mat +25 -0
biotite/sequence/align/matrix_data/BLOSUM65.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM70.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM75.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM80.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM85.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUM90.mat +31 -0
biotite/sequence/align/matrix_data/BLOSUMN.mat +31 -0
biotite/sequence/align/matrix_data/CorBLOSUM49_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM57_14.3.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM61_5.0.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM66_13p.mat +25 -0
biotite/sequence/align/matrix_data/CorBLOSUM67_14.3.mat +25 -0
biotite/sequence/align/matrix_data/DAYHOFF.mat +32 -0
biotite/sequence/align/matrix_data/GONNET.mat +26 -0
biotite/sequence/align/matrix_data/IDENTITY.mat +25 -0
biotite/sequence/align/matrix_data/MATCH.mat +25 -0
biotite/sequence/align/matrix_data/NUC.mat +25 -0
biotite/sequence/align/matrix_data/PAM10.mat +34 -0
biotite/sequence/align/matrix_data/PAM100.mat +34 -0
biotite/sequence/align/matrix_data/PAM110.mat +34 -0
biotite/sequence/align/matrix_data/PAM120.mat +34 -0
biotite/sequence/align/matrix_data/PAM130.mat +34 -0
biotite/sequence/align/matrix_data/PAM140.mat +34 -0
biotite/sequence/align/matrix_data/PAM150.mat +34 -0
biotite/sequence/align/matrix_data/PAM160.mat +34 -0
biotite/sequence/align/matrix_data/PAM170.mat +34 -0
biotite/sequence/align/matrix_data/PAM180.mat +34 -0
biotite/sequence/align/matrix_data/PAM190.mat +34 -0
biotite/sequence/align/matrix_data/PAM20.mat +34 -0
biotite/sequence/align/matrix_data/PAM200.mat +34 -0
biotite/sequence/align/matrix_data/PAM210.mat +34 -0
biotite/sequence/align/matrix_data/PAM220.mat +34 -0
biotite/sequence/align/matrix_data/PAM230.mat +34 -0
biotite/sequence/align/matrix_data/PAM240.mat +34 -0
biotite/sequence/align/matrix_data/PAM250.mat +34 -0
biotite/sequence/align/matrix_data/PAM260.mat +34 -0
biotite/sequence/align/matrix_data/PAM270.mat +34 -0
biotite/sequence/align/matrix_data/PAM280.mat +34 -0
biotite/sequence/align/matrix_data/PAM290.mat +34 -0
biotite/sequence/align/matrix_data/PAM30.mat +34 -0
biotite/sequence/align/matrix_data/PAM300.mat +34 -0
biotite/sequence/align/matrix_data/PAM310.mat +34 -0
biotite/sequence/align/matrix_data/PAM320.mat +34 -0
biotite/sequence/align/matrix_data/PAM330.mat +34 -0
biotite/sequence/align/matrix_data/PAM340.mat +34 -0
biotite/sequence/align/matrix_data/PAM350.mat +34 -0
biotite/sequence/align/matrix_data/PAM360.mat +34 -0
biotite/sequence/align/matrix_data/PAM370.mat +34 -0
biotite/sequence/align/matrix_data/PAM380.mat +34 -0
biotite/sequence/align/matrix_data/PAM390.mat +34 -0
biotite/sequence/align/matrix_data/PAM40.mat +34 -0
biotite/sequence/align/matrix_data/PAM400.mat +34 -0
biotite/sequence/align/matrix_data/PAM410.mat +34 -0
biotite/sequence/align/matrix_data/PAM420.mat +34 -0
biotite/sequence/align/matrix_data/PAM430.mat +34 -0
biotite/sequence/align/matrix_data/PAM440.mat +34 -0
biotite/sequence/align/matrix_data/PAM450.mat +34 -0
biotite/sequence/align/matrix_data/PAM460.mat +34 -0
biotite/sequence/align/matrix_data/PAM470.mat +34 -0
biotite/sequence/align/matrix_data/PAM480.mat +34 -0
biotite/sequence/align/matrix_data/PAM490.mat +34 -0
biotite/sequence/align/matrix_data/PAM50.mat +34 -0
biotite/sequence/align/matrix_data/PAM500.mat +34 -0
biotite/sequence/align/matrix_data/PAM60.mat +34 -0
biotite/sequence/align/matrix_data/PAM70.mat +34 -0
biotite/sequence/align/matrix_data/PAM80.mat +34 -0
biotite/sequence/align/matrix_data/PAM90.mat +34 -0
biotite/sequence/align/matrix_data/RBLOSUM52_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM59_14.3.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM64_5.0.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_13p.mat +25 -0
biotite/sequence/align/matrix_data/RBLOSUM69_14.3.mat +25 -0
biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
biotite/sequence/align/multiple.pyx +620 -0
biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
biotite/sequence/align/pairwise.pyx +587 -0
biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
biotite/sequence/align/permutation.pyx +305 -0
biotite/sequence/align/primes.txt +821 -0
biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
biotite/sequence/align/selector.pyx +956 -0
biotite/sequence/align/statistics.py +265 -0
biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
biotite/sequence/align/tracetable.pxd +64 -0
biotite/sequence/align/tracetable.pyx +370 -0
biotite/sequence/alphabet.py +566 -0
biotite/sequence/annotation.py +829 -0
biotite/sequence/codec.cpython-310-darwin.so +0 -0
biotite/sequence/codec.pyx +155 -0
biotite/sequence/codon.py +466 -0
biotite/sequence/codon_tables.txt +202 -0
biotite/sequence/graphics/__init__.py +33 -0
biotite/sequence/graphics/alignment.py +1034 -0
biotite/sequence/graphics/color_schemes/autumn.json +51 -0
biotite/sequence/graphics/color_schemes/blossom.json +51 -0
biotite/sequence/graphics/color_schemes/clustalx_dna.json +11 -0
biotite/sequence/graphics/color_schemes/clustalx_protein.json +28 -0
biotite/sequence/graphics/color_schemes/flower.json +51 -0
biotite/sequence/graphics/color_schemes/jalview_buried.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_hydrophobicity.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_helix.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_strand.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_prop_turn.json +31 -0
biotite/sequence/graphics/color_schemes/jalview_taylor.json +28 -0
biotite/sequence/graphics/color_schemes/jalview_zappo.json +28 -0
biotite/sequence/graphics/color_schemes/ocean.json +51 -0
biotite/sequence/graphics/color_schemes/pb_flower.json +39 -0
biotite/sequence/graphics/color_schemes/rainbow_dna.json +11 -0
biotite/sequence/graphics/color_schemes/rainbow_protein.json +30 -0
biotite/sequence/graphics/color_schemes/spring.json +51 -0
biotite/sequence/graphics/color_schemes/sunset.json +51 -0
biotite/sequence/graphics/color_schemes/wither.json +51 -0
biotite/sequence/graphics/colorschemes.py +139 -0
biotite/sequence/graphics/dendrogram.py +184 -0
biotite/sequence/graphics/features.py +510 -0
biotite/sequence/graphics/logo.py +110 -0
biotite/sequence/graphics/plasmid.py +661 -0
biotite/sequence/io/__init__.py +12 -0
biotite/sequence/io/fasta/__init__.py +22 -0
biotite/sequence/io/fasta/convert.py +273 -0
biotite/sequence/io/fasta/file.py +278 -0
biotite/sequence/io/fastq/__init__.py +19 -0
biotite/sequence/io/fastq/convert.py +120 -0
biotite/sequence/io/fastq/file.py +551 -0
biotite/sequence/io/genbank/__init__.py +17 -0
biotite/sequence/io/genbank/annotation.py +277 -0
biotite/sequence/io/genbank/file.py +575 -0
biotite/sequence/io/genbank/metadata.py +324 -0
biotite/sequence/io/genbank/sequence.py +172 -0
biotite/sequence/io/general.py +192 -0
biotite/sequence/io/gff/__init__.py +26 -0
biotite/sequence/io/gff/convert.py +133 -0
biotite/sequence/io/gff/file.py +434 -0
biotite/sequence/phylo/__init__.py +36 -0
biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/nj.pyx +221 -0
biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/tree.pyx +1169 -0
biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
biotite/sequence/phylo/upgma.pyx +164 -0
biotite/sequence/profile.py +456 -0
biotite/sequence/search.py +116 -0
biotite/sequence/seqtypes.py +556 -0
biotite/sequence/sequence.py +374 -0
biotite/structure/__init__.py +132 -0
biotite/structure/atoms.py +1455 -0
biotite/structure/basepairs.py +1415 -0
biotite/structure/bonds.cpython-310-darwin.so +0 -0
biotite/structure/bonds.pyx +1933 -0
biotite/structure/box.py +592 -0
biotite/structure/celllist.cpython-310-darwin.so +0 -0
biotite/structure/celllist.pyx +849 -0
biotite/structure/chains.py +298 -0
biotite/structure/charges.cpython-310-darwin.so +0 -0
biotite/structure/charges.pyx +520 -0
biotite/structure/compare.py +274 -0
biotite/structure/density.py +114 -0
biotite/structure/dotbracket.py +216 -0
biotite/structure/error.py +31 -0
biotite/structure/filter.py +585 -0
biotite/structure/geometry.py +697 -0
biotite/structure/graphics/__init__.py +13 -0
biotite/structure/graphics/atoms.py +226 -0
biotite/structure/graphics/rna.py +282 -0
biotite/structure/hbond.py +409 -0
biotite/structure/info/__init__.py +25 -0
biotite/structure/info/atom_masses.json +121 -0
biotite/structure/info/atoms.py +82 -0
biotite/structure/info/bonds.py +145 -0
biotite/structure/info/ccd/README.rst +8 -0
biotite/structure/info/ccd/amino_acids.txt +1663 -0
biotite/structure/info/ccd/carbohydrates.txt +1135 -0
biotite/structure/info/ccd/components.bcif +0 -0
biotite/structure/info/ccd/nucleotides.txt +798 -0
biotite/structure/info/ccd.py +95 -0
biotite/structure/info/groups.py +90 -0
biotite/structure/info/masses.py +123 -0
biotite/structure/info/misc.py +144 -0
biotite/structure/info/radii.py +197 -0
biotite/structure/info/standardize.py +196 -0
biotite/structure/integrity.py +268 -0
biotite/structure/io/__init__.py +30 -0
biotite/structure/io/ctab.py +72 -0
biotite/structure/io/dcd/__init__.py +13 -0
biotite/structure/io/dcd/file.py +65 -0
biotite/structure/io/general.py +257 -0
biotite/structure/io/gro/__init__.py +14 -0
biotite/structure/io/gro/file.py +343 -0
biotite/structure/io/mmtf/__init__.py +21 -0
biotite/structure/io/mmtf/assembly.py +214 -0
biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/convertarray.pyx +341 -0
biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/convertfile.pyx +501 -0
biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/decode.pyx +152 -0
biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
biotite/structure/io/mmtf/encode.pyx +183 -0
biotite/structure/io/mmtf/file.py +233 -0
biotite/structure/io/mol/__init__.py +20 -0
biotite/structure/io/mol/convert.py +115 -0
biotite/structure/io/mol/ctab.py +414 -0
biotite/structure/io/mol/header.py +116 -0
biotite/structure/io/mol/mol.py +193 -0
biotite/structure/io/mol/sdf.py +916 -0
biotite/structure/io/netcdf/__init__.py +13 -0
biotite/structure/io/netcdf/file.py +63 -0
biotite/structure/io/npz/__init__.py +20 -0
biotite/structure/io/npz/file.py +152 -0
biotite/structure/io/pdb/__init__.py +20 -0
biotite/structure/io/pdb/convert.py +293 -0
biotite/structure/io/pdb/file.py +1240 -0
biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
biotite/structure/io/pdb/hybrid36.pyx +242 -0
biotite/structure/io/pdbqt/__init__.py +15 -0
biotite/structure/io/pdbqt/convert.py +107 -0
biotite/structure/io/pdbqt/file.py +640 -0
biotite/structure/io/pdbx/__init__.py +23 -0
biotite/structure/io/pdbx/bcif.py +648 -0
biotite/structure/io/pdbx/cif.py +1032 -0
biotite/structure/io/pdbx/component.py +246 -0
biotite/structure/io/pdbx/convert.py +1597 -0
biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
biotite/structure/io/pdbx/encoding.pyx +950 -0
biotite/structure/io/pdbx/legacy.py +267 -0
biotite/structure/io/tng/__init__.py +13 -0
biotite/structure/io/tng/file.py +46 -0
biotite/structure/io/trajfile.py +710 -0
biotite/structure/io/trr/__init__.py +13 -0
biotite/structure/io/trr/file.py +46 -0
biotite/structure/io/xtc/__init__.py +13 -0
biotite/structure/io/xtc/file.py +46 -0
biotite/structure/mechanics.py +75 -0
biotite/structure/molecules.py +353 -0
biotite/structure/pseudoknots.py +642 -0
biotite/structure/rdf.py +243 -0
biotite/structure/repair.py +253 -0
biotite/structure/residues.py +562 -0
biotite/structure/resutil.py +178 -0
biotite/structure/sasa.cpython-310-darwin.so +0 -0
biotite/structure/sasa.pyx +322 -0
biotite/structure/sequence.py +112 -0
biotite/structure/sse.py +327 -0
biotite/structure/superimpose.py +727 -0
biotite/structure/transform.py +504 -0
biotite/structure/util.py +98 -0
biotite/temp.py +86 -0
biotite/version.py +16 -0
biotite/visualize.py +251 -0
biotite-0.41.1.dist-info/METADATA +187 -0
biotite-0.41.1.dist-info/RECORD +340 -0
biotite-0.41.1.dist-info/WHEEL +4 -0
biotite-0.41.1.dist-info/licenses/LICENSE.rst +30 -0

biotite/sequence/io/gff/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+This subpackage is used for reading and writing sequence features in the
+*Generic Feature Format 3* (GFF3).
+It provides the :class:`GFFFile` class, a low-level line-based
+interface to this format, and high-level functions for extracting
+:class:`Annotation` objects.
+.. note:: This package cannot create hierarchical data structures from
+   GFF3 files. This means that you cannot directly access the
+   parent or child of a feature.
+   However, the ``ID`` and ``Name`` attributes are stored in the
+   qualifiers of the created :class:`Feature` objects.
+   Hence, it is possible to implement such a data structure from this
+   information.
+"""
+__name__ = "biotite.sequence.io.gff"
+__author__ = "Patrick Kunzmann"
+from .file import *
+from .convert import *

biotite/sequence/io/gff/convert.py ADDED Viewed

@@ -0,0 +1,133 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.sequence.io.gff"
+__author__ = "Patrick Kunzmann"
+__all__ = ["get_annotation", "set_annotation"]
+from ...annotation import Location, Feature, Annotation
+def get_annotation(gff_file):
+    """
+    Parse a GFF3 file into an :class:`Annotation`.
+    The *type* column is used as the :attr:`Feature.key` attribute,
+    the locations (``loc``) are taken from the *start*, *end* and
+    *strand* columns and  the *attributes* column is parsed into the
+    :attr:`Feature.qual` attribute.
+    Multiple entries with the same ``ID`` attribute are interpreted
+    as the same feature.
+    Thus, for entries with the same ``ID``, the *type* and *attributes*
+    are only parsed once and the locations are aggregated from each
+    entry.
+    Parameters
+    ----------
+    gff_file : GFFFile
+        The file tro extract the :class:`Annotation` object from.
+    Returns
+    -------
+    annotation : Annotation
+        The extracted annotation.
+    """
+    annot = Annotation()
+    current_key = None
+    current_locs = None
+    current_qual = None
+    current_id = None
+    for _, _, type, start, end, _, strand, _, attrib in gff_file:
+        id = attrib.get("ID")
+        if id != current_id or id is None:
+            # current_key is None, when there is no previous feature
+            # (beginning of the file)
+            if current_key is not None:
+                # Beginning of new feature -> Save previous feature
+                annot.add_feature(
+                    Feature(current_key, current_locs, current_qual)
+                )
+            # Track new feature
+            current_key = type
+            current_locs = [Location(start, end, strand)]
+            current_qual = attrib
+        else:
+            current_locs.append(Location(start, end, strand))
+        current_id = id
+    # Save last feature
+    if current_key is not None:
+        annot.add_feature(Feature(current_key, current_locs, current_qual))
+    return annot
+def set_annotation(gff_file, annotation,
+                   seqid=None, source=None, is_stranded=True):
+    """
+    Write an :class:`Annotation` object into a GFF3 file.
+    Each feature will get one entry for each location it has.
+    :class:`Feature` objects with multiple locations require the ``ID``
+    qualifier in its :attr:`Feature.qual` attribute.
+    Parameters
+    ----------
+    gff_file : GFFFile
+        The GFF3 file to write into.
+    annotation : Annotation
+        The annoation which is written to the GFF3 file.
+    seqid : str, optional
+        The content for the *seqid* column.
+    source : str, optional
+        The content for the *source* column.
+    is_stranded : bool, optional
+        If true, the strand of each feature is taken into account.
+        Otherwise the *strand* column is filled with '``.``'.
+    """
+    for feature in sorted(annotation):
+        if len(feature.locs) > 1 and "ID" not in feature.qual:
+            raise ValueError(
+                "The 'Id' qualifier is required "
+                "for features with multiple locations"
+            )
+        ## seqid ##
+        if seqid is not None and " " in seqid:
+            raise ValueError("The 'seqid' must not contain whitespaces")
+        ## source ##
+        #Nothing to be done
+        ## type ##
+        type = feature.key
+        ## strand ##
+        # Expect same strandedness for all locations
+        strand = list(feature.locs)[0].strand if is_stranded else None
+        ## score ##
+        score = None
+        ## attributes ##
+        attributes = feature.qual
+        # The previous properties are shared by all entries
+        # for this feature
+        # The following loop handles properties that change with each
+        # location
+        reverse_order = True if strand == Location.Strand.REVERSE else False
+        next_phase = 0
+        for loc in sorted(
+            feature.locs, key=lambda loc: loc.first, reverse=reverse_order
+        ):
+            ## start ##
+            start = loc.first
+            ## end ##
+            end = loc.last
+            ## strand ##
+            strand = loc.strand if is_stranded else None
+            ## phase ##
+            if type == "CDS":
+                phase = next_phase
+                # Subtract the length of the location
+                next_phase -= loc.last - loc.first + 1
+                next_phase %= 3
+            else:
+                phase = None
+            gff_file.append(
+                seqid, source, type, start, end,
+                score, strand, phase, attributes
+            )

biotite/sequence/io/gff/file.py ADDED Viewed

@@ -0,0 +1,434 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+__name__ = "biotite.sequence.io.gff"
+__author__ = "Patrick Kunzmann"
+__all__ = ["GFFFile"]
+import copy
+import string
+from urllib.parse import quote, unquote
+import warnings
+from ....file import TextFile, InvalidFileError
+from ...annotation import Location
+# All punctuation characters except
+# percent, semicolon, equals, ampersand, comma
+_NOT_QUOTED = "".join(
+    [char for char in string.punctuation if char not in "%;=&,"]
+) + " "
+class GFFFile(TextFile):
+    """
+    This class represents a file in *Generic Feature Format 3*
+    (`GFF3 <https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md>`_)
+    format.
+    Similar to GenBank files, GFF3 files contain information about
+    features of a reference sequence, but in a more concise and better
+    parsable way.
+    However, it does not provide additional meta information.
+    This class serves as a low-level API for accessing GFF3 files.
+    It is used as a sequence of entries, where each entry is defined as
+    a non-comment and non-directive line.
+    Each entry consists of values corresponding to the 9 columns of
+    GFF3:
+    ==============  ===============================  ==========================================================
+    **seqid**       ``str``                          The ID of the reference sequence
+    **source**      ``str``                          Source of the data (e.g. ``Genbank``)
+    **type**        ``str``                          Type of the feature (e.g. ``CDS``)
+    **start**       ``int``                          Start coordinate of feature on the reference sequence
+    **end**         ``int``                          End coordinate of feature on the reference sequence
+    **score**       ``float`` or ``None``            Optional score (e.g. an E-value)
+    **strand**      ``Location.Strand`` or ``None``  Strand of the feature, ``None`` if feature is not stranded
+    **phase**       ``int`` or ``None``              Reading frame shift, ``None`` for non-CDS features
+    **attributes**  ``dict``                         Additional properties of the feature
+    ==============  ===============================  ==========================================================
+    Note that the entry index may not be equal to the line index,
+    because GFF3 files can contain comment and directive lines.
+    Notes
+    -----
+    Although the GFF3 specification allows mixing in reference sequence
+    data in FASTA format via the ``##FASTA`` directive, this class does
+    not support extracting the sequence information.
+    The content after the ``##FASTA`` directive is simply ignored.
+    Please provide the sequence via a separate file or read the FASTA
+    data directly via the :attr:`lines` attribute:
+    >>> import os.path
+    >>> from io import StringIO
+    >>> gff_file = GFFFile.read(os.path.join(path_to_sequences, "indexing_test.gff3"))
+    >>> fasta_start_index = None
+    >>> for directive, line_index in gff_file.directives():
+    ...     if directive == "FASTA":
+    ...         fasta_start_index = line_index + 1
+    >>> fasta_data = StringIO("\\n".join(gff_file.lines[fasta_start_index:]))
+    >>> fasta_file = FastaFile.read(fasta_data)
+    >>> for seq_string in fasta_file.values():
+    ...     print(seq_string[:60] + "...")
+    TACGTAGCTAGCTGATCGATGTTGTGTGTATCGATCTAGCTAGCTAGCTGACTACACAAT...
+    Examples
+    --------
+    Reading and editing of an existing GFF3 file:
+    >>> import os.path
+    >>> gff_file = GFFFile.read(os.path.join(path_to_sequences, "gg_avidin.gff3"))
+    >>> # Get content of first entry
+    >>> seqid, source, type, start, end, score, strand, phase, attrib = gff_file[0]
+    >>> print(seqid)
+    AJ311647.1
+    >>> print(source)
+    EMBL
+    >>> print(type)
+    region
+    >>> print(start)
+    1
+    >>> print(end)
+    1224
+    >>> print(score)
+    None
+    >>> print(strand)
+    Strand.FORWARD
+    >>> print(phase)
+    None
+    >>> print(attrib)
+    {'ID': 'AJ311647.1:1..1224', 'Dbxref': 'taxon:9031', 'Name': 'Z', 'chromosome': 'Z', 'gbkey': 'Src', 'mol_type': 'genomic DNA'}
+    >>> # Edit the first entry: Simply add a score
+    >>> score = 1.0
+    >>> gff_file[0] = seqid, source, type, start, end, score, strand, phase, attrib
+    >>> # Delete first entry
+    >>> del gff_file[0]
+    Writing a new GFF3 file:
+    >>> gff_file = GFFFile()
+    >>> gff_file.append_directive("Example directive", "param1", "param2")
+    >>> gff_file.append(
+    ...     "SomeSeqID", "Biotite", "CDS", 1, 99,
+    ...     None, Location.Strand.FORWARD, 0,
+    ...     {"ID": "FeatureID", "product":"A protein"}
+    ... )
+    >>> print(gff_file)   #doctest: +NORMALIZE_WHITESPACE
+    ##gff-version 3
+    ##Example directive param1 param2
+    SomeSeqID   Biotite CDS     1       99      .       +       0       ID=FeatureID;product=A protein
+    """
+    def __init__(self):
+        super().__init__()
+        # Maps entry indices to line indices
+        self._entries = None
+        # Stores the directives as (directive text, line index)-tuple
+        self._directives = None
+        # Stores whether the file has FASTA data
+        self._has_fasta = None
+        self._index_entries()
+        self.append_directive("gff-version", "3")
+    @classmethod
+    def read(cls, file):
+        """
+        Read a GFF3 file.
+        Parameters
+        ----------
+        file : file-like object or str
+            The file to be read.
+            Alternatively a file path can be supplied.
+        Returns
+        -------
+        file_object : GFFFile
+            The parsed file.
+        """
+        file = super().read(file)
+        file._index_entries()
+        return file
+    def insert(self, index, seqid, source, type, start, end,
+               score, strand, phase, attributes=None):
+        """
+        Insert an entry at the given index.
+        Parameters
+        ----------
+        index : int
+            Index where the entry is inserted.
+            If the index is equal to the length of the file, the entry
+            is appended at the end of the file.
+        seqid : str
+            The ID of the reference sequence.
+        source : str
+            Source of the data (e.g. ``Genbank``).
+        type : str
+            Type of the feature (e.g. ``CDS``).
+        start : int
+            Start coordinate of feature on the reference sequence.
+        end : int
+            End coordinate of feature on the reference sequence.
+        score : float or None
+            Optional score (e.g. an E-value).
+        strand : Location.Strand or None
+            Strand of the feature, ``None`` if feature is not stranded.
+        phase : int or None
+            Reading frame shift, ``None`` for non-CDS features.
+        attributes : dict, optional
+            Additional properties of the feature.
+        """
+        if index == len(self):
+            self.append(seqid, source, type, start, end,
+                        score, strand, phase, attributes)
+        else:
+            line_index = self._entries[index]
+            line = GFFFile._create_line(
+                seqid, source, type, start, end,
+                score, strand, phase, attributes
+            )
+            self.lines.insert(line_index, line)
+            self._index_entries()
+    def append(self, seqid, source, type, start, end,
+               score, strand, phase, attributes=None):
+        """
+        Append an entry to the end of the file.
+        Parameters
+        ----------
+        seqid : str
+            The ID of the reference sequence.
+        source : str
+            Source of the data (e.g. ``Genbank``).
+        type : str
+            Type of the feature (e.g. ``CDS``).
+        start : int
+            Start coordinate of feature on the reference sequence.
+        end : int
+            End coordinate of feature on the reference sequence.
+        score : float or None
+            Optional score (e.g. an E-value).
+        strand : Location.Strand or None
+            Strand of the feature, ``None`` if feature is not stranded.
+        phase : int or None
+            Reading frame shift, ``None`` for non-CDS features.
+        attributes : dict, optional
+            Additional properties of the feature.
+        """
+        if self._has_fasta:
+            raise NotImplementedError(
+                "Cannot append feature entries, "
+                "as this file contains additional FASTA data"
+            )
+        line = GFFFile._create_line(
+            seqid, source, type, start, end, score, strand, phase, attributes
+        )
+        self.lines.append(line)
+        # Fast update of entry index by adding last line
+        self._entries.append(len(self.lines) - 1)
+    def append_directive(self, directive, *args):
+        """
+        Append a directive line to the end of the file.
+        Parameters
+        ----------
+        directive : str
+            Name of the directive.
+        *args : str
+            Optional parameters for the directive.
+            Each argument is simply appended to the directive, separated
+            by a single space character.
+        Raises
+        ------
+        NotImplementedError
+            If the ``##FASTA`` directive is used, which is not
+            supported.
+        Examples
+        --------
+        >>> gff_file = GFFFile()
+        >>> gff_file.append_directive("Example directive", "param1", "param2")
+        >>> print(gff_file)
+        ##gff-version 3
+        ##Example directive param1 param2
+        """
+        if directive.startswith("FASTA"):
+            raise NotImplementedError(
+                "Adding FASTA information is not supported"
+            )
+        directive_line = "##" + directive + " " + " ".join(args)
+        self._directives.append((directive_line[2:], len(self.lines)))
+        self.lines.append(directive_line)
+    def directives(self):
+        """
+        Get the directives in the file.
+        Returns
+        -------
+        directives : list of tuple(str, int)
+            A list of directives, sorted by their line order.
+            The first element of each tuple is the name of the
+            directive (without ``##``), the second element is the index
+            of the corresponding line.
+        """
+        # Sort in line order
+        return sorted(self._directives, key=lambda directive: directive[1])
+    def __setitem__(self, index, item):
+        seqid, source, type, start, end, score, strand, phase, attrib = item
+        line = GFFFile._create_line(
+            seqid, source, type, start, end, score, strand, phase, attrib
+        )
+        line_index = self._entries[index]
+        self.lines[line_index] = line
+    def __getitem__(self, index):
+        if (index >= 0 and  index >= len(self)) or \
+           (index <  0 and -index >  len(self)):
+                raise IndexError(
+                    f"Index {index} is out of range for GFFFile with "
+                    f"{len(self)} entries"
+                )
+        line_index = self._entries[index]
+        # Columns are tab separated
+        s = self.lines[line_index].strip().split("\t")
+        if len(s) != 9:
+            raise InvalidFileError(f"Expected 9 columns, but got {len(s)}")
+        seqid, source, type, start, end, score, strand, phase, attrib = s
+        seqid = unquote(seqid)
+        source = unquote(source)
+        type = unquote(type)
+        start = int(start)
+        end = int(end)
+        score = None if score == "." else float(score)
+        if strand == "+":
+            strand = Location.Strand.FORWARD
+        elif strand == "-":
+            strand = Location.Strand.REVERSE
+        else:
+            strand = None
+        phase = None if phase == "." else int(phase)
+        attrib = GFFFile._parse_attributes(attrib)
+        return seqid, source, type, start, end, score, strand, phase, attrib
+    def __delitem__(self, index):
+        line_index = self._entries[index]
+        del self.lines[line_index]
+        self._index_entries()
+    def __len__(self):
+        return len(self._entries)
+    def _index_entries(self):
+        """
+        Parse the file for comment and directive lines.
+        Count these lines cumulatively, so that entry indices can be
+        mapped onto line indices.
+        Additionally track the line index of directive lines.
+        """
+        self._directives = []
+        # Worst case allocation -> all lines contain actual entries
+        self._entries = [None] * len(self.lines)
+        self._has_fasta = False
+        entry_counter = 0
+        for line_i, line in enumerate(self.lines):
+            if len(line) == 0 or line[0] == " ":
+                # Empty line -> do nothing
+                pass
+            elif line.startswith("#"):
+                # Comment or directive
+                if line.startswith("##"):
+                    # Directive
+                    # Omit the leading '##'
+                    self._directives.append((line[2:], line_i))
+                    if line[2:] == "FASTA":
+                        self._has_fasta = True
+                        # This parser does not support bundled FASTA
+                        # data
+                        warnings.warn(
+                            "Biotite does not support FASTA data mixed into "
+                            "GFF files, the FASTA data will be ignored"
+                        )
+                        # To ignore the following FASTA data, stop
+                        # parsing at this point
+                        break
+            else:
+                # Actual entry
+                self._entries[entry_counter] = line_i
+                entry_counter += 1
+        # Trim to correct size
+        self._entries = self._entries[:entry_counter]
+    @staticmethod
+    def _create_line(seqid, source, type, start, end,
+                     score, strand, phase, attributes):
+        """
+        Create a line for a newly created entry.
+        """
+        seqid = quote(seqid.strip(), safe=_NOT_QUOTED) \
+                if seqid is not None else "."
+        source = quote(source.strip(), safe=_NOT_QUOTED) \
+                 if source is not None else "."
+        type = type.strip()
+        # Perform checks
+        if len(seqid) == 0:
+            raise ValueError("'seqid' must not be empty")
+        if len(source) == 0:
+            raise ValueError("'source' must not be empty")
+        if len(type) == 0:
+            raise ValueError("'type' must not be empty")
+        if seqid[0] == ">":
+            raise ValueError("'seqid' must not start with '>'")
+        score = str(score) if score is not None else "."
+        if strand == Location.Strand.FORWARD:
+            strand = "+"
+        elif strand == Location.Strand.REVERSE:
+            strand = "-"
+        else:
+            strand = "."
+        phase = str(phase) if phase is not None else "."
+        attributes = ";".join(
+            [quote(key, safe=_NOT_QUOTED) + "=" + quote(val, safe=_NOT_QUOTED)
+             for key, val in attributes.items()]
+        ) if attributes is not None and len(attributes) > 0 else "."
+        return "\t".join(
+            [seqid, source, type, str(start), str(end),
+             str(score), strand, phase, attributes]
+        )
+    @staticmethod
+    def _parse_attributes(attributes):
+        """
+        Parse the *attributes* string into a dictionary.
+        """
+        if attributes == ".":
+            return {}
+        attrib_dict = {}
+        attrib_entries = attributes.split(";")
+        for entry in attrib_entries:
+            compounds = entry.split("=")
+            if len(compounds) != 2:
+                raise InvalidFileError(
+                    f"Attribute entry '{entry}' is invalid"
+                )
+            key, val = compounds
+            attrib_dict[unquote(key)] = unquote(val)
+        return attrib_dict

biotite/sequence/phylo/__init__.py ADDED Viewed

@@ -0,0 +1,36 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+"""
+This subpackage provides functions and data structures for creating
+(phylogenetic) trees.
+The :class:`Tree` is the central class in this subpackage.
+It wraps a *root* :class:`TreeNode` object.
+A :class:`TreeNode` is either an intermediate node, if it has child
+:class:`TreeNode` objects, or otherwise a leaf node.
+A :class:`Tree` is not a container itself:
+Objects, e.g. species names or sequences, that are represented by the
+nodes, cannot be stored directly in a :class:`Tree` or
+:class:`TreeNode`.
+Instead, each leaf node has a reference index:
+These indices refer to a separate list or array, containing the actual
+reference objects.
+A :class:`Tree` can be created from or exported to a *Newick* notation,
+using the :func:`Tree.from_newick()` or :func:`Tree.to_newick()` method,
+respectively.
+A :class:`Tree` can be built from a pairwise distance matrix using the
+popular *UPGMA* (:func:`upgma()`) and *Neighbor-Joining*
+(:func:`neighbor_joining()`) algorithms.
+"""
+__name__ = "biotite.sequence.phylo"
+__author__ = "Patrick Kunzmann"
+from .tree import *
+from .upgma import *
+from .nj import *

biotite/sequence/phylo/nj.cpython-310-darwin.so ADDED Viewed

Binary file