PyPI - pydna - Versions diffs - 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl - Mend

pydna 5.5.4py3-none-any.whl → 5.5.5py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

pydna/__init__.py +24 -193
pydna/_pretty.py +8 -8
pydna/_thermodynamic_data.py +3 -3
pydna/alphabet.py +995 -0
pydna/amplicon.py +19 -24
pydna/amplify.py +75 -95
pydna/assembly.py +64 -81
pydna/assembly2.py +283 -294
pydna/codon.py +4 -4
pydna/common_sub_strings.py +6 -8
pydna/contig.py +203 -10
pydna/design.py +176 -60
pydna/download.py +6 -15
pydna/dseq.py +1794 -718
pydna/dseqrecord.py +170 -169
pydna/gateway.py +6 -6
pydna/gel.py +5 -5
pydna/genbank.py +43 -46
pydna/genbankfixer.py +89 -92
pydna/ladders.py +11 -12
pydna/oligonucleotide_hybridization.py +124 -0
pydna/opencloning_models.py +187 -60
pydna/parsers.py +45 -32
pydna/primer.py +4 -4
pydna/primer_screen.py +833 -0
pydna/readers.py +14 -9
pydna/seq.py +137 -47
pydna/seqrecord.py +54 -62
pydna/sequence_picker.py +2 -5
pydna/sequence_regex.py +6 -6
pydna/tm.py +17 -17
pydna/types.py +19 -19
pydna/utils.py +97 -75
{pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/METADATA +8 -8
pydna-5.5.5.dist-info/RECORD +43 -0
{pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
pydna/conftest.py +0 -42
pydna/genbankfile.py +0 -42
pydna/genbankrecord.py +0 -168
pydna/goldengate.py +0 -45
pydna/ligate.py +0 -62
pydna/user_cloning.py +0 -29
pydna-5.5.4.dist-info/RECORD +0 -46
{pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/licenses/LICENSE.txt +0 -0

pydna/dseq.py CHANGED Viewed

@@ -1,10 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-# Copyright 2013-2023 by Björn Johansson.  All rights reserved.
-# This code is part of the Python-dna distribution and governed by its
-# license.  Please see the LICENSE.txt file that should have been included
-# as part of this package.
 """Provides the Dseq class for handling double stranded DNA sequences.
 Dseq is a subclass of :class:`Bio.Seq.Seq`. The Dseq class
@@ -14,87 +10,217 @@ which can hold more meta data.
 The Dseq class support the notion of circular and linear DNA topology.
 """
+import itertools
+import re
+import copy
+import sys
+import math
+import inspect
+from typing import List, Tuple, Union
-import copy as _copy
-import itertools as _itertools
-import re as _re
-import sys as _sys
-import math as _math
+from Bio.Restriction import RestrictionBatch
+from Bio.Restriction import CommOnly
+from seguid import ldseguid
+from seguid import cdseguid
+from pydna.seq import Seq
+from Bio.Seq import _SeqAbstractBaseClass
+from Bio.Data.IUPACData import unambiguous_dna_weights
+from Bio.Data.IUPACData import unambiguous_rna_weights
+from Bio.Data.IUPACData import atom_weights
+from pydna._pretty import pretty_str
+from pydna.utils import rc
+from pydna.utils import flatten
+from pydna.utils import cuts_overlap
+from pydna.alphabet import basepair_dict
+from pydna.alphabet import dscode_to_watson_table
+from pydna.alphabet import dscode_to_crick_table
+from pydna.alphabet import regex_ds_melt_factory
+from pydna.alphabet import regex_ss_melt_factory
+from pydna.alphabet import dscode_to_full_sequence_table
+from pydna.alphabet import dscode_to_watson_tail_table
+from pydna.alphabet import dscode_to_crick_tail_table
+from pydna.alphabet import complement_table_for_dscode
+from pydna.alphabet import letters_not_in_dscode
+from pydna.alphabet import get_parts
+from pydna.alphabet import representation_tuple
+from pydna.alphabet import dsbreaks
+from pydna.common_sub_strings import common_sub_strings
+from pydna.types import DseqType, EnzymesType, CutSiteType
+# Sequences larger than this gets a truncated representation.
+length_limit_for_repr = 30
+placeholder = letters_not_in_dscode[-1]
+class CircularBytes(bytes):
+    """
+    A circular bytes sequence: indexing and slicing wrap around index 0.
+    """
-from pydna.seq import Seq as _Seq
-from Bio.Seq import _translate_str, _SeqAbstractBaseClass
+    def __new__(cls, value: bytes | bytearray | memoryview):
+        return super().__new__(cls, bytes(value))
+    def __getitem__(self, key):
+        n = len(self)
+        if n == 0:
+            if isinstance(key, slice):
+                return self.__class__(b"")
+            raise IndexError("CircularBytes index out of range (empty bytes)")
+        if isinstance(key, int):
+            return super().__getitem__(key % n)
+        if isinstance(key, slice):
+            start, stop, step = key.start, key.stop, key.step
+            step = 1 if step is None else step
+            if step == 0:
+                raise ValueError("slice step cannot be zero")
+            if step > 0:
+                start = 0 if start is None else start
+                stop = n if stop is None else stop
+                while stop <= start:
+                    stop += n
+                rng = range(start, stop, step)
+            else:
+                start = (n - 1) if start is None else start
+                stop = -1 if stop is None else stop
+                while stop >= start:
+                    stop -= n
+                rng = range(start, stop, step)
+            limit = n if step % n == 0 else n * 2
+            out = bytearray()
+            count = 0
+            for i in rng:
+                out.append(super().__getitem__(i % n))
+                count += 1
+                if count > limit:
+                    break
+            return self.__class__(bytes(out))
-from pydna._pretty import pretty_str as _pretty_str
-from seguid import ldseguid as _ldseguid
-from seguid import cdseguid as _cdseguid
+        return super().__getitem__(key)
-from pydna.utils import rc as _rc
-from pydna.utils import flatten as _flatten
-from pydna.utils import cuts_overlap as _cuts_overlap
+    def cutaround(self, start: int, length: int) -> bytes:
+        """
+        Return a circular slice of given length starting at index `start`.
+        Can exceed len(self), wrapping around as needed.
-from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
-from Bio.Restriction import RestrictionBatch as _RestrictionBatch
-from Bio.Restriction import CommOnly
+        Examples
+        --------
+        s = CircularBytes(b"ABCDE")
+        assert s.cutaround(3, 7) == b"DEABCDE"
+        assert s.cutaround(-1, 4) == b"EABC"
+        """
+        n = len(self)
+        if n == 0 or length <= 0:
+            return self.__class__(b"")
+        start %= n
+        out = bytearray()
+        for i in range(length):
+            out.append(self[(start + i) % n])
+        return self.__class__(bytes(out))
+    def find(
+        self,
+        sub: bytes | bytearray | memoryview | str,
+        start: int = 0,
+        end: int | None = None,
+    ) -> int:
+        """
+        Find a subsequence in the circular sequence, possibly
+        wrapping across the origin.
+        Returns -1 if not found.
+        """
+        n = len(self)
+        if n == 0:
+            return -1
+        end = n if end is None else min(end, n)
+        doubled = self + self
+        try:
+            sub = sub.encode("ascii")
+        except AttributeError:
+            pass
+        pos = doubled.find(bytes(sub), start, n + len(sub) - 1)
+        if pos == -1 or pos >= n:
+            return -1
+        return pos
-from .types import DseqType, EnzymesType, CutSiteType
+class Dseq(Seq):
+    """Dseq describes a double stranded DNA fragment, linear or circular.
-from typing import List as _List, Tuple as _Tuple, Union as _Union
+    Dseq can be initiated in two ways, using two strings, each representing the
+    Watson (upper, sense) strand, the Crick (lower, antisense) strand and an
+    optional value describing the stagger between the strands on the left side (ovhg).
+    Alternatively, a single string representation using dsIUPAC codes can be used.
+    If a single string is used, the letters of that string are interpreted as base
+    pairs rather than single bases. For example "A" would indicate the basepair
+    "A/T". An expanded IUPAC code is used where the letters PEXI have been assigned
+    to GATC on the Watson strand with no pairing base on the Crick strand G/"", A/"",
+    T/"" and C/"". The letters QFZJ have been assigned the opposite base pairs with
+    an empty Watson strand ""/G, ""/A, ""/T, and ""/C.
+    ::
+        PEXIGATCQFZJ  would indicate the linear double-stranded fragment:
+        GATCGATC
+            CTAGCTAG
-class Dseq(_Seq):
-    """Dseq holds information for a double stranded DNA fragment.
-    Dseq also holds information describing the topology of
-    the DNA fragment (linear or circular).
     Parameters
     ----------
     watson : str
-        a string representing the watson (sense) DNA strand.
+        a string representing the Watson (sense) DNA strand or a basepair
+        representation.
     crick : str, optional
-        a string representing the crick (antisense) DNA strand.
+        a string representing the Crick (antisense) DNA strand.
     ovhg : int, optional
         A positive or negative number to describe the stagger between the
-        watson and crick strands.
+        Watson and Crick strands.
         see below for a detailed explanation.
-    linear : bool, optional
-        True indicates that sequence is linear, False that it is circular.
     circular : bool, optional
         True indicates that sequence is circular, False that it is linear.
     Examples
     --------
-    Dseq is a subclass of the Biopython Seq object. It stores two
-    strings representing the watson (sense) and crick(antisense) strands.
-    two properties called linear and circular, and a numeric value ovhg
-    (overhang) describing the stagger for the watson and crick strand
-    in the 5' end of the fragment.
+    Dseq is a subclass of the Biopython Bio.Seq.Seq class. The constructor
+    can accept two strings representing the Watson (sense) and Crick(antisense)
+    DNA strands. These are interpreted as single stranded DNA. There is a check
+    for complementarity between the strands.
-    The most common usage is probably to create a Dseq object as a
-    part of a Dseqrecord object (see :class:`pydna.dseqrecord.Dseqrecord`).
-    There are three ways of creating a Dseq object directly listed below, but you can also
-    use the function Dseq.from_full_sequence_and_overhangs() to create a Dseq:
+    If the DNA molecule is staggered on the left side, an integer ovhg
+    (overhang) must be given, describing the stagger between the Watson and Crick strand
+    in the 5' end of the fragment.
-    Only one argument (string):
+    Additionally, the optional boolean parameter circular can be given to indicate if the
+    DNA molecule is circular.
-    >>> from pydna.dseq import Dseq
-    >>> Dseq("aaa")
-    Dseq(-3)
-    aaa
-    ttt
+    The most common usage of the Dseq class is probably not to use it directly, but to
+    create it as part of a Dseqrecord object (see :class:`pydna.dseqrecord.Dseqrecord`).
+    This works in the same way as for the relationship between the :class:`Bio.Seq.Seq` and
+    :class:`Bio.SeqRecord.SeqRecord` classes in Biopython.
-    The given string will be interpreted as the watson strand of a
-    blunt, linear double stranded sequence object. The crick strand
-    is created automatically from the watson strand.
+    There are multiple ways of creating a Dseq object directly listed below, but you can also
+    use the function Dseq.from_full_sequence_and_overhangs() to create a Dseq:
-    Two arguments (string, string):
+    Two arguments (string, string), no overhang provided:
     >>> from pydna.dseq import Dseq
     >>> Dseq("gggaaat","ttt")
@@ -102,16 +228,14 @@ class Dseq(_Seq):
     gggaaat
        ttt
-    If both watson and crick are given, but not ovhg an attempt
-    will be made to find the best annealing between the strands.
-    There are limitations to this. For long fragments it is quite
-    slow. The length of the annealing sequences have to be at least
-    half the length of the shortest of the strands.
+    If Watson and Crick are given, but not ovhg, an attempt will be made to find the best annealing
+    between the strands. There are important limitations to this. If there are several ways to
+    anneal the strands, this will fail. For long fragments it is quite slow.
     Three arguments (string, string, ovhg=int):
-    The ovhg parameter is an integer describing the length of the
-    crick strand overhang in the 5' end of the molecule.
+    The ovhg parameter is an integer describing the length of the Crick strand overhang on the
+    left side (the 5' end of Watson strand).
     The ovhg parameter controls the stagger at the five prime end::
@@ -134,53 +258,51 @@ class Dseq(_Seq):
     Example of creating Dseq objects with different amounts of stagger:
-    >>> Dseq(watson="agt", crick="actta", ovhg=-2)
+    >>> Dseq(watson="att", crick="acata", ovhg=-2)
     Dseq(-7)
-    agt
-      attca
-    >>> Dseq(watson="agt",crick="actta",ovhg=-1)
+    att
+      ataca
+    >>> Dseq(watson="ata",crick="acata",ovhg=-1)
     Dseq(-6)
-    agt
-     attca
-    >>> Dseq(watson="agt",crick="actta",ovhg=0)
+    ata
+     ataca
+    >>> Dseq(watson="taa",crick="actta",ovhg=0)
     Dseq(-5)
-    agt
+    taa
     attca
-    >>> Dseq(watson="agt",crick="actta",ovhg=1)
+    >>> Dseq(watson="aag",crick="actta",ovhg=1)
     Dseq(-5)
-     agt
+     aag
     attca
     >>> Dseq(watson="agt",crick="actta",ovhg=2)
     Dseq(-5)
       agt
     attca
-    If the ovhg parameter is specified a crick strand also
-    needs to be supplied, otherwise an exception is raised.
+    If the ovhg parameter is specified a Crick strand also needs to be supplied, or
+    an exception is raised.
     >>> Dseq(watson="agt", ovhg=2)
     Traceback (most recent call last):
-      File "<stdin>", line 1, in <module>
-      File "/usr/local/lib/python2.7/dist-packages/pydna_/dsdna.py", line 169, in __init__
-        else:
-    ValueError: ovhg defined without crick strand!
+        ...
+    ValueError: ovhg (overhang) defined without a crick strand.
-    The shape of the fragment is set by circular = True, False
-    Note that both ends of the DNA fragment has to be compatible to set
-    circular = True.
+    The shape or topology of the fragment is set by the circular parameter, True or False (default).
-    >>> Dseq("aaa","ttt")
+    >>> Dseq("aaa", "ttt", ovhg = 0)  # A linear sequence by default
     Dseq(-3)
     aaa
     ttt
-    >>> Dseq("aaa","ttt",ovhg=0)
+    >>> Dseq("aaa", "ttt", ovhg = 0, circular = False)  # A linear sequence if circular is False
     Dseq(-3)
     aaa
     ttt
-    >>> Dseq("aaa","ttt",ovhg=1)
+    >>> Dseq("aaa", "ttt", ovhg = 0, circular = True)  # A circular sequence
+    Dseq(o3)
+    aaa
+    ttt
+    >>> Dseq("aaa", "ttt", ovhg=1, circular = False)
     Dseq(-4)
      aaa
     ttt
@@ -210,6 +332,18 @@ class Dseq(_Seq):
     -4
     >>>
+    dsIUPAC [#]_ is an extension to the IUPAC alphabet used to describe ss regions:
+    ::
+            aaaGATC       GATCccc          ad-hoc representations
+        CTAGttt               gggCTAG
+        QFZJaaaPEXI       PEXIcccQFZJ      dsIUPAC
     Coercing to string
     >>> str(a)
@@ -295,46 +429,76 @@ class Dseq(_Seq):
     """
-    trunc = 30
     def __init__(
         self,
-        watson: _Union[str, bytes],
-        crick: _Union[str, bytes, None] = None,
+        watson: Union[str, bytes],
+        crick: Union[str, bytes, None] = None,
         ovhg=None,
         circular=False,
         pos=0,
     ):
-        if isinstance(watson, bytes):
-            watson = watson.decode("ASCII")
-        if isinstance(crick, bytes):
-            crick = crick.decode("ASCII")
+        if isinstance(watson, (bytes, bytearray)):
+            # watson is decoded to a string if needed.
+            watson = watson.decode("ascii")
+        if isinstance(crick, (bytes, bytearray)):
+            # crick is decoded to a string if needed.
+            crick = crick.decode("ascii")
         if crick is None:
             if ovhg is not None:
-                raise ValueError("ovhg defined without crick strand!")
-            crick = _rc(watson)
-            ovhg = 0
-            self._data = bytes(watson, encoding="ASCII")
+                raise ValueError("ovhg (overhang) defined without a crick strand.")
+            """
+            Giving only the watson string implies inferring the Crick complementary strand
+            from the Watson sequence. The watson string can contain dscode letters which will
+            be interpreted as outlined in the pydna.alphabet module.
+            The _data property must be a byte string for compatibility with
+            Biopython Bio.Seq.Seq
+            """
+            data = watson
+            self._data = data.encode("ascii")
-        else:  # crick strand given
-            if ovhg is None:  # ovhg not given
-                olaps = _common_sub_strings(
+        else:
+            """
+            Crick strand given, ovhg is optional. An important consequence is that the
+            watson and crick strands are interpreted as single stranded DNA that is
+            supposed to anneal.
+            If ovhg was not given, we try to guess the value below. This will fail
+            if there are two or more ways to anneal with equal length of the double
+            stranded part.
+            """
+            if ovhg is None:  # ovhg not given, try to guess from sequences
+                limit = int(math.log(len(watson)) / math.log(4))
+                olaps = common_sub_strings(
                     str(watson).lower(),
-                    str(_rc(crick).lower()),
-                    int(_math.log(len(watson)) / _math.log(4)),
+                    str(rc(crick).lower()),
+                    limit,
                 )
+                """No overlaps found, strands do not anneal"""
                 if len(olaps) == 0:
                     raise ValueError(
-                        "Could not anneal the two strands." " Please provide ovhg value"
+                        "Could not anneal the two strands."
+                        f" looked for annealing with at least {limit} basepairs"
+                        " Please provide and overhang value (ovhg parameter)"
                     )
-                # We extract the positions and length of the first (longest) overlap, since
-                # common_sub_strings sorts the overlaps by length.
-                pos_watson, pos_crick, longest_olap_length = olaps[0]
+                """
+                We extract the positions and length of the first (longest) overlap,
+                since common_sub_strings sorts the overlaps by length, longest first.
+                """
-                # We see if there is another overlap of the same length
-                if any(olap[2] >= longest_olap_length for olap in olaps[1:]):
+                (pos_watson, pos_crick, longest_olap_length), *rest = olaps
+                """
+                We see if there is another overlap of the same length
+                This means that annealing is ambiguous. User should provide
+                an ovhg value.
+                """
+                if any(
+                    olap_length >= longest_olap_length for _, _, olap_length in rest
+                ):
                     raise ValueError(
                         "More than one way of annealing the"
                         " strands. Please provide ovhg value"
@@ -342,120 +506,80 @@ class Dseq(_Seq):
                 ovhg = pos_crick - pos_watson
-                sns = (ovhg * " ") + _pretty_str(watson)
-                asn = (-ovhg * " ") + _pretty_str(_rc(crick))
-                self._data = bytes(
-                    "".join(
-                        [
-                            a.strip() or b.strip()
-                            for a, b in _itertools.zip_longest(sns, asn, fillvalue=" ")
-                        ]
-                    ),
-                    encoding="ASCII",
-                )
+            """
+            Pad both strands on left side ovhg spaces
+            a negative number gives no padding,
+            """
+            sense = ovhg * " " + watson
+            antisense = -ovhg * " " + crick[::-1]
+            max_len = max(len(sense), len(antisense))
-            else:  # ovhg given
-                if ovhg == 0:
-                    if len(watson) >= len(crick):
-                        self._data = bytes(watson, encoding="ASCII")
-                    else:
-                        self._data = bytes(
-                            watson + _rc(crick[: len(crick) - len(watson)]),
-                            encoding="ASCII",
-                        )
-                elif ovhg > 0:
-                    if ovhg + len(watson) > len(crick):
-                        self._data = bytes(
-                            _rc(crick[-ovhg:]) + watson, encoding="ASCII"
-                        )
-                    else:
-                        self._data = bytes(
-                            _rc(crick[-ovhg:])
-                            + watson
-                            + _rc(crick[: len(crick) - ovhg - len(watson)]),
-                            encoding="ASCII",
-                        )
-                else:  # ovhg < 0
-                    if -ovhg + len(crick) > len(watson):
-                        self._data = bytes(
-                            watson + _rc(crick[: -ovhg + len(crick) - len(watson)]),
-                            encoding="ASCII",
-                        )
-                    else:
-                        self._data = bytes(watson, encoding="ASCII")
+            """pad both strands on right side to same size."""
+            sense = sense.ljust(max_len)
+            antisense = antisense.ljust(max_len)
+            """both strands padded so that bsepairs align"""
+            assert len(sense) == len(antisense)
+            data = []
+            for w, c in zip(sense, antisense):
+                try:
+                    data.append(basepair_dict[w, c])
+                except KeyError as err:
+                    print(f"Base mismatch in representation {err}")
+                    raise ValueError(f"Base mismatch in representation: {err}")
+            data = "".join(data).strip()
+            self._data = data.encode("ascii")
         self.circular = circular
-        self.watson = _pretty_str(watson)
-        self.crick = _pretty_str(crick)
-        self.length = len(self._data)
-        self.ovhg = ovhg
         self.pos = pos
+        if circular:
+            data += data[0:1]
+        dsb = dsbreaks(data)
+        if dsb:
+            msg = "".join(dsb)
+            raise ValueError(
+                f"Molecule is internally split in {len(dsb)} location(s):\n\n{msg}".strip()
+            )
     @classmethod
-    def quick(
-        cls,
-        watson: str,
-        crick: str,
-        ovhg=0,
-        circular=False,
-        pos=0,
-    ):
-        obj = cls.__new__(cls)  # Does not call __init__
-        obj.watson = _pretty_str(watson)
-        obj.crick = _pretty_str(crick)
-        obj.ovhg = ovhg
+    def quick(cls, data: bytes, *args, circular=False, pos=0, **kwargs):
+        """Fastest way to instantiate an object of the Dseq class.
+        No checks of parameters are made.
+        Does not call Bio.Seq.Seq.__init__() which has lots of time consuming checks.
+        """
+        obj = cls.__new__(cls)
         obj.circular = circular
-        obj.length = max(len(watson) + max(0, ovhg), len(crick) + max(0, -ovhg))
         obj.pos = pos
-        wb = bytes(watson, encoding="ASCII")
-        cb = bytes(crick, encoding="ASCII")
-        obj._data = (
-            _rc(cb[-max(0, ovhg) or len(cb) :])
-            + wb
-            + _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
-        )
-        return obj
+        obj._data = data
-    @classmethod
-    def from_string(
-        cls,
-        dna: str,
-        *args,
-        # linear=True,
-        circular=False,
-        **kwargs,
-    ):
-        obj = cls.__new__(cls)  # Does not call __init__
-        obj.watson = _pretty_str(dna)
-        obj.crick = _pretty_str(_rc(dna))
-        obj.ovhg = 0
-        obj.circular = circular
-        # obj._linear = linear
-        obj.length = len(dna)
-        obj.pos = 0
-        obj._data = bytes(dna, encoding="ASCII")
         return obj
     @classmethod
     def from_representation(cls, dsdna: str, *args, **kwargs):
-        obj = cls.__new__(cls)  # Does not call __init__
-        w, c, *r = [ln for ln in dsdna.splitlines() if ln]
-        ovhg = obj.ovhg = len(w) - len(w.lstrip()) - (len(c) - len(c.lstrip()))
-        watson = obj.watson = _pretty_str(w.strip())
-        crick = obj.crick = _pretty_str(c.strip()[::-1])
+        obj = cls.__new__(cls)
         obj.circular = False
-        # obj._linear = True
-        obj.length = max(len(watson) + max(0, ovhg), len(crick) + max(0, -ovhg))
         obj.pos = 0
-        wb = bytes(watson, encoding="ASCII")
-        cb = bytes(crick, encoding="ASCII")
-        obj._data = (
-            _rc(cb[-max(0, ovhg) or len(cb) :])
-            + wb
-            + _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
-        )
-        return obj
+        clean = inspect.cleandoc("\n" + dsdna)
+        watson, crick = [
+            ln
+            for ln in clean.splitlines()
+            if ln.strip() and not ln.strip().startswith("Dseq(")
+        ]
+        ovhgw = len(watson) - len(watson.lstrip())
+        ovhgc = -(len(crick) - len(crick.lstrip()))
+        ovhg = ovhgw or ovhgc
+        watson = watson.strip()
+        crick = crick.strip()[::-1]
+        return Dseq(watson, crick, ovhg)
     @classmethod
     def from_full_sequence_and_overhangs(
@@ -522,111 +646,177 @@ class Dseq(_Seq):
         return Dseq(watson, crick=crick, ovhg=crick_ovhg)
-    # @property
-    # def ovhg(self):
-    #     """The ovhg property. This cannot be set directly, but is a
-    #     consequence of how the watson and crick strands anneal to
-    #     each other"""
-    #     return self._ovhg
-    # @property
-    # def linear(self):
-    #     """The linear property can not be set directly.
-    #     Use an empty slice [:] to create a linear object."""
-    #     return self._linear
-    # @property
-    # def circular(self):
-    #     """The circular property can not be set directly.
-    #     Use :meth:`looped` to create a circular Dseq object"""
-    #     return self._circular
+    @property
+    def watson(self) -> str:
+        """
+        The watson (upper) strand of the double stranded fragment 5'-3'.
-    def mw(self) -> float:
-        """This method returns the molecular weight of the DNA molecule
-        in g/mol. The following formula is used::
-               MW = (A x 313.2) + (T x 304.2) +
-                    (C x 289.2) + (G x 329.2) +
-                    (N x 308.9) + 79.0
-        """
-        nts = (self.watson + self.crick).lower()
-        return (
-            313.2 * nts.count("a")
-            + 304.2 * nts.count("t")
-            + 289.2 * nts.count("c")
-            + 329.2 * nts.count("g")
-            + 308.9 * nts.count("n")
-            + 79.0
-        )
+        Returns
+        -------
+        TYPE
+            DESCRIPTION.
-    def upper(self: DseqType) -> DseqType:
-        """Return an upper case copy of the sequence.
+        """
+        return self._data.decode("ascii").translate(dscode_to_watson_table).strip()
-        >>> from pydna.dseq import Dseq
-        >>> my_seq = Dseq("aAa")
-        >>> my_seq
-        Dseq(-3)
-        aAa
-        tTt
-        >>> my_seq.upper()
-        Dseq(-3)
-        AAA
-        TTT
+    @property
+    def crick(self) -> str:
+        """
+        The crick (lower) strand of the double stranded fragment 5'-3'.
         Returns
         -------
-        Dseq
-            Dseq object in uppercase
+        TYPE
+            DESCRIPTION.
-        See also
-        --------
-        pydna.dseq.Dseq.lower
+        """
+        return self._data.decode("ascii").translate(dscode_to_crick_table).strip()[::-1]
+    @property
+    def left_ovhg(self) -> int:
         """
-        return self.quick(
-            self.watson.upper(),
-            self.crick.upper(),
-            ovhg=self.ovhg,
-            # linear=self.linear,
-            circular=self.circular,
-            pos=self.pos,
-        )
+        The 5' overhang of the lower strand compared to the upper.
-    def lower(self: DseqType) -> DseqType:
-        """Return a lower case copy of the sequence.
+        See module docstring for more information.
-        >>> from pydna.dseq import Dseq
-        >>> my_seq = Dseq("aAa")
-        >>> my_seq
-        Dseq(-3)
-        aAa
-        tTt
-        >>> my_seq.lower()
-        Dseq(-3)
-        aaa
-        ttt
+        Returns
+        -------
+        TYPE
+            DESCRIPTION.
+        """
+        parts = self.get_parts()
+        if parts.single_watson or parts.single_crick:
+            return None
+        return -len(parts.sticky_left5) or len(parts.sticky_left3)
+    ovhg = left_ovhg
+    @property
+    def right_ovhg(self) -> int:
+        """Overhang at the right side (end)."""
+        parts = self.get_parts()
+        if parts.single_watson or parts.single_crick:
+            return None
+        return -len(parts.sticky_right5) or len(parts.sticky_right3)
+    watson_ovhg = right_ovhg
+    def __str__(self) -> str:
+        """
+        A string representation of the sequence. The returned string
+        is the watson strand of a blunt version of the sequence.
+        >>> ds = Dseq.from_representation(
+        ... '''
+        ... GAATTC
+        ...   TAA
+        ... ''')
+        >>> str(ds)
+        'GAATTC'
+        >>> ds = Dseq.from_representation(
+        ... '''
+        ...   ATT
+        ... CTTAAG
+        ... ''')
+        >>> str(ds)
+        'GAATTC'
         Returns
         -------
-        Dseq
-            Dseq object in lowercase
+        str
+            A string representation of the sequence.
-        See also
+        """
+        return bytes(self).decode("ascii")
+    to_blunt_string = __str__  # alias of __str__ # TODO: consider removing
+    def __bytes__(self) -> bytes:
+        return self._data.translate(dscode_to_full_sequence_table)
+    def mw(self) -> float:
+        """The molecular weight of the DNA/RNA molecule in g/mol.
+        The molecular weight data in Biopython Bio.Data.IUPACData
+        is used. The DNA is assumed to have a 5'-phosphate as many
+        DNA fragments from restriction digestion do:
+        ::
+             P - G-A-T-T-A-C-A - OH
+                 | | | | | | |
+            OH - C-T-A-A-T-G-T - P
+        The molecular weights listed in the unambiguous_dna_weights
+        dictionary refers to free monophosphate nucleotides.
+        One water molecule is removed for every phosphodiester bond
+        formed between nucleotides. For linear molecules, the weight
+        of one water molecule is added to account for the terminal
+        hydroxyl group and a hydrogen on the 5' terminal phosphate
+        group.
+        ::
+             P - G---A---T - OH  P - C---A - OH
+                 |   |   |           |   |
+            OH - C---T---A---A---T---G---T - P
+        If the DNA is discontinuous, the internal 5'- end is assumed
+        to have a phosphate and the 3'- a hydroxyl group:
+        Examples
         --------
-        pydna.dseq.Dseq.upper
-        """
-        return self.quick(
-            self.watson.lower(),
-            self.crick.lower(),
-            ovhg=self.ovhg,
-            # linear=self.linear,
-            circular=self.circular,
-            pos=self.pos,
-        )
+        >>> from pydna.dseq import Dseq
+        >>> ds_lin_obj = Dseq("GATTACA")
+        >>> ds_lin_obj
+        Dseq(-7)
+        GATTACA
+        CTAATGT
+        >>> round(ds_lin_obj.mw(), 1)
+        4359.8
+        >>> ds_circ_obj = Dseq("GATTACA", circular = True)
+        >>> round(ds_circ_obj.mw(), 1)
+        4323.8
+        >>> ssobj = Dseq("PEXXEIE")
+        >>> ssobj
+        Dseq(-7)
+        GATTACA
+        <BLANKLINE>
+        >>> round(ssobj.mw(), 1)
+        2184.4
+        >>> ds_lin_obj2 = Dseq("GATZFCA")
+        >>> ds_lin_obj2
+        Dseq(-7)
+        GAT  CA
+        CTAATGT
+        >>> round(ds_lin_obj2.mw(), 1)
+        3724.4
+        """
+        h2o = atom_weights["H"] * 2 + atom_weights["O"]
+        mwd = unambiguous_rna_weights | unambiguous_dna_weights | {" ": 0}
+        watsn_weight = sum(mwd[nt] - h2o for nt in self.watson.upper())
+        crick_weight = sum(mwd[nt] - h2o for nt in self.crick.upper())
+        watsn_weight += h2o * len(re.findall(r" +", self.watson))
+        crick_weight += h2o * len(re.findall(r" +", self.crick))
+        if watsn_weight and not self.circular:
+            watsn_weight += h2o
+        if crick_weight and not self.circular:
+            crick_weight += h2o
+        return watsn_weight + crick_weight
     def find(
-        self, sub: _Union[_SeqAbstractBaseClass, str, bytes], start=0, end=_sys.maxsize
+        self, sub: Union[_SeqAbstractBaseClass, str, bytes], start=0, end=sys.maxsize
     ) -> int:
         """This method behaves like the python string method of the same name.
@@ -635,6 +825,8 @@ class Dseq(_Seq):
         Returns -1 if the subsequence is NOT found.
+        The search is case sensitive.
         Parameters
         ----------
@@ -650,80 +842,51 @@ class Dseq(_Seq):
         Examples
         --------
         >>> from pydna.dseq import Dseq
-        >>> seq = Dseq("atcgactgacgtgtt")
+        >>> seq = Dseq("agtaagt")
         >>> seq
-        Dseq(-15)
-        atcgactgacgtgtt
-        tagctgactgcacaa
-        >>> seq.find("gac")
-        3
-        >>> seq = Dseq(watson="agt",crick="actta",ovhg=-2)
+        Dseq(-7)
+        agtaagt
+        tcattca
+        >>> seq.find("taa")
+        2
+        >>> seq = Dseq(watson="agta",crick="actta",ovhg=-2)
         >>> seq
         Dseq(-7)
-        agt
+        agta
           attca
         >>> seq.find("taa")
+        -1
+        >>> seq = Dseq(watson="agta",crick="actta",ovhg=-2)
+        >>> seq
+        Dseq(-7)
+        agta
+          attca
+        >>> seq.find("ta")
         2
         """
-        if not self.circular:
-            return _Seq.find(self, sub, start, end)
-        return (_pretty_str(self) + _pretty_str(self)).find(sub, start, end)
-    def __getitem__(self, sl: slice) -> "Dseq":
-        """Returns a subsequence. This method is used by the slice notation"""
-        if not self.circular:
-            x = len(self.crick) - self.ovhg - len(self.watson)
-            sns = (self.ovhg * " " + self.watson + x * " ")[sl]
-            asn = (-self.ovhg * " " + self.crick[::-1] + -x * " ")[sl]
-            ovhg = max(
-                (len(sns) - len(sns.lstrip()), -len(asn) + len(asn.lstrip())), key=abs
-            )
-            return Dseq(
-                sns.strip(),
-                asn[::-1].strip(),
-                ovhg=ovhg,
-                # linear=True
-            )
+        if self.circular:
+            result = CircularBytes(self._data).find(sub, start, end)
         else:
-            sl = slice(sl.start or 0, sl.stop or len(self), sl.step)
-            if sl.start > len(self) or sl.stop > len(self):
-                return Dseq("")
-            if sl.start < sl.stop:
-                return Dseq(
-                    self.watson[sl],
-                    self.crick[::-1][sl][::-1],
-                    ovhg=0,
-                    # linear=True
-                )
-            else:
-                try:
-                    stp = abs(sl.step)
-                except TypeError:
-                    stp = 1
-                start = sl.start
-                stop = sl.stop
-                w = (
-                    self.watson[(start or len(self)) :: stp]
-                    + self.watson[: (stop or 0) : stp]
-                )
-                c = (
-                    self.crick[len(self) - stop :: stp]
-                    + self.crick[: len(self) - start : stp]
-                )
+            result = super().find(sub, start, end)
+        return result
+    def __contains__(self, sub: [str, bytes]) -> bool:
+        return self.find(sub) != -1
-                return Dseq(w, c, ovhg=0)  # , linear=True)
+    def __getitem__(self, sl: [slice, int]) -> DseqType:
+        if isinstance(sl, int):
+            sl = slice(sl, sl + 1, 1)
+        sl = slice(sl.start, sl.stop, sl.step)
+        if self.circular:
+            cb = CircularBytes(self._data)
+            return self.quick(cb[sl])
+        return super().__getitem__(sl)
     def __eq__(self, other: DseqType) -> bool:
         """Compare to another Dseq object OR an object that implements
-        watson, crick and ovhg properties. This comparison is case
-        insensitive.
+        watson, crick and ovhg properties.
+        This comparison is case insensitive.
         """
         try:
@@ -738,85 +901,15 @@ class Dseq(_Seq):
             same = False
         return same
-    def __repr__(self):
-        """Returns a representation of the sequence, truncated if
-        longer than 30 bp"""
-        if len(self) > Dseq.trunc:
-            if self.ovhg > 0:
-                d = self.crick[-self.ovhg :][::-1]
-                hej = len(d)
-                if len(d) > 10:
-                    d = "{}..{}".format(d[:4], d[-4:])
-                a = len(d) * " "
-            elif self.ovhg < 0:
-                a = self.watson[: max(0, -self.ovhg)]
-                hej = len(a)
-                if len(a) > 10:
-                    a = "{}..{}".format(a[:4], a[-4:])
-                d = len(a) * " "
-            else:
-                a = ""
-                d = ""
-                hej = 0
-            x = self.ovhg + len(self.watson) - len(self.crick)
-            if x > 0:
-                c = self.watson[len(self.crick) - self.ovhg :]
-                y = len(c)
-                if len(c) > 10:
-                    c = "{}..{}".format(c[:4], c[-4:])
-                f = len(c) * " "
-            elif x < 0:
-                f = self.crick[:-x][::-1]
-                y = len(f)
-                if len(f) > 10:
-                    f = "{}..{}".format(f[:4], f[-4:])
-                c = len(f) * " "
-            else:
-                c = ""
-                f = ""
-                y = 0
-            L = len(self) - hej - y
-            x1 = -min(0, self.ovhg)
-            x2 = x1 + L
-            x3 = -min(0, x)
-            x4 = x3 + L
-            b = self.watson[x1:x2]
-            e = self.crick[x3:x4][::-1]
-            if len(b) > 10:
-                b = "{}..{}".format(b[:4], b[-4:])
-                e = "{}..{}".format(e[:4], e[-4:])
-            return _pretty_str(
-                "{klass}({top}{size})\n" "{a}{b}{c}\n" "{d}{e}{f}"
-            ).format(
-                klass=self.__class__.__name__,
-                top={False: "-", True: "o"}[self.circular],
-                size=len(self),
-                a=a,
-                b=b,
-                c=c,
-                d=d,
-                e=e,
-                f=f,
-            )
+    def __repr__(self, lim: int = length_limit_for_repr) -> pretty_str:
-        else:
-            return _pretty_str(
-                "{}({}{})\n{}\n{}".format(
-                    self.__class__.__name__,
-                    {False: "-", True: "o"}[self.circular],
-                    len(self),
-                    self.ovhg * " " + self.watson,
-                    -self.ovhg * " " + self.crick[::-1],
-                )
-            )
+        header = f"{self.__class__.__name__}({({False: '-', True: 'o'}[self.circular])}{len(self)})"
+        w, c = representation_tuple(
+            self._data.decode("ascii"), length_limit_for_repr=length_limit_for_repr
+        )
+        return pretty_str(header + "\n" + w + "\n" + c)
     def reverse_complement(self) -> "Dseq":
         """Dseq object where watson and crick have switched places.
@@ -839,22 +932,29 @@ class Dseq(_Seq):
         >>>
         """
-        return Dseq.quick(
-            self.crick,
-            self.watson,
-            ovhg=len(self.watson) - len(self.crick) + self.ovhg,
-            circular=self.circular,
-        )
+        return Dseq.quick(rc(self._data), circular=self.circular)
     rc = reverse_complement  # alias for reverse_complement
     def shifted(self: DseqType, shift: int) -> DseqType:
-        """Shifted version of a circular Dseq object."""
+        """
+        Shifted copy of a circular Dseq object.
+        >>> ds = Dseq("TAAG", circular = True)
+        >>> ds.shifted(1) # First bp moved to right side:
+        Dseq(o4)
+        AAGT
+        TTCA
+        >>> ds.shifted(-1) # Last bp moved to left side:
+        Dseq(o4)
+        GTAA
+        CATT
+        """
         if not self.circular:
             raise TypeError("DNA is not circular.")
         shift = shift % len(self)
         if not shift:
-            return _copy.deepcopy(self)
+            return copy.deepcopy(self)
         else:
             return (self[shift:] + self[:shift]).looped()
@@ -876,19 +976,30 @@ class Dseq(_Seq):
         Dseq(o8)
         catcgatc
         gtagctag
-        >>> a.T4("t")
+        >>> b = Dseq("iatcgatj")
+        >>> b
         Dseq(-8)
         catcgat
          tagctag
-        >>> a.T4("t").looped()
+        >>> b.looped()
+        Dseq(o7)
+        catcgat
+        gtagcta
+        >>> c = Dseq("jatcgati")
+        >>> c
+        Dseq(-8)
+         atcgatc
+        gtagcta
+        >>> c.looped()
         Dseq(o7)
         catcgat
         gtagcta
-        >>> a.T4("a")
+        >>> d = Dseq("ietcgazj")
+        >>> d
         Dseq(-8)
         catcga
           agctag
-        >>> a.T4("a").looped()
+        >>> d.looped()
         Traceback (most recent call last):
           File "<stdin>", line 1, in <module>
           File "/usr/local/lib/python2.7/dist-packages/pydna/dsdna.py", line 357, in looped
@@ -899,116 +1010,116 @@ class Dseq(_Seq):
         """
         if self.circular:
-            return _copy.deepcopy(self)
+            return copy.deepcopy(self)
         type5, sticky5 = self.five_prime_end()
         type3, sticky3 = self.three_prime_end()
-        if type5 == type3 and str(sticky5) == str(_rc(sticky3)):
-            nseq = self.__class__.quick(
-                self.watson,
-                self.crick[-self.ovhg :] + self.crick[: -self.ovhg],
-                ovhg=0,
-                # linear=False,
-                circular=True,
-            )
-            # assert len(nseq.crick) == len(nseq.watson)
-            return nseq
-        else:
-            raise TypeError(
-                "DNA cannot be circularized.\n" "5' and 3' sticky ends not compatible!"
-            )
-    def tolinear(self: DseqType) -> DseqType:  # pragma: no cover
-        """Returns a blunt, linear copy of a circular Dseq object. This can
-        only be done if the Dseq object is circular, otherwise a
-        TypeError is raised.
+        err = TypeError(
+            "DNA cannot be circularized.\n" "5' and 3' sticky ends not compatible!"
+        )
-        This method is deprecated, use slicing instead. See example below.
+        if type5 != type3:
+            raise err
-        Examples
-        --------
+        try:
+            # Test if sticky ends are compatible
+            self + self
+        except TypeError:
+            raise err
-        >>> from pydna.dseq import Dseq
-        >>> a=Dseq("catcgatc", circular=True)
-        >>> a
-        Dseq(o8)
-        catcgatc
-        gtagctag
-        >>> a[:]
-        Dseq(-8)
-        catcgatc
-        gtagctag
-        >>>
+        new = self.cast_to_ds_left()[: len(self) - len(sticky3)]
-        """
-        import warnings as _warnings
-        from pydna import _PydnaDeprecationWarning
+        new.circular = True
+        return new
-        _warnings.warn(
-            "tolinear method is obsolete; "
-            "please use obj[:] "
-            "instead of obj.tolinear().",
-            _PydnaDeprecationWarning,
-        )
-        if not self.circular:
-            raise TypeError("DNA is not circular.\n")
-        selfcopy = _copy.deepcopy(self)
-        selfcopy.circular = False
-        return selfcopy  # self.__class__(self.watson, linear=True)
+    def five_prime_end(self) -> Tuple[str, str]:
+        """Returns a 2-tuple of strings describing the structure of the 5' end of
+        the DNA fragment.
+        The tuple contains (type, sticky) where type is one of "5'", "3'", "blunt" or "single".
+        sticky is always in lower case and contains the sequence of the
+        protruding end in 5'-3' direction.
+        See examples below:
-    def five_prime_end(self) -> _Tuple[str, str]:
-        """Returns a tuple describing the structure of the 5' end of
-        the DNA fragment
         Examples
         --------
         >>> from pydna.dseq import Dseq
-        >>> a=Dseq("aaa", "ttt")
+        >>> a = Dseq("aa", "tttg", ovhg=2)
         >>> a
-        Dseq(-3)
-        aaa
-        ttt
+        Dseq(-4)
+          aa
+        gttt
         >>> a.five_prime_end()
-        ('blunt', '')
-        >>> a=Dseq("aaa", "ttt", ovhg=1)
+        ("3'", 'tg')
+        >>> a = Dseq("caaa", "tt", ovhg=-2)
         >>> a
         Dseq(-4)
-         aaa
-        ttt
+        caaa
+          tt
         >>> a.five_prime_end()
-        ("3'", 't')
-        >>> a=Dseq("aaa", "ttt", ovhg=-1)
+        ("5'", 'ca')
+        >>> a = Dseq("aa", "tt")
         >>> a
-        Dseq(-4)
-        aaa
-         ttt
+        Dseq(-2)
+        aa
+        tt
         >>> a.five_prime_end()
-        ("5'", 'a')
-        >>>
+        ('blunt', '')
         See also
         --------
         pydna.dseq.Dseq.three_prime_end
         """
-        if self.watson and not self.crick:
-            return "5'", self.watson.lower()
-        if not self.watson and self.crick:
-            return "3'", self.crick.lower()
-        if self.ovhg < 0:
-            sticky = self.watson[: -self.ovhg].lower()
+        # See docstring for function pydna.utils.get_parts for details
+        # on what is contained in parts.
+        parts = self.get_parts()
+        sticky5 = parts.sticky_left5.translate(dscode_to_watson_table)
+        sticky3 = parts.sticky_left3.translate(dscode_to_crick_table)[::-1]
+        single_watson = parts.single_watson.translate(dscode_to_watson_table)
+        single_crick = parts.single_crick.translate(dscode_to_crick_table)[::-1]
+        # The walrus operator returns the value being assigned, so
+        # we can test if it is empty or not.
+        if sticky := single_watson:
+            type_ = "single"
+        elif sticky := single_crick:
+            type_ = "single"
+        elif sticky5 == sticky3 == "":
+            type_, sticky = "blunt", ""
+        elif sticky := sticky5:
             type_ = "5'"
-        elif self.ovhg > 0:
-            sticky = self.crick[-self.ovhg :].lower()
+        elif sticky := sticky3:
             type_ = "3'"
-        else:
-            sticky = ""
-            type_ = "blunt"
-        return type_, sticky
-    def three_prime_end(self) -> _Tuple[str, str]:
+        return type_, sticky.lower()
+    def three_prime_end(self) -> Tuple[str, str]:
         """Returns a tuple describing the structure of the 3' end of
         the DNA fragment
+        >>> a = Dseq("aa", "gttt", ovhg=0)
+        >>> a
+        Dseq(-4)
+        aa
+        tttg
+        >>> a.three_prime_end()
+        ("5'", 'gt')
+        >>> a = Dseq("aaac", "tt", ovhg=0)
+        >>> a
+        Dseq(-4)
+        aaac
+        tt
+        >>> a.three_prime_end()
+        ("3'", 'ac')
         >>> from pydna.dseq import Dseq
         >>> a=Dseq("aaa", "ttt")
         >>> a
@@ -1017,21 +1128,6 @@ class Dseq(_Seq):
         ttt
         >>> a.three_prime_end()
         ('blunt', '')
-        >>> a=Dseq("aaa", "ttt", ovhg=1)
-        >>> a
-        Dseq(-4)
-         aaa
-        ttt
-        >>> a.three_prime_end()
-        ("3'", 'a')
-        >>> a=Dseq("aaa", "ttt", ovhg=-1)
-        >>> a
-        Dseq(-4)
-        aaa
-         ttt
-        >>> a.three_prime_end()
-        ("5'", 't')
-        >>>
         See also
         --------
@@ -1039,42 +1135,73 @@ class Dseq(_Seq):
         """
-        ovhg = len(self.watson) - len(self.crick) + self.ovhg
+        # See docstring for function pydna.utils.get_parts for details
+        # on what is contained in parts.
+        parts = self.get_parts()
+        sticky5 = parts.sticky_right5.translate(dscode_to_crick_table)[::-1]
+        sticky3 = parts.sticky_right3.translate(dscode_to_watson_table)
+        single_watson = parts.single_watson.translate(dscode_to_watson_table)
+        single_crick = parts.single_crick.translate(dscode_to_crick_table)[::-1]
-        if ovhg < 0:
-            sticky = self.crick[:-ovhg].lower()
+        # The walrus operator returns the value being assigned, so
+        # we can test if it is empty or not.
+        if sticky := single_watson:
+            type_ = "single"
+        elif sticky := single_crick:
+            type_ = "single"
+        elif sticky5 == sticky3 == "":
+            type_, sticky = "blunt", ""
+        elif sticky := sticky5:
             type_ = "5'"
-        elif ovhg > 0:
-            sticky = self.watson[-ovhg:].lower()
+        elif sticky := sticky3:
             type_ = "3'"
-        else:
-            sticky = ""
-            type_ = "blunt"
-        return type_, sticky
-    def watson_ovhg(self) -> int:
-        """Returns the overhang of the watson strand at the three prime."""
-        return len(self.watson) - len(self.crick) + self.ovhg
+        return type_, sticky.lower()
-    def __add__(self: DseqType, other: DseqType) -> DseqType:
-        """Simulates ligation between two DNA fragments.
+    def __add__(self: DseqType, other: [DseqType, str, bytes]) -> DseqType:
+        """
+        Adding two Dseq objects together.
+        >>> ds = Dseq("a", "t", ovhg=0)
+        >>> ds
+        Dseq(-1)
+        a
+        t
+        >>> ds + ds
+        Dseq(-2)
+        aa
+        tt
+        >>> "g" + ds # adding a string on the left side returns a Dseq
+        Dseq(-2)
+        ga
+        ct
+        >>> ds + "c" # adding a string on the right side returns a Dseq
+        Dseq(-2)
+        ac
+        tg
-        Add other Dseq object at the end of the sequence.
-        Type error is raised if any of the points below are fulfilled:
-        * one or more objects are circular
-        * if three prime sticky end of self is not the same type
-          (5' or 3') as the sticky end of other
-        * three prime sticky end of self complementary with five
-          prime sticky end of other.
+        Parameters
+        ----------
+        other : [DseqType, str, bytes]
+            Object to be added.
-        Phosphorylation and dephosphorylation is not considered.
+        Raises
+        ------
+        TypeError
+            Preventing adding to a circular sequence.
-        DNA is allways presumed to have the necessary 5' phospate
-        group necessary for ligation.
+        Returns
+        -------
+        DseqType
+            A new Dseq object.
         """
-        # test for circular DNA
         if self.circular:
             raise TypeError("circular DNA cannot be ligated!")
         try:
@@ -1083,60 +1210,85 @@ class Dseq(_Seq):
         except AttributeError:
             pass
+        # If other evaluates to False, return a copy of self.
+        if not other:
+            return copy.deepcopy(self)
+        # If self evaluates to False, return a copy of other.
+        elif not self:
+            return copy.deepcopy(other)
+        # get right side end properties for self.
         self_type, self_tail = self.three_prime_end()
-        other_type, other_tail = other.five_prime_end()
-        if self_type == other_type and str(self_tail) == str(_rc(other_tail)):
-            answer = Dseq.quick(
-                self.watson + other.watson, other.crick + self.crick, self.ovhg
-            )
-        elif not self:
-            answer = _copy.deepcopy(other)
-        elif not other:
-            answer = _copy.deepcopy(self)
-        else:
-            raise TypeError("sticky ends not compatible!")
-        return answer
+        try:
+            other_type, other_tail = other.five_prime_end()
+        except AttributeError:
+            # if other does not have the expected properties
+            # most likely it is a string that can be cast as
+            # a Dseq.
+            other_type, other_tail = "blunt", ""
+            other = Dseq(other)
+        err = TypeError("sticky ends not compatible!")
+        # The sticky ends have to be of the same type,
+        # or
+        # one or both of them must be "single", indicating a single-stranded molecule.
+        if (self_type != other_type) and ("single" not in (self_type, other_type)):
+            raise err
+        # tail length has to be equal for two phosphodiester bonds to form
+        if len(self_tail) != len(other_tail):
+            raise err
+        # Each basepair is checked against the pydna.alphabet basepair_dict
+        # which contains the permitted base pairings.
+        for w, c in zip(self_tail, other_tail[::-1]):
+            try:
+                basepair_dict[(w, c)]
+            except KeyError:
+                raise err
+        return self.__class__(
+            self.watson + other.watson, other.crick + self.crick, self.ovhg
+        )
     def __mul__(self: DseqType, number: int) -> DseqType:
         if not isinstance(number, int):
             raise TypeError(
-                "TypeError: can't multiply Dseq by non-int of type {}".format(
-                    type(number)
-                )
+                "TypeError: can't multiply Dseq" f" by non-int of type {type(number)}"
             )
-        if number <= 0:
-            return self.__class__("")
-        new = _copy.deepcopy(self)
-        for i in range(number - 1):
-            new += self
-        return new
+        return Dseq("").join(list(itertools.repeat(self, number)))
-    def _fill_in_five_prime(self: DseqType, nucleotides: str) -> str:
+    def _fill_in_left(self: DseqType, nucleotides: str) -> str:
         stuffer = ""
         type, se = self.five_prime_end()
         if type == "5'":
-            for n in _rc(se):
+            for n in rc(se):
                 if n in nucleotides:
                     stuffer += n
                 else:
                     break
         return self.crick + stuffer, self.ovhg + len(stuffer)
-    def _fill_in_three_prime(self: DseqType, nucleotides: str) -> str:
+    def _fill_in_right(self: DseqType, nucleotides: str) -> str:
         stuffer = ""
         type, se = self.three_prime_end()
         if type == "5'":
-            for n in _rc(se):
+            for n in rc(se):
                 if n in nucleotides:
                     stuffer += n
                 else:
                     break
         return self.watson + stuffer
-    def fill_in(self, nucleotides: _Union[None, str] = None) -> "Dseq":
+    def fill_in(self, nucleotides: Union[None, str] = None) -> DseqType:
         """Fill in of five prime protruding end with a DNA polymerase
-        that has only DNA polymerase activity (such as exo-klenow [#]_)
+        that has only DNA polymerase activity (such as Exo-Klenow [#]_).
+        Exo-Klenow is a modified version of the Klenow fragment of E.
+        coli DNA polymerase I, which has been engineered to lack both
+        3'-5' proofreading and 5'-3' exonuclease activities.
         and any combination of A, G, C or T. Default are all four
         nucleotides together.
@@ -1149,15 +1301,6 @@ class Dseq(_Seq):
         --------
         >>> from pydna.dseq import Dseq
-        >>> a=Dseq("aaa", "ttt")
-        >>> a
-        Dseq(-3)
-        aaa
-        ttt
-        >>> a.fill_in()
-        Dseq(-3)
-        aaa
-        ttt
         >>> b=Dseq("caaa", "cttt")
         >>> b
         Dseq(-5)
@@ -1184,7 +1327,15 @@ class Dseq(_Seq):
         Dseq(-5)
          aaac
         gttt
-        >>>
+        >>> a=Dseq("aaa", "ttt")
+        >>> a
+        Dseq(-3)
+        aaa
+        ttt
+        >>> a.fill_in()
+        Dseq(-3)
+        aaa
+        ttt
         References
         ----------
@@ -1195,32 +1346,31 @@ class Dseq(_Seq):
             nucleotides = "GATCRYWSMKHBVDN"
         nucleotides = set(nucleotides.lower() + nucleotides.upper())
-        crick, ovhg = self._fill_in_five_prime(nucleotides)
-        watson = self._fill_in_three_prime(nucleotides)
+        crick, ovhg = self._fill_in_left(nucleotides)
+        watson = self._fill_in_right(nucleotides)
         return Dseq(watson, crick, ovhg)
-    def transcribe(self) -> _Seq:
-        return _Seq(self.watson).transcribe()
-    def translate(
-        self, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap="-"
-    ) -> _Seq:
-        return _Seq(
-            _translate_str(str(self), table, stop_symbol, to_stop, cds, gap=gap)
-        )
+    klenow = fill_in  # alias
-    def mung(self) -> "Dseq":
+    def nibble_to_blunt(self) -> DseqType:
         """
-        Simulates treatment a nuclease with 5'-3' and 3'-5' single
+        Simulates treatment with a nuclease with both 5'-3' and 3'-5' single
         strand specific exonuclease activity (such as mung bean nuclease [#]_)
+        Mung bean nuclease is a nuclease enzyme derived from mung bean sprouts
+        that preferentially degrades single-stranded DNA and RNA into
+        5'-phosphate- and 3'-hydroxyl-containing nucleotides.
+        Treatment results in blunt DNA, regardless of whether the protruding end
+        is 5' or 3'.
         ::
              ggatcc    ->     gatcc
               ctaggg          ctagg
-              ggatcc   ->      ggatc
-             tcctag            cctag
+              ggatcc   ->     ggatc
+             tcctag           cctag
          >>> from pydna.dseq import Dseq
          >>> b=Dseq("caaa", "cttt")
@@ -1250,19 +1400,60 @@ class Dseq(_Seq):
         """
-        return Dseq(
-            self.watson[
-                max(0, -self.ovhg) : min(len(self.watson), len(self.crick) - self.ovhg)
-            ]
-        )
+        parts = self.get_parts()
+        return self.__class__(parts.middle)
+    mung = nibble_to_blunt
+    def T4(self, nucleotides=None) -> DseqType:
+        """
+        Fill in 5' protruding ends and nibble 3' protruding ends.
+        This is done using a DNA polymerase providing 3'-5' nuclease activity
+        such as T4 DNA polymerase. This can be done in presence of any
+        combination of the four nucleotides A, G, C or T.
+        T4 DNA polymerase is widely used to “polish” DNA ends because of its
+        strong 3'-5' exonuclease activity: in the absence of dNTPs, it chews
+        back 3′ overhangs to create blunt ends; in the presence of limiting
+        dNTPs, it can fill in 5′ overhangs; and by carefully controlling
+        reaction time, temperature, and nucleotide supply, you can generate
+        defined recessed or blunt termini.
+        Tuning the nucleotide set can facilitate engineering of partial
+        sticky ends. Default are all four nucleotides together.
+        ::
+                  aaagatc-3        aaa      3' ends are always removed.
+                  |||       --->   |||      A and T needed or the molecule will
+            3-ctagttt              ttt      degrade completely.
+            5-gatcaaa              gatcaaaGATC      5' ends are filled in the
+                  |||       --->   |||||||||||      presence of GATC
+                  tttctag-5        CTAGtttctag
+            5-gatcaaa              gatcaaaGAT       5' ends are partially filled in the
+                  |||       --->    |||||||||       presence of GAT to produce a 1 nt
+                  tttctag-5         TAGtttctag      5' overhang
+            5-gatcaaa              gatcaaaGA       5' ends are partially filled in the
+                  |||       --->     |||||||       presence of GA to produce a 2 nt
+                  tttctag-5          AGtttctag     5' overhang
+            5-gatcaaa              gatcaaaG        5' ends are partially filled in the
+                  |||       --->      |||||        presence of G to produce a 3 nt
+                  tttctag-5           Gtttctag     5' overhang
-    def T4(self, nucleotides=None) -> "Dseq":
-        """Fill in five prime protruding ends and chewing back
-        three prime protruding ends by a DNA polymerase providing both
-        5'-3' DNA polymerase activity and 3'-5' nuclease acitivty
-        (such as T4 DNA polymerase). This can be done in presence of any
-        combination of the four A, G, C or T. Removing one or more nucleotides
-        can facilitate engineering of sticky ends. Default are all four nucleotides together.
         Parameters
         ----------
@@ -1273,29 +1464,31 @@ class Dseq(_Seq):
         --------
         >>> from pydna.dseq import Dseq
-        >>> a=Dseq("gatcgatc")
+        >>> a = Dseq.from_representation(
+        ... '''
+        ... gatcaaa
+        ...     tttctag
+        ... ''')
         >>> a
-        Dseq(-8)
-        gatcgatc
-        ctagctag
+        Dseq(-11)
+        gatcaaa
+            tttctag
         >>> a.T4()
-        Dseq(-8)
-        gatcgatc
-        ctagctag
-        >>> a.T4("t")
-        Dseq(-8)
-        gatcgat
-         tagctag
-        >>> a.T4("a")
-        Dseq(-8)
-        gatcga
-          agctag
-        >>> a.T4("g")
-        Dseq(-8)
-        gatcg
-           gctag
-        >>>
+        Dseq(-11)
+        gatcaaagatc
+        ctagtttctag
+        >>> a.T4("GAT")
+        Dseq(-11)
+        gatcaaagat
+         tagtttctag
+        >>> a.T4("GA")
+        Dseq(-11)
+        gatcaaaga
+          agtttctag
+        >>> a.T4("G")
+        Dseq(-11)
+        gatcaaag
+           gtttctag
         """
         if not nucleotides:
@@ -1303,7 +1496,7 @@ class Dseq(_Seq):
         nucleotides = set(nucleotides.lower() + nucleotides.upper())
         type, se = self.five_prime_end()
         if type == "5'":
-            crick, ovhg = self._fill_in_five_prime(nucleotides)
+            crick, ovhg = self._fill_in_left(nucleotides)
         else:
             if type == "3'":
                 ovhg = 0
@@ -1323,7 +1516,7 @@ class Dseq(_Seq):
         watson = self.watson
         type, se = self.three_prime_end()
         if type == "5'":
-            watson = self._fill_in_three_prime(nucleotides)
+            watson = self._fill_in_right(nucleotides)
         else:
             if type == "3'":
                 watson = self.watson[: -len(se)]
@@ -1337,32 +1530,311 @@ class Dseq(_Seq):
     t4 = T4  # alias for the T4 method.
-    def exo1_front(self: DseqType, n=1) -> DseqType:
-        """5'-3' resection at the start (left side) of the molecule."""
-        d = _copy.deepcopy(self)
-        d.ovhg += n
-        d.watson = d.watson[n:]
-        return d
+    def nibble_five_prime_left(self: DseqType, n: int = 1) -> DseqType:
+        """
+        5' => 3'  resection at the left side (start) of the molecule.
+        The argument n indicates the number of nucleotides that are to be
+        removed. The outcome of this depends on the structure of the molecule.
+        See the two examples below:
+        The figure below indicates a recess of length two from a blunt DNA
+        fragment. The resulting DNA fragment has a 3' protruding single strand.
+        ::
+            gatc           tc
+            ||||   -->     ||
+            ctag         ctag
+        The figure below indicates a recess of length two from a DNA fragment
+        with a 5' sticky end resulting in a blunt sequence.
+        ::
+          ttgatc         gatc
+            ||||   -->   ||||
+            ctag         ctag
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("gatc")
+        >>> ds
+        Dseq(-4)
+        gatc
+        ctag
+        >>> ds.nibble_five_prime_left(2)
+        Dseq(-4)
+          tc
+        ctag
+        >>> ds.nibble_five_prime_left(3)
+        Dseq(-4)
+           c
+        ctag
+        >>> ds.nibble_five_prime_left(4)
+        Dseq(-4)
+        <BLANKLINE>
+        ctag
+        >>> ds = Dseq.from_representation(
+        ... '''
+        ... GGgatc
+        ...   ctag
+        ... ''')
+        >>> ds
+        Dseq(-6)
+        GGgatc
+          ctag
+        >>> ds.nibble_five_prime_left(2)
+        Dseq(-4)
+        gatc
+        ctag
+        Parameters
+        ----------
+        n : int, optional
+            The default is 1. This is the number of nucleotides removed.
+        Returns
+        -------
+        DseqType
+            A new Dseq with n nucleotides resected from the left 5' end.
+        """
+        recessed = copy.deepcopy(self)
+        n += max(0, self.ovhg or 0)
+        recessed = Dseq(
+            self._data[:n]
+            .translate(dscode_to_crick_table)
+            .translate(complement_table_for_dscode)
+            .translate(dscode_to_crick_tail_table)
+            .lstrip()
+            + self._data[n:]
+        )
+        return recessed
+    def nibble_five_prime_right(self: DseqType, n: int = 1) -> DseqType:
+        """
+        5' => 3'  resection at the right side (end) of the molecule.
+        The argument n indicates the number of nucleotides that are to be
+        removed. The outcome of this depends on the structure of the molecule.
+        See the two examples below:
+        The figure below indicates a recess of length two from a blunt DNA
+        fragment. The resulting DNA fragment has a 3' protruding single strand.
+        ::
+            gatc         gatc
+            ||||   -->   ||
+            ctag         ct
+        The figure below indicates a recess of length two from a DNA fragment
+        with a 5' sticky end resulting in a blunt sequence.
-    def exo1_end(self: DseqType, n=1) -> DseqType:
-        """5'-3' resection at the end (right side) of the molecule."""
-        d = _copy.deepcopy(self)
-        d.crick = d.crick[n:]
-        return d
+        ::
+            gatc         gatc
+            ||||   -->   ||||
+            ctagtt       ctag
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("gatc")
+        >>> ds
+        Dseq(-4)
+        gatc
+        ctag
+        >>> ds.nibble_five_prime_right(2)
+        Dseq(-4)
+        gatc
+        ct
+        >>> ds.nibble_five_prime_right(3)
+        Dseq(-4)
+        gatc
+        c
+        >>> ds.nibble_five_prime_right(4)
+        Dseq(-4)
+        gatc
+        <BLANKLINE>
+        >>> ds = Dseq.from_representation(
+        ... '''
+        ... gatc
+        ... ctagGG
+        ... ''')
+        >>> ds.nibble_five_prime_right(2)
+        Dseq(-4)
+        gatc
+        ctag
+        """
+        recessed = copy.deepcopy(self)
+        n = len(self) - n
+        ovhg = len(self) if self.right_ovhg is None else self.right_ovhg
+        n -= max(0, ovhg)
+        recessed = Dseq(
+            self._data[:n]
+            + self._data[n:]
+            .translate(dscode_to_watson_table)
+            .translate(dscode_to_watson_tail_table)
+            .lstrip()
+        )
+        return recessed
+    exo1_front = nibble_five_prime_left  # TODO: consider using the new names
+    exo1_end = nibble_five_prime_right  # TODO: consider using the new names
+    def nibble_three_prime_left(self: DseqType, n=1) -> DseqType:
+        """
+        3' => 5' resection at the left side (beginning) of the molecule.
+        The argument n indicates the number of nucleotides that are to be
+        removed. The outcome of this depends on the structure of the molecule.
+        See the two examples below:
+        The figure below indicates a recess of length two from a blunt DNA
+        fragment. The resulting DNA fragment has a 5' protruding single strand.
+        ::
+            gatc         gatc
+            ||||   -->     ||
+            ctag           ag
+        The figure below indicates a recess of length two from a DNA fragment
+        with a 3' sticky end resulting in a blunt sequence.
+        ::
+            gatc         gatc
+            ||||   -->   ||||
+          ttctag         ctag
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("gatc")
+        >>> ds
+        Dseq(-4)
+        gatc
+        ctag
+        >>> ds.nibble_three_prime_left(2)
+        Dseq(-4)
+        gatc
+          ag
+        >>> ds.nibble_three_prime_left(3)
+        Dseq(-4)
+        gatc
+           g
+        >>> ds.nibble_three_prime_left(4)
+        Dseq(-4)
+        gatc
+        <BLANKLINE>
+        >>> ds = Dseq.from_representation(
+        ... '''
+        ...   gatc
+        ... CCctag
+        ... ''')
+        >>> ds
+        Dseq(-6)
+          gatc
+        CCctag
+        >>> ds.nibble_three_prime_left(2)
+        Dseq(-4)
+        gatc
+        ctag
+        """
+        ovhg = len(self) if self.ovhg is None else self.ovhg
+        n -= min(0, ovhg)
+        recessed = Dseq(
+            self._data[:n]
+            .translate(dscode_to_watson_table)
+            .translate(dscode_to_watson_tail_table)
+            .lstrip()
+            + self._data[n:]
+        )
+        return recessed
+    def nibble_three_prime_right(self: DseqType, n=1) -> DseqType:
+        """
+        3' => 5' resection at the right side (end) of the molecule.
+        The argument n indicates the number of nucleotides that are to be
+        removed. The outcome of this depends on the structure of the molecule.
+        See the two examples below:
+        The figure below indicates a recess of length two from a blunt DNA
+        fragment. The resulting DNA fragment has a 5' protruding single strand.
+        ::
+            gatc         ga
+            ||||   -->   ||
+            ctag         ctag
+        The figure below indicates a recess of length two from a DNA fragment
+        with a 3' sticky end resulting in a blunt sequence.
+        ::
+            gatctt       gatc
+            ||||   -->   ||||
+            ctag         ctag
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("gatc")
+        >>> ds
+        Dseq(-4)
+        gatc
+        ctag
+        >>> ds.nibble_three_prime_right(2)
+        Dseq(-4)
+        ga
+        ctag
+        >>> ds.nibble_three_prime_right(3)
+        Dseq(-4)
+        g
+        ctag
+        >>> ds.nibble_three_prime_right(4)
+        Dseq(-4)
+        <BLANKLINE>
+        ctag
+        >>> ds = Dseq.from_representation(
+        ... '''
+        ... gatcCC
+        ... ctag
+        ... ''')
+        >>> ds.nibble_three_prime_right(2)
+        Dseq(-4)
+        gatc
+        ctag
+        """
+        n = len(self) - n
+        ovhg = len(self) if self.right_ovhg is None else self.right_ovhg
+        n += min(0, ovhg)
+        recessed = Dseq(
+            self._data[:n]
+            + self._data[n:]
+            .translate(dscode_to_crick_table)
+            .translate(complement_table_for_dscode)
+            .translate(dscode_to_crick_tail_table)
+            .lstrip()
+        )
+        return recessed
     def no_cutters(
-        self, batch: _Union[_RestrictionBatch, None] = None
-    ) -> _RestrictionBatch:
+        self, batch: Union[RestrictionBatch, None] = None
+    ) -> RestrictionBatch:
         """Enzymes in a RestrictionBatch not cutting sequence."""
         if batch is None:
             batch = CommOnly
         ana = batch.search(self)
         ncut = {enz: sitelist for (enz, sitelist) in ana.items() if not sitelist}
-        return _RestrictionBatch(ncut)
+        return RestrictionBatch(ncut)
     def unique_cutters(
-        self, batch: _Union[_RestrictionBatch, None] = None
-    ) -> _RestrictionBatch:
+        self, batch: Union[RestrictionBatch, None] = None
+    ) -> RestrictionBatch:
         """Enzymes in a RestrictionBatch cutting sequence once."""
         if batch is None:
             batch = CommOnly
@@ -1371,44 +1843,42 @@ class Dseq(_Seq):
     once_cutters = unique_cutters  # alias for unique_cutters
     def twice_cutters(
-        self, batch: _Union[_RestrictionBatch, None] = None
-    ) -> _RestrictionBatch:
+        self, batch: Union[RestrictionBatch, None] = None
+    ) -> RestrictionBatch:
         """Enzymes in a RestrictionBatch cutting sequence twice."""
         if batch is None:
             batch = CommOnly
         return self.n_cutters(n=2, batch=batch)
     def n_cutters(
-        self, n=3, batch: _Union[_RestrictionBatch, None] = None
-    ) -> _RestrictionBatch:
+        self, n=3, batch: Union[RestrictionBatch, None] = None
+    ) -> RestrictionBatch:
         """Enzymes in a RestrictionBatch cutting n times."""
         if batch is None:
             batch = CommOnly
         ana = batch.search(self)
         ncut = {enz: sitelist for (enz, sitelist) in ana.items() if len(sitelist) == n}
-        return _RestrictionBatch(ncut)
+        return RestrictionBatch(ncut)
-    def cutters(
-        self, batch: _Union[_RestrictionBatch, None] = None
-    ) -> _RestrictionBatch:
+    def cutters(self, batch: Union[RestrictionBatch, None] = None) -> RestrictionBatch:
         """Enzymes in a RestrictionBatch cutting sequence at least once."""
         if batch is None:
             batch = CommOnly
         ana = batch.search(self)
         ncut = {enz: sitelist for (enz, sitelist) in ana.items() if sitelist}
-        return _RestrictionBatch(ncut)
+        return RestrictionBatch(ncut)
     def seguid(self) -> str:
         """SEGUID checksum for the sequence."""
         if self.circular:
-            cs = _cdseguid(
+            cs = cdseguid(
                 self.watson.upper(), self.crick.upper(), alphabet="{DNA-extended}"
             )
         else:
             """docstring."""
             w = f"{self.ovhg * '-'}{self.watson}{'-' * (-self.ovhg + len(self.crick) - len(self.watson))}".upper()
             c = f"{'-' * (self.ovhg + len(self.watson) - len(self.crick))}{self.crick}{-self.ovhg * '-'}".upper()
-            cs = _ldseguid(w, c, alphabet="{DNA-extended}")
+            cs = ldseguid(w, c, alphabet="{DNA-extended}")
         return cs
     def isblunt(self) -> bool:
@@ -1449,29 +1919,113 @@ class Dseq(_Seq):
         >>> a.isblunt()
         False
         """
-        return (
-            self.ovhg == 0 and len(self.watson) == len(self.crick) and not self.circular
+        parts = self.get_parts()
+        return not any(
+            (
+                parts.sticky_right5,
+                parts.sticky_right3,
+                parts.sticky_left3,
+                parts.sticky_left5,
+                self.circular,
+            )
         )
-    def cas9(self, RNA: str) -> _Tuple[slice, ...]:
-        """docstring."""
-        bRNA = bytes(RNA, "ASCII")
-        slices = []
-        cuts = [0]
-        for m in _re.finditer(bRNA, self._data):
-            cuts.append(m.start() + 17)
-        cuts.append(self.length)
-        slices = tuple(slice(x, y, 1) for x, y in zip(cuts, cuts[1:]))
-        return slices
-    def terminal_transferase(self, nucleotides="a") -> "Dseq":
-        """docstring."""
+    def terminal_transferase(self, nucleotides: str = "a") -> DseqType:
+        """
+        Terminal deoxynucleotidyl transferase (TdT) is a template-independent
+        DNA polymerase that adds nucleotides to the 3′-OH ends of DNA, typically
+        single-stranded or recessed 3′ ends. In cloning, it’s classically used
+        to create homopolymer tails (e.g. poly-dG on a vector and poly-dC on an insert)
+        so that fragments can anneal via complementary overhangs (“tailing” cloning).
+        This activity is also present in some DNA polymerases, such as Taq polymerase.
+        This property is used in the popular T/A cloning protocol ([#]_).
+        ::
+            gct          gcta
+            |||   -->    |||
+            cga         acga
+        >>> from pydna.dseq import Dseq
+        >>> a = Dseq("aa")
+        >>> a = Dseq("gct")
+        >>> a
+        Dseq(-3)
+        gct
+        cga
+        >>> a.terminal_transferase()
+        Dseq(-5)
+         gcta
+        acga
+        >>> a.terminal_transferase("G")
+        Dseq(-5)
+         gctG
+        Gcga
+        Parameters
+        ----------
+        nucleotides : str, optional
+            The default is "a".
+        Returns
+        -------
+        DseqType
+            A new Dseq with `nucleotides` appended to both the watson and the crick strand.
+        References
+        ----------
+        .. [#] https://en.wikipedia.org/wiki/TA_cloning
+        """
         ovhg = self.ovhg
         if self.ovhg >= 0:
             ovhg += len(nucleotides)
         return Dseq(self.watson + nucleotides, self.crick + nucleotides, ovhg)
-    def cut(self: DseqType, *enzymes: EnzymesType) -> _Tuple[DseqType, ...]:
+    def user(self) -> DseqType:
+        """
+        USER Enzyme treatment.
+        USER Enzyme is a mixture of Uracil DNA glycosylase (UDG) and the
+        DNA glycosylase-lyase Endonuclease VIII.
+        UDG catalyses the excision of a uracil base, forming an abasic
+        or apyrimidinic site (AP site). Endonuclease VIII removes the AP
+        site creating a DNA gap.
+        ::
+            tagaagtaggUat          tagaagtagg at
+            |||||||||||||  --->    |||||||||| ||
+            atcUtcatccata          atc tcatccata
+        >>> a = Dseq("tagaagtaggUat", "atcUtcatccata"[::-1], 0)
+        >>> a
+        Dseq(-13)
+        tagaagtaggUat
+        atcutcatccAta
+        >>> a.user()
+        Dseq(-13)
+        tagaagtagg at
+        atc tcatccAta
+        Returns
+        -------
+        DseqType
+            DNA fragment with uracil bases removed.
+        """
+        return Dseq(self._data.translate(bytes.maketrans(b"UuOo", b"ZzEe")))
+    def cut(self: DseqType, *enzymes: EnzymesType) -> Tuple[DseqType, ...]:
         """Returns a list of linear Dseq fragments produced in the digestion.
         If there are no cuts, an empty list is returned.
@@ -1522,11 +2076,73 @@ class Dseq(_Seq):
         return tuple(self.apply_cut(*cs) for cs in cutsite_pairs)
     def cutsite_is_valid(self, cutsite: CutSiteType) -> bool:
-        """Returns False if:
+        """
+        Check if a cutsite is valid.
+        A cutsite is a nested 2-tuple with this form:
+        ((cut_watson, ovhg), enz), for example ((396, -4), EcoRI)
+        The cut_watson (positive integer) is the cut position of the sequence as for example
+        returned by the Bio.Restriction module.
+        The ovhg (overhang, positive or negative integer or 0) has the same meaning as
+        for restriction enzymes in the Bio.Restriction module and for
+        pydna.dseq.Dseq objects (see docstring for this module and example below)
+        Enzyme can be None.
+        ::
+            Enzyme overhang
+            EcoRI  -4     --GAATTC--        --G       AATTC--
+                            ||||||     -->    |           |
+                          --CTTAAG--        --CTTAA       G--
+            KpnI    4     --GGTACC--        --GGTAC       C--
+                            ||||||     -->    |           |
+                          --CCATGG--        --C       CATGG--
+            SmaI    0     --CCCGGG--        --CCC       GGG--
+                            ||||||     -->    |||       |||
+                          --GGGCCC--        --GGG       CCC--
+        >>> from Bio.Restriction import EcoRI, KpnI, SmaI
+        >>> EcoRI.ovhg
+        -4
+        >>> KpnI.ovhg
+        4
+        >>> SmaI.ovhg
+        0
+        Returns False if:
         - Cut positions fall outside the sequence (could be moved to Biopython)
+        TODO: example
         - Overhang is not double stranded
+        TODO: example
         - Recognition site is not double stranded or is outside the sequence
+        TODO: example
         - For enzymes that cut twice, it checks that at least one possibility is valid
+        TODO: example
+        Parameters
+        ----------
+        cutsite : CutSiteType
+            A nested 2-tuple of the form ((cut_watson, ovhg), enz), as described above.
+        Returns
+        -------
+        bool
+            True if cutsite can cut the DNA fragment.
         """
         assert cutsite is not None, "cutsite is None"
@@ -1536,7 +2152,7 @@ class Dseq(_Seq):
         # The overhang is double stranded
         overhang_dseq = self[watson:crick] if ovhg < 0 else self[crick:watson]
-        if overhang_dseq.ovhg != 0 or overhang_dseq.watson_ovhg() != 0:
+        if overhang_dseq.ovhg != 0 or overhang_dseq.watson_ovhg != 0:
             return False
         # The recognition site is double stranded and within the sequence
@@ -1550,7 +2166,7 @@ class Dseq(_Seq):
         if (
             len(recognition_site) == 0
             or recognition_site.ovhg != 0
-            or recognition_site.watson_ovhg() != 0
+            or recognition_site.watson_ovhg != 0
         ):
             if enz is None or enz.scd5 is None:
                 return False
@@ -1569,20 +2185,22 @@ class Dseq(_Seq):
                 if (
                     len(recognition_site) == 0
                     or recognition_site.ovhg != 0
-                    or recognition_site.watson_ovhg() != 0
+                    or recognition_site.watson_ovhg != 0
                 ):
                     return False
         return True
-    def get_cutsites(self: DseqType, *enzymes: EnzymesType) -> _List[CutSiteType]:
+    def get_cutsites(self: DseqType, *enzymes: EnzymesType) -> List[CutSiteType]:
         """Returns a list of cutsites, represented represented as `((cut_watson, ovhg), enz)`:
         - `cut_watson` is a positive integer contained in `[0,len(seq))`, where `seq` is the sequence
           that will be cut. It represents the position of the cut on the watson strand, using the full
           sequence as a reference. By "full sequence" I mean the one you would get from `str(Dseq)`.
         - `ovhg` is the overhang left after the cut. It has the same meaning as `ovhg` in
           the `Bio.Restriction` enzyme objects, or pydna's `Dseq` property.
         - `enz` is the enzyme object. It's not necessary to perform the cut, but can be
            used to keep track of which enzyme was used.
@@ -1592,7 +2210,7 @@ class Dseq(_Seq):
         Parameters
         ----------
-        enzymes : Union[_RestrictionBatch,list[_AbstractCut]]
+        enzymes : Union[RestrictionBatch,list[_AbstractCut]]
         Returns
         -------
@@ -1628,11 +2246,11 @@ class Dseq(_Seq):
         """
-        if len(enzymes) == 1 and isinstance(enzymes[0], _RestrictionBatch):
+        if len(enzymes) == 1 and isinstance(enzymes[0], RestrictionBatch):
             # argument is probably a RestrictionBatch
             enzymes = [e for e in enzymes[0]]
-        enzymes = _flatten(enzymes)
+        enzymes = flatten(enzymes)
         out = list()
         for e in enzymes:
             # Positions of the cut on the watson strand. They are 1-based, so we subtract
@@ -1643,7 +2261,7 @@ class Dseq(_Seq):
         return sorted([cutsite for cutsite in out if self.cutsite_is_valid(cutsite)])
-    def left_end_position(self) -> _Tuple[int, int]:
+    def left_end_position(self) -> Tuple[int, int]:
         """
         The index in the full sequence of the watson and crick start positions.
@@ -1660,7 +2278,7 @@ class Dseq(_Seq):
             return self.ovhg, 0
         return 0, -self.ovhg
-    def right_end_position(self) -> _Tuple[int, int]:
+    def right_end_position(self) -> Tuple[int, int]:
         """The index in the full sequence of the watson and crick end positions.
         full sequence (str(self)) for all three cases is AAA
@@ -1672,13 +2290,210 @@ class Dseq(_Seq):
         ```
         """
-        if self.watson_ovhg() < 0:
-            return len(self) + self.watson_ovhg(), len(self)
-        return len(self), len(self) - self.watson_ovhg()
+        if self.watson_ovhg < 0:
+            return len(self) + self.watson_ovhg, len(self)
+        return len(self), len(self) - self.watson_ovhg
+    def get_ss_meltsites(self: DseqType, length: int) -> tuple[int, int]:
+        """
+        Single stranded DNA melt sites
+        Two lists of 2-tuples of integers are returned. Each tuple
+        (`((from, to))`) contains the start and end positions of a single
+        stranded region, shorter or equal to `length`.
+        In the example below, the middle 2 nt part is released from the
+        molecule.
+        ::
+            tagaa ta gtatg
+            ||||| || |||||  -->   [(6,8)], []
+            atcttcatccatac
+            tagaagtaggtatg
+            ||||| || |||||  -->   [], [(6,8)]
+            atctt at catac
+        The output of this method is used in the `melt_ss_dna` method in order
+        to determine the start and end positions of single stranded regions.
+        See get_ds_meltsites for melting ds sequences.
+        Examples
+        --------
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("tagaaqtaqgtatg")
+        >>> ds
+        Dseq(-14)
+        tagaa ta gtatg
+        atcttcatccatac
+        >>> cutsites = ds.get_ss_meltsites(2)
+        >>> cutsites
+        ([(6, 8)], [])
+        >>> ds[6:8]
+        Dseq(-2)
+        ta
+        at
+        >>> ds = Dseq("tagaaptapgtatg")
+        >>> ds
+        Dseq(-14)
+        tagaagtaggtatg
+        atctt at catac
+        >>> cutsites = ds.get_ss_meltsites(2)
+        >>> cutsites
+        ([], [(6, 8)])
+        """
+        regex = regex_ss_melt_factory(length)
+        if self.circular:
+            spacer = length
+            cutfrom = self._data[-length:] + self._data + self._data[:length]
+        else:
+            spacer = 0
+            cutfrom = self._data
+        watson_cuts = []
+        crick_cuts = []
+        for m in regex.finditer(cutfrom):
+            if m.lastgroup == "watson":
+                cut1 = m.start() + spacer
+                cut2 = m.end() + spacer
+                watson_cuts.append((cut1, cut2))
+            else:
+                assert m.lastgroup == "crick"
+                cut1 = m.start() + spacer
+                cut2 = m.end() + spacer
+                crick_cuts.append((cut1, cut2))
+        return watson_cuts, crick_cuts
+    def get_ds_meltsites(self: DseqType, length: int) -> List[CutSiteType]:
+        """
+        Double stranded DNA melt sites
+        DNA molecules can fall apart by melting if they have internal single
+        stranded regions. In the example below, the molecule has two gaps
+        on opposite sides, two nucleotides apart, which means that it hangs
+        together by two basepairs.
+        This molecule can melt into two separate 8 bp double stranded
+        molecules, each with 3 nt 3' overhangs as depicted below.
+        ::
+            tagaagta gtatg        tagaagta          gtatg
+            ||||| || |||||  -->   |||||             |||||
+            atctt atccatac        atctt          atccatac
+        A list of 2-tuples is returned. Each tuple (`((cut_watson, ovhg), None)`)
+        contains cut position and the overhang value in the same format as
+        returned by the get_cutsites method for restriction enzymes.
+        Note that this function deals with melting that results in two double
+        stranded DNA molecules.
+        See get_ss_meltsites for melting of single stranded regions from
+        molecules.
+        Examples
+        --------
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("tagaaptaqgtatg")
+        >>> ds
+        Dseq(-14)
+        tagaagta gtatg
+        atctt atccatac
+        >>> cutsite = ds.get_ds_meltsites(2)
+        >>> cutsite
+        [((8, 2), None)]
+        """
+        if length < 1:
+            return tuple()
+        regex = regex_ds_melt_factory(length)
+        if self.circular:
+            spacer = length
+            cutfrom = self._data[-length:] + self._data + self._data[:length]
+        else:
+            spacer = 0
+            cutfrom = self._data
+        cuts = []
+        for m in regex.finditer(cutfrom):
+            if m.lastgroup == "watson":
+                cut = (m.end() - spacer, m.end() - m.start()), None
+            else:
+                assert m.lastgroup == "crick"
+                cut = (m.start() - spacer, m.start() - m.end()), None
+            cuts.append(cut)
+        return cuts
+    def cast_to_ds_right(self):
+        """
+        NNNN               NNNNGATC
+        ||||       -->     ||||||||
+        NNNNCTAG           NNNNCTAG
+        NNNNGATC           NNNNGATC
+        ||||       -->     ||||||||
+        NNNN               NNNNCTAG
+        """
+        p = self.get_parts()
+        ds_stuffer = (p.sticky_right5 or p.sticky_right3).translate(
+            dscode_to_full_sequence_table
+        )
+        result = (p.sticky_left5 or p.sticky_left3) + p.middle + ds_stuffer
+        return self.__class__(result, circular=False)
+    def cast_to_ds(self):
+        """Sequentially calls cast_to_ds_left and cast_to_ds_right."""
+        return self.cast_to_ds_left().cast_to_ds_right()
+    def cast_to_ds_left(self):
+        """
+        GATCNNNN           GATCNNNN
+            ||||   -->     ||||||||
+            NNNN           CTAGNNNN
+            NNNN           GATCNNNN
+            ||||   -->     ||||||||
+        CTAGNNNN           CTAGNNNN
+        """
+        p = self.get_parts()
+        ds_stuffer = (p.sticky_left5 or p.sticky_left3).translate(
+            dscode_to_full_sequence_table
+        )
+        result = ds_stuffer + p.middle + (p.sticky_right5 or p.sticky_right3)
+        return self.__class__(result, circular=False)
     def get_cut_parameters(
-        self, cut: _Union[CutSiteType, None], is_left: bool
-    ) -> _Tuple[int, int, int]:
+        self, cut: Union[CutSiteType, None], is_left: bool
+    ) -> Tuple[int, int, int]:
         """For a given cut expressed as ((cut_watson, ovhg), enz), returns
         a tuple (cut_watson, cut_crick, ovhg).
@@ -1703,7 +2518,169 @@ class Dseq(_Seq):
         if is_left:
             return *self.left_end_position(), self.ovhg
         # In the right end, the overhang does not matter
-        return *self.right_end_position(), self.watson_ovhg()
+        return *self.right_end_position(), self.watson_ovhg
+    def melt(self, length):
+        """
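+        Melt the molecule into the fragments that separate for a given `length`.
+        Single stranded regions are first shed with `melt_ss_dna`. The
+        remaining molecule is then cut at the sites reported by
+        `get_ds_meltsites` for the same `length`.
+        Parameters
+        ----------
+        length : int
+            Passed on to `melt_ss_dna` and `get_ds_meltsites`. An empty tuple
+            is returned if this is None, zero or negative.
+        Returns
+        -------
+        tuple
+            The shed single strands followed by the fragments obtained by
+            cutting at the double stranded melt sites. If strands were shed
+            but cutting produced no fragments, the remaining molecule is
+            returned after the strands.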
+        """
+        if not length or length < 1:
+            return tuple()
+        # First we need to get rid of single stranded sequences
+        new, strands = self.melt_ss_dna(length)
+        cutsites = new.get_ds_meltsites(length)
+        cutsite_pairs = self.get_cutsite_pairs(cutsites)
+        result = tuple(new.apply_cut(*cutsite_pair) for cutsite_pair in cutsite_pairs)
+        result = tuple([new]) if strands and not result else result
+        return tuple(strands) + tuple(result)
+    def melt_ss_dna(self, length) -> tuple["Dseq", list["Dseq"]]:
+        """
+        Melt to separate single stranded DNA
+        Single stranded DNA molecules shorter or equal to `length` shed from
+        a double stranded DNA molecule without affecting the length of the
+        remaining molecule.
+        In the examples below, the middle 2 nt part is released from the
+        molecule.
+        ::
+            tagaa ta gtatg        tagaa    gtatg          ta
+            ||||| || |||||  -->   |||||    |||||     +    ||
+            atcttcatccatac        atcttcatccatac
+            tagaagtaggtatg        tagaagtaggtatg
+            ||||| || |||||  -->   |||||    |||||     +    ||
+            atctt at catac        atctt    catac          at
+        Examples
+        --------
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("tagaaqtaqgtatg")
+        >>> ds
+        Dseq(-14)
+        tagaa ta gtatg
+        atcttcatccatac
+        >>> new, strands  = ds.melt_ss_dna(2)
+        >>> new
+        Dseq(-14)
+        tagaa    gtatg
+        atcttcatccatac
+        >>> strands[0]
+        Dseq(-2)
+        ta
+        <BLANKLINE>
+        >>> ds = Dseq("tagaaptapgtatg")
+        >>> ds
+        Dseq(-14)
+        tagaagtaggtatg
+        atctt at catac
+        >>> new, strands = ds.melt_ss_dna(2)
+        >>> new
+        Dseq(-14)
+        tagaagtaggtatg
+        atctt    catac
+        >>> strands[0]
+        Dseq(-2)
+        <BLANKLINE>
+        at
+        """
+        watsonnicks, cricknicks = self.get_ss_meltsites(length)
+        new, strands = self.shed_ss_dna(watsonnicks, cricknicks)
+        return new, strands
+    def shed_ss_dna(
+        self,
+        watson_cutpairs: list[tuple[int, int]] = None,
+        crick_cutpairs: list[tuple[int, int]] = None,
+    ):
+        """
+        Separate parts of one of the DNA strands
+        Examples
+        --------
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("tagaagtaggtatg")
+        >>> ds
+        Dseq(-14)
+        tagaagtaggtatg
+        atcttcatccatac
+        >>> new, strands = ds.shed_ss_dna([(6, 8)],[])
+        >>> new
+        Dseq(-14)
+        tagaag  ggtatg
+        atcttcatccatac
+        >>> strands[0]
+        Dseq(-2)
+        ta
+        <BLANKLINE>
+        >>> new, strands = ds.shed_ss_dna([],[(6, 8)])
+        >>> new
+        Dseq(-14)
+        tagaagtaggtatg
+        atcttc  ccatac
+        >>> strands[0]
+        Dseq(-2)
+        <BLANKLINE>
+        at
+        >>> ds = Dseq("tagaagtaggtatg")
+        >>> new, (strand1, strand2) = ds.shed_ss_dna([(6, 8), (9, 11)],[])
+        >>> new
+        Dseq(-14)
+        tagaag  g  atg
+        atcttcatccatac
+        >>> strand1
+        Dseq(-2)
+        ta
+        <BLANKLINE>
+        >>> strand2
+        Dseq(-2)
+        gt
+        <BLANKLINE>
+        """
+        watson_cutpairs = watson_cutpairs or list()
+        crick_cutpairs = crick_cutpairs or list()
+        strands = []
+        new = bytearray(self._data)
+        for x, y in watson_cutpairs:
+            stuffer = new[x:y]
+            ss = Dseq.quick(new[x:y].translate(dscode_to_watson_tail_table))
+            new[x:y] = stuffer.translate(dscode_to_crick_tail_table)
+            strands.append(ss)
+        for x, y in crick_cutpairs:
+            stuffer = new[x:y]
+            ss = Dseq.quick(stuffer.translate(dscode_to_crick_tail_table))
+            new[x:y] = stuffer.translate(dscode_to_watson_tail_table)
+            strands.append(ss)
+        return Dseq.quick(new), strands
     def apply_cut(self, left_cut: CutSiteType, right_cut: CutSiteType) -> "Dseq":
         """Extracts a subfragment of the sequence between two cuts.
@@ -1760,25 +2737,22 @@ class Dseq(_Seq):
             GttCTTAA
         """
-        if _cuts_overlap(left_cut, right_cut, len(self)):
+        if cuts_overlap(left_cut, right_cut, len(self)):
             raise ValueError("Cuts by {} {} overlap.".format(left_cut[1], right_cut[1]))
         left_watson, left_crick, ovhg_left = self.get_cut_parameters(left_cut, True)
         right_watson, right_crick, _ = self.get_cut_parameters(right_cut, False)
         return Dseq(
-            str(self[left_watson:right_watson]),
-            # The line below could be easier to understand as _rc(str(self[left_crick:right_crick])), but it does not preserve the case
-            str(
-                self.reverse_complement()[
-                    len(self) - right_crick : len(self) - left_crick
-                ]
-            ),
+            self[left_watson:right_watson]._data.translate(dscode_to_watson_table),
+            self[left_crick:right_crick]
+            .reverse_complement()
+            ._data.translate(dscode_to_watson_table),
             ovhg=ovhg_left,
         )
     def get_cutsite_pairs(
-        self, cutsites: _List[CutSiteType]
-    ) -> _List[_Tuple[_Union[None, CutSiteType], _Union[None, CutSiteType]]]:
+        self, cutsites: List[CutSiteType]
+    ) -> List[Tuple[Union[None, CutSiteType], Union[None, CutSiteType]]]:
         """Returns pairs of cutsites that render the edges of the resulting fragments.
         A fragment produced by restriction is represented by a tuple of length 2 that
@@ -1828,3 +2802,105 @@ class Dseq(_Seq):
             cutsites.append(cutsites[0])
         return list(zip(cutsites, cutsites[1:]))
+    def get_parts(self):
+        """
+        Return the parts of this dsDNA sequence as a DseqParts instance.
+
+        This method takes no arguments: the dscode string that is split into
+        parts is ``self._data`` decoded as ASCII.
+
+        DseqParts instance field names:
+
+        ::
+
+             "sticky_left5"
+             |
+             |      "sticky_right5"
+             |      |
+            ---    ---
+            GGGATCC
+               TAGGTCA
+               ----
+                 |
+                 "middle"
+
+             "sticky_left3"
+             |
+             |      "sticky_right3"
+             |      |
+            ---    ---
+               ATCCAGT
+            CCCTAGG
+               ----
+                 |
+                 "middle"
+
+               "single_watson" (only an upper strand)
+               |
+            -------
+            ATCCAGT
+            |||||||
+
+               "single_crick" (only a lower strand)
+               |
+            -------
+            |||||||
+            CCCTAGG
+
+        Up to seven groups (0..6) are captured, but some are mutually
+        exclusive, which means that the other members of the same set are
+        empty strings:
+
+        - 0 or 1, not both: a DNA fragment has either a 5' or a 3' sticky end.
+        - 2 or 5 or 6: a DNA molecule has a ds region or is single stranded.
+        - 3 or 4, not both: either a 5' or a 3' sticky end.
+
+        Note that internal single stranded regions are not identified and will
+        be contained in the middle part if they are present.
+
+        Examples
+        --------
+        >>> from pydna.dseq import Dseq
+        >>> ds = Dseq("PPPATCFQZ")
+        >>> ds
+        Dseq(-9)
+        GGGATC
+           TAGTCA
+        >>> parts = ds.get_parts()
+        >>> parts
+        DseqParts(sticky_left5='PPP', sticky_left3='', middle='ATC', sticky_right3='', sticky_right5='FQZ', single_watson='', single_crick='')
+        >>> Dseq(parts.sticky_left5)
+        Dseq(-3)
+        GGG
+        <BLANKLINE>
+        >>> Dseq(parts.middle)
+        Dseq(-3)
+        ATC
+        TAG
+        >>> Dseq(parts.sticky_right5)
+        Dseq(-3)
+        <BLANKLINE>
+        TCA
+
+        Returns
+        -------
+        DseqParts
+            Named tuple with seven string fields describing the DNA molecule.
+
+            DseqParts(sticky_left5='', sticky_left3='',
+                      middle='',
+                      sticky_right3='', sticky_right5='',
+                      single_watson='', single_crick='')
+        """
+        # The bare name ``get_parts`` is looked up in the module namespace
+        # (method bodies do not see class scope), so this is not a recursive call.
+        return get_parts(self._data.decode("ascii"))

pydna 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl

pydna 5.5.4py3-none-any.whl → 5.5.5py3-none-any.whl