PyPI - pydna - Versions diffs - 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl - Mend

pydna 5.5.3-py3-none-any.whl → 5.5.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

pydna/__init__.py +24 -193
pydna/_pretty.py +8 -8
pydna/_thermodynamic_data.py +3 -3
pydna/alphabet.py +995 -0
pydna/amplicon.py +19 -24
pydna/amplify.py +75 -95
pydna/assembly.py +64 -81
pydna/assembly2.py +650 -405
pydna/codon.py +4 -4
pydna/common_sub_strings.py +6 -8
pydna/contig.py +203 -10
pydna/design.py +176 -60
pydna/download.py +6 -15
pydna/dseq.py +1794 -718
pydna/dseqrecord.py +220 -171
pydna/gateway.py +6 -6
pydna/gel.py +5 -5
pydna/genbank.py +43 -46
pydna/genbankfixer.py +89 -92
pydna/ladders.py +11 -12
pydna/oligonucleotide_hybridization.py +124 -0
pydna/opencloning_models.py +680 -0
pydna/parsers.py +45 -32
pydna/primer.py +4 -4
pydna/primer_screen.py +833 -0
pydna/readers.py +14 -9
pydna/seq.py +137 -47
pydna/seqrecord.py +54 -62
pydna/sequence_picker.py +2 -5
pydna/sequence_regex.py +6 -6
pydna/tm.py +17 -17
pydna/types.py +21 -18
pydna/utils.py +97 -75
{pydna-5.5.3.dist-info → pydna-5.5.5.dist-info}/METADATA +14 -46
pydna-5.5.5.dist-info/RECORD +43 -0
{pydna-5.5.3.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
pydna/conftest.py +0 -42
pydna/genbankfile.py +0 -42
pydna/genbankrecord.py +0 -168
pydna/goldengate.py +0 -45
pydna/ligate.py +0 -62
pydna/user_cloning.py +0 -29
pydna-5.5.3.dist-info/RECORD +0 -45
{pydna-5.5.3.dist-info → pydna-5.5.5.dist-info/licenses}/LICENSE.txt +0 -0

pydna/dseqrecord.py CHANGED Viewed

@@ -11,46 +11,44 @@ Seq and SeqRecord classes, respectively.
 The Dseq and Dseqrecord classes support the notion of circular and linear DNA topology.
 """
-from Bio.Restriction import RestrictionBatch as _RestrictionBatch
+from Bio.Restriction import RestrictionBatch
 from Bio.Restriction import CommOnly
-from pydna.dseq import Dseq as _Dseq
-from pydna._pretty import pretty_str as _pretty_str
-from pydna.utils import flatten as _flatten, location_boundaries as _location_boundaries
-# from pydna.utils import memorize as _memorize
-from pydna.utils import rc as _rc
-from pydna.utils import shift_location as _shift_location
-from pydna.utils import shift_feature as _shift_feature
-from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
-from Bio.SeqFeature import SeqFeature as _SeqFeature
+from pydna.dseq import Dseq
+from pydna._pretty import pretty_str
+from pydna.utils import flatten, location_boundaries
+from pydna.utils import shift_location
+from pydna.utils import shift_feature
+from pydna.common_sub_strings import common_sub_strings
+from Bio.SeqFeature import SeqFeature
 from Bio import SeqIO
-from Bio.SeqFeature import CompoundLocation as _CompoundLocation
-from Bio.SeqFeature import SimpleLocation as _SimpleLocation
-from pydna.seqrecord import SeqRecord as _SeqRecord
-from Bio.Seq import translate as _translate
-from pydna.utils import identifier_from_string as _identifier_from_string
-import copy as _copy
-import operator as _operator
-import os as _os
-import re as _re
-import time as _time
-import datetime as _datetime
-# import logging as _logging
-# _module_logger = _logging.getLogger("pydna." + __name__)
+from Bio.SeqFeature import CompoundLocation
+from Bio.SeqFeature import SimpleLocation
+from pydna.seqrecord import SeqRecord
+from Bio.Seq import translate
+from pydna.utils import identifier_from_string
+import copy
+import operator
+import os
+import re
+import time
+import datetime
+from typing import Union, TYPE_CHECKING
+from pydna.opencloning_models import SequenceCutSource
+if TYPE_CHECKING:  # pragma: no cover
+    from pydna.opencloning_models import Source
 try:
-    from IPython.display import display_html as _display_html
+    from IPython.display import display_html
 except ImportError:
-    def _display_html(item, raw=None):
+    def display_html(item, raw=None):
         return item
-class Dseqrecord(_SeqRecord):
+class Dseqrecord(SeqRecord):
     """Dseqrecord is a double stranded version of the Biopython SeqRecord [#]_ class.
     The Dseqrecord object holds a Dseq object describing the sequence.
     Additionally, Dseqrecord hold meta information about the sequence in the
@@ -127,7 +125,8 @@ class Dseqrecord(_SeqRecord):
     """
-    seq: _Dseq
+    seq: Dseq
+    source: Union["Source", None] = None
     def __init__(
         self,
@@ -135,17 +134,15 @@ class Dseqrecord(_SeqRecord):
         *args,
         circular=None,
         n=5e-14,  # mol ( = 0.05 pmol)
+        source=None,
         **kwargs,
     ):
-        #        _module_logger.info("### Dseqrecord initialized ###")
-        #        _module_logger.info("argument circular = %s", circular)
-        #        _module_logger.info("circular = %s", circular)
         if isinstance(record, str):
-            #           _module_logger.info("record is a string")
             super().__init__(
-                _Dseq.from_string(
-                    record,
+                Dseq.quick(
+                    record.encode("ascii"),
                     # linear=linear,
                     circular=bool(circular),
                 ),
@@ -159,14 +156,14 @@ class Dseqrecord(_SeqRecord):
                 record = record[:]
             elif circular is True:
                 record = record.looped()
-            #           _module_logger.info("record is a Dseq object")
             super().__init__(record, *args, **kwargs)
         # record is a Bio.Seq object ?
         elif hasattr(record, "transcribe"):
-            #           _module_logger.info("record is a Seq object")
             super().__init__(
-                _Dseq(
+                Dseq(
                     str(record),
                     # linear=linear,
                     circular=bool(circular),
@@ -177,13 +174,13 @@ class Dseqrecord(_SeqRecord):
         # record is a Bio.SeqRecord or Dseqrecord object ?
         elif hasattr(record, "features"):
-            #           _module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
             for key, value in list(record.__dict__.items()):
                 setattr(self, key, value)
             self.letter_annotations = {}
             # record.seq is a Dseq object ?
             if hasattr(record.seq, "watson"):
-                new_seq = _copy.copy(record.seq)
+                new_seq = copy.copy(record.seq)
                 if circular is False:
                     new_seq = new_seq[:]
                 elif circular is True:
@@ -191,7 +188,7 @@ class Dseqrecord(_SeqRecord):
                 self.seq = new_seq
             # record.seq has no watson strand, i.e. presumably a plain Bio.Seq object ?
             else:
-                self.seq = _Dseq(
+                self.seq = Dseq(
                     str(record.seq),
                     # linear=linear,
                     circular=bool(circular),
@@ -202,6 +199,7 @@ class Dseqrecord(_SeqRecord):
         self.map_target = None
         self.n = n  # amount in mol; the default 5e-14 mol is 0.05 pmol
         self.annotations.update({"molecule_type": "DNA"})
+        self.source = source
     @classmethod
     def from_string(
@@ -218,16 +216,14 @@ class Dseqrecord(_SeqRecord):
         # linear=True, circular=False, n = 5E-14, **kwargs):
         obj = cls.__new__(cls)  # Does not call __init__
         obj._per_letter_annotations = {}
-        obj.seq = _Dseq.quick(
-            record,
-            _rc(record),
-            ovhg=0,
+        obj.seq = Dseq.quick(
+            record.encode("ascii"),
             # linear=linear,
             circular=circular,
         )
-        obj.id = _pretty_str("id")
-        obj.name = _pretty_str("name")
-        obj.description = _pretty_str("description")
+        obj.id = pretty_str("id")
+        obj.name = pretty_str("name")
+        obj.description = pretty_str("description")
         obj.dbxrefs = []
         obj.annotations = {"molecule_type": "DNA"}
         obj.features = []
@@ -239,7 +235,7 @@ class Dseqrecord(_SeqRecord):
     @classmethod
     def from_SeqRecord(
         cls,
-        record: _SeqRecord,
+        record: SeqRecord,
         *args,
         circular=None,
         n=5e-14,
@@ -256,11 +252,10 @@ class Dseqrecord(_SeqRecord):
         obj.features = record.features
         obj.map_target = None
         obj.n = n
+        obj.source = None
         if circular is None:
             circular = record.annotations.get("topology") == "circular"
-        obj.seq = _Dseq.quick(
-            str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
-        )
+        obj.seq = Dseq.quick(record.seq._data, ovhg=0, circular=circular)
         return obj
     @property
@@ -330,14 +325,14 @@ class Dseqrecord(_SeqRecord):
         qualifiers = {}
         qualifiers.update(kwargs)
-        location = _CompoundLocation(
+        location = CompoundLocation(
             (
-                _SimpleLocation(x, self.seq.length, strand=strand),
-                _SimpleLocation(0, y, strand=strand),
+                SimpleLocation(x, len(self.seq), strand=strand),
+                SimpleLocation(0, y, strand=strand),
             )
         )
-        sf = _SeqFeature(location, type=type_, qualifiers=qualifiers)
+        sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
         if "label" not in qualifiers:
             qualifiers["label"] = [f"ft{len(location)}"]
@@ -386,35 +381,31 @@ class Dseqrecord(_SeqRecord):
         --------
         pydna.dseq.Dseq.looped
         """
-        new = _copy.copy(self)
-        # for key, value in list(self.__dict__.items()):
-        #     setattr(new, key, value)
-        new._seq = self.seq.looped()
-        five_prime = self.seq.five_prime_end()
-        for fn, fo in zip(new.features, self.features):
-            if five_prime[0] == "5'":
-                pass
-                # fn.location = fn.location + self.seq.ovhg
-            elif five_prime[0] == "3'":
-                fn.location = fn.location + (-self.seq.ovhg)
-            if fn.location.start < 0:
-                loc1 = _SimpleLocation(
-                    len(new) + fn.location.start, len(new), strand=fn.location.strand
-                )
-                loc2 = _SimpleLocation(0, fn.location.end, strand=fn.location.strand)
-                fn.location = _CompoundLocation([loc1, loc2])
-            if fn.location.end > len(new):
-                loc1 = _SimpleLocation(
-                    fn.location.start, len(new), strand=fn.location.strand
-                )
-                loc2 = _SimpleLocation(
-                    0, fn.location.end - len(new), strand=fn.location.strand
-                )
-                fn.location = _CompoundLocation([loc1, loc2])
-            fn.qualifiers = fo.qualifiers
+        new = copy.deepcopy(self)
+        new.seq = self.seq.looped()
+        old_length = len(self)  # Possibly longer, including sticky ends if any.
+        new_length = len(new)  # Possibly shorter, with blunt ends.
+        if old_length != new_length:  # Only False if self was blunt.
+            new_features = []
+            for fn in new.features:
+                if len(fn.location) > new_length:
+                    # Edge case: if the feature is longer than the sequence, it should be
+                    # dropped. This can happen in a sequence with overhangs, where the feature
+                    # spans both overhangs.
+                    #
+                    # Example:
+                    #  feature
+                    # <------>
+                    # aaACGT
+                    #   TGCAtt
+                    #
+                    # Circular sequence ACGTtt should not have that feature, so we drop it
+                    continue
+                fn.location = shift_location(fn.location, 0, new_length)
+                new_features.append(fn)
+            new.features = new_features
         return new
     def tolinear(self):  # pragma: no cover
@@ -436,16 +427,16 @@ class Dseqrecord(_SeqRecord):
         >>>
         """
-        import warnings as _warnings
+        import warnings
         from pydna import _PydnaDeprecationWarning
-        _warnings.warn(
+        warnings.warn(
             "tolinear method is obsolete; "
             "please use obj[:] "
             "instead of obj.tolinear().",
             _PydnaDeprecationWarning,
         )
-        new = _copy.copy(self)
+        new = copy.copy(self)
         for key, value in list(self.__dict__.items()):
             setattr(new, key, value)
         # new._seq = self.seq.tolinear()
@@ -456,7 +447,7 @@ class Dseqrecord(_SeqRecord):
     def terminal_transferase(self, nucleotides="a"):
         """docstring."""
-        newseq = _copy.deepcopy(self)
+        newseq = copy.deepcopy(self)
         newseq.seq = self.seq.terminal_transferase(nucleotides)
         for feature in newseq.features:
             feature.location += len(nucleotides)
@@ -496,12 +487,12 @@ class Dseqrecord(_SeqRecord):
         """
-        record = _copy.deepcopy(self)
+        record = copy.deepcopy(self)
         if f in ("genbank", "gb") and self.circular:
             record.annotations["topology"] = "circular"
         else:
             record.annotations["topology"] = "linear"
-        return _SeqRecord.format(record, f).strip()
+        return SeqRecord.format(record, f).strip()
     def write(self, filename=None, f="gb"):
         """Writes the Dseqrecord to a file using the format f, which must
@@ -534,9 +525,9 @@ class Dseqrecord(_SeqRecord):
             # generate a name if no name was given
         # if not isinstance(filename, str):  # is filename a string???
         #     raise ValueError("filename has to be a string, got", type(filename))
-        name, ext = _os.path.splitext(filename)
+        name, ext = os.path.splitext(filename)
         msg = f"<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>"
-        if not _os.path.isfile(filename):
+        if not os.path.isfile(filename):
             with open(filename, "w", encoding="utf8") as fp:
                 fp.write(self.format(f))
         else:
@@ -547,16 +538,16 @@ class Dseqrecord(_SeqRecord):
             if self.seq != old_file.seq:
                 # If new sequence is different, the old file is
                 # renamed with "_OLD_" suffix:
-                oldmtime = _datetime.datetime.fromtimestamp(
-                    _os.path.getmtime(filename)
+                oldmtime = datetime.datetime.fromtimestamp(
+                    os.path.getmtime(filename)
                 ).isoformat()
-                tstmp = int(_time.time() * 1_000_000)
+                tstmp = int(time.time() * 1_000_000)
                 old_filename = f"{name}_OLD_{tstmp}{ext}"
-                _os.rename(filename, old_filename)
+                os.rename(filename, old_filename)
                 with open(filename, "w", encoding="utf8") as fp:
                     fp.write(self.format(f))
-                newmtime = _datetime.datetime.fromtimestamp(
-                    _os.path.getmtime(filename)
+                newmtime = datetime.datetime.fromtimestamp(
+                    os.path.getmtime(filename)
                 ).isoformat()
                 msg = f"""
                 <table style="padding:10px 10px;
@@ -602,8 +593,8 @@ class Dseqrecord(_SeqRecord):
             elif "seguid" in old_file.annotations.get("comment", ""):
                 pattern = r"(ldseguid|cdseguid)-(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
                 # seguid=NNNNNNNNNNNNNNNNNNNNNNNNNNN_2020-10-10T11:11:11.111111
-                oldstamp = _re.search(pattern, old_file.description)
-                newstamp = _re.search(pattern, self.description)
+                oldstamp = re.search(pattern, old_file.description)
+                newstamp = re.search(pattern, self.description)
                 newdescription = self.description
                 if oldstamp and newstamp:
                     if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
@@ -612,7 +603,7 @@ class Dseqrecord(_SeqRecord):
                         )
                 elif oldstamp:
                     newdescription += " " + oldstamp.group(0)
-                newobj = _copy.copy(self)
+                newobj = copy.copy(self)
                 newobj.description = newdescription
                 with open(filename, "w", encoding="utf8") as fp:
@@ -620,7 +611,7 @@ class Dseqrecord(_SeqRecord):
             else:
                 with open(filename, "w", encoding="utf8") as fp:
                     fp.write(self.format(f))
-        return _display_html(msg, raw=True)
+        return display_html(msg, raw=True)
     def find(self, other):
         # TODO allow strings, seqs, seqrecords or Dseqrecords
@@ -638,7 +629,7 @@ class Dseqrecord(_SeqRecord):
     def __str__(self):
         return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
             self.circular, len(self)
-        ) + _SeqRecord.__str__(self)
+        ) + SeqRecord.__str__(self)
     def __contains__(self, other):
         if other.lower() in str(self.seq).lower():
@@ -649,7 +640,7 @@ class Dseqrecord(_SeqRecord):
             spc = 3 - ln % 3 if ln % 3 else 0
             s = "n" * spc + s + "nnn"
             for frame in range(3):
-                if other.lower() in _translate(s[frame : frame + spc + ln]).lower():
+                if other.lower() in translate(s[frame : frame + spc + ln]).lower():
                     return True
         return False
@@ -658,13 +649,13 @@ class Dseqrecord(_SeqRecord):
         >>> from pydna.dseqrecord import Dseqrecord
         >>> s=Dseqrecord("atgtacgatcgtatgctggttatattttag")
         >>> s.seq.translate()
-        Seq('MYDRMLVIF*')
+        ProteinSeq('MYDRMLVIF*')
         >>> "RML" in s
         True
         >>> "MMM" in s
         False
         >>> s.seq.rc().translate()
-        Seq('LKYNQHTIVH')
+        ProteinSeq('LKYNQHTIVH')
         >>> "QHT" in s.rc()
         True
         >>> "QHT" in s
@@ -680,7 +671,7 @@ class Dseqrecord(_SeqRecord):
         cgtatgctg
         gcatacgac
         >>> code.translate()
-        Seq('RML')
+        ProteinSeq('RML')
         """
         other = str(other).lower()
         assert self.seq.watson == "".join(self.seq.watson.split())
@@ -691,7 +682,7 @@ class Dseqrecord(_SeqRecord):
         start = None
         for frame in range(3):
             try:
-                start = _translate(s[frame : frame + ln + spc]).lower().index(other)
+                start = translate(s[frame : frame + ln + spc]).lower().index(other)
                 break
             except ValueError:
                 pass
@@ -739,7 +730,7 @@ class Dseqrecord(_SeqRecord):
         matching_reads = []
         for read_ in reads:
-            matches = _common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
+            matches = common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
             if not matches:
                 continue
@@ -760,14 +751,14 @@ class Dseqrecord(_SeqRecord):
             if len(newmatches) > 1:
                 ms = []
                 for m in newmatches:
-                    ms.append(_SimpleLocation(m[0], m[0] + m[2]))
-                loc = _CompoundLocation(ms)
+                    ms.append(SimpleLocation(m[0], m[0] + m[2]))
+                loc = CompoundLocation(ms)
             else:
                 a, b, c = newmatches[0]
-                loc = _SimpleLocation(a, a + c)
+                loc = SimpleLocation(a, a + c)
             self.features.append(
-                _SeqFeature(
+                SeqFeature(
                     loc,
                     qualifiers={"label": [read_.annotations["filename"]]},
                     type="trace",
@@ -777,9 +768,8 @@ class Dseqrecord(_SeqRecord):
         return [x.annotations["filename"] for x in matching_reads]
     def __repr__(self):
-        return "Dseqrecord({}{})".format(
-            {True: "-", False: "o"}[not self.circular], len(self)
-        )
+        top = {True: "-", False: "o"}[not self.circular]
+        return f"{self.__class__.__name__}({top}{len(self)})"
     def _repr_pretty_(self, p, cycle):
         p.text(
@@ -790,7 +780,7 @@ class Dseqrecord(_SeqRecord):
     def __add__(self, other):
         if hasattr(other, "seq") and hasattr(other.seq, "watson"):
-            other = _copy.deepcopy(other)
+            other = copy.deepcopy(other)
             other_five_prime = other.seq.five_prime_end()
             if other_five_prime[0] == "5'":
                 # add other.seq.ovhg
@@ -801,10 +791,10 @@ class Dseqrecord(_SeqRecord):
                 for f in other.features:
                     f.location = f.location + (-other.seq.ovhg)
-            answer = Dseqrecord(_SeqRecord.__add__(self, other))
+            answer = Dseqrecord(SeqRecord.__add__(self, other))
             answer.n = min(self.n, other.n)
         else:
-            answer = Dseqrecord(_SeqRecord.__add__(self, Dseqrecord(other)))
+            answer = Dseqrecord(SeqRecord.__add__(self, Dseqrecord(other)))
             answer.n = self.n
         return answer
@@ -818,7 +808,7 @@ class Dseqrecord(_SeqRecord):
         if self.circular:
             raise TypeError("TypeError: can't multiply circular Dseqrecord.")
         if number > 0:
-            new = _copy.deepcopy(self)
+            new = copy.deepcopy(self)
             for i in range(1, number):
                 new += self
             new._per_letter_annotations = self._per_letter_annotations
@@ -828,7 +818,7 @@ class Dseqrecord(_SeqRecord):
     def __getitem__(self, sl):
         """docstring."""
-        answer = Dseqrecord(_copy.copy(self))
+        answer = Dseqrecord(copy.copy(self))
         answer.seq = self.seq.__getitem__(sl)
         # answer.seq.alphabet = self.seq.alphabet
         # breakpoint()
@@ -850,9 +840,9 @@ class Dseqrecord(_SeqRecord):
                 f
                 for f in answer.features
                 if (
-                    _location_boundaries(f.location)[1] <= answer.seq.length
-                    and _location_boundaries(f.location)[0]
-                    < _location_boundaries(f.location)[1]
+                    location_boundaries(f.location)[1] <= len(answer.seq)
+                    and location_boundaries(f.location)[0]
+                    < location_boundaries(f.location)[1]
                 )
             ]
@@ -868,14 +858,18 @@ class Dseqrecord(_SeqRecord):
                 identifier = " ".join(sf.qualifiers["label"])
             elif "note" in sf.qualifiers:
                 identifier = " ".join(sf.qualifiers["note"])
-        answer.id = _identifier_from_string(identifier)[:16]
-        answer.name = _identifier_from_string("part_{name}".format(name=self.name))[:16]
+        answer.id = identifier_from_string(identifier)[:16]
+        answer.name = identifier_from_string("part_{name}".format(name=self.name))[:16]
         return answer
     def __eq__(self, other):
         """docstring."""
         try:
-            if self.seq == other.seq and str(self.__dict__) == str(other.__dict__):
+            this_dict = self.__dict__.copy()
+            other_dict = other.__dict__.copy()
+            del this_dict["source"]
+            del other_dict["source"]
+            if self.seq == other.seq and str(this_dict) == str(other_dict):
                 return True
         except AttributeError:
             pass
@@ -907,43 +901,34 @@ class Dseqrecord(_SeqRecord):
         answer.name = answer.id[:16]
         return fragments[0]
-    def no_cutters(self, batch: _RestrictionBatch = None):
+    def no_cutters(self, batch: RestrictionBatch = None):
         """docstring."""
         return self.seq.no_cutters(batch=batch or CommOnly)
-    def unique_cutters(self, batch: _RestrictionBatch = None):
+    def unique_cutters(self, batch: RestrictionBatch = None):
         """docstring."""
         return self.seq.unique_cutters(batch=batch or CommOnly)
-    def once_cutters(self, batch: _RestrictionBatch = None):
+    def once_cutters(self, batch: RestrictionBatch = None):
         """docstring."""
         return self.seq.once_cutters(batch=batch or CommOnly)
-    def twice_cutters(self, batch: _RestrictionBatch = None):
+    def twice_cutters(self, batch: RestrictionBatch = None):
         """docstring."""
         return self.seq.twice_cutters(batch=batch or CommOnly)
-    def n_cutters(self, n=3, batch: _RestrictionBatch = None):
+    def n_cutters(self, n=3, batch: RestrictionBatch = None):
         """docstring."""
         return self.seq.n_cutters(n=n, batch=batch or CommOnly)
-    def cutters(self, batch: _RestrictionBatch = None):
+    def cutters(self, batch: RestrictionBatch = None):
         """docstring."""
         return self.seq.cutters(batch=batch or CommOnly)
     def number_of_cuts(self, *enzymes):
         """The number of cuts by digestion with the Restriction enzymes
         contained in the iterable."""
-        return sum([len(enzyme.search(self.seq)) for enzyme in _flatten(enzymes)])
-    def cas9(self, RNA: str):
-        """docstring."""
-        fragments = []
-        result = []
-        for target in (self.seq, self.seq.rc()):
-            fragments = [self[sl.start : sl.stop] for sl in target.cas9(RNA)]
-            result.append(fragments)
-        return result
+        return sum([len(enzyme.search(self.seq)) for enzyme in flatten(enzymes)])
     def reverse_complement(self):
         """Reverse complement.
@@ -1020,7 +1005,7 @@ class Dseqrecord(_SeqRecord):
         if not self.circular:
             raise TypeError("Only circular DNA can be synced!")
-        newseq = _copy.copy(self)
+        newseq = copy.copy(self)
         s = str(self.seq.watson).lower()
         s_rc = str(self.seq.crick).lower()
@@ -1036,8 +1021,8 @@ class Dseqrecord(_SeqRecord):
         lim = min(limit, limit * (len(s) // limit) + 1)
-        c = _common_sub_strings(s + s, r, limit=lim)
-        d = _common_sub_strings(s_rc + s_rc, r, limit=lim)
+        c = common_sub_strings(s + s, r, limit=lim)
+        d = common_sub_strings(s_rc + s_rc, r, limit=lim)
         c = [(x[0], x[2]) for x in c if x[1] == 0]
         d = [(x[0], x[2]) for x in d if x[1] == 0]
@@ -1063,7 +1048,7 @@ class Dseqrecord(_SeqRecord):
             result = newseq
         else:
             result = newseq.shifted(start)
-        #       _module_logger.info("synced")
         return result
     def upper(self):
@@ -1092,7 +1077,7 @@ class Dseqrecord(_SeqRecord):
         --------
         pydna.dseqrecord.Dseqrecord.lower"""
-        upper = _copy.deepcopy(self)
+        upper = copy.deepcopy(self)
         # Saved first because the @seq.setter method would otherwise reset _per_letter_annotations to an empty dict
         prev_per_letter_annotation = upper._per_letter_annotations
         upper.seq = upper.seq.upper()
@@ -1126,7 +1111,7 @@ class Dseqrecord(_SeqRecord):
         pydna.dseqrecord.Dseqrecord.upper
         """
-        lower = _copy.deepcopy(self)
+        lower = copy.deepcopy(self)
         prev_per_letter_annotation = lower._per_letter_annotations
         lower.seq = lower.seq.lower()
         lower._per_letter_annotations = prev_per_letter_annotation
@@ -1144,8 +1129,8 @@ class Dseqrecord(_SeqRecord):
                 orf = self[x:y]
                 prt = orf.translate()
                 features.append(
-                    _SeqFeature(
-                        _SimpleLocation(x, y, strand=strand),
+                    SeqFeature(
+                        SimpleLocation(x, y, strand=strand),
                         type="CDS",
                         qualifiers={
                             "note": f"{y - x}bp {(y - x) // 3}aa",
@@ -1183,11 +1168,11 @@ class Dseqrecord(_SeqRecord):
         if self.features:
             f = self.features[feature]
             locations = sorted(
-                self.features[feature].location.parts, key=_SimpleLocation.start.fget
+                self.features[feature].location.parts, key=SimpleLocation.start.fget
             )
             strand = f.location.strand
         else:
-            locations = [_SimpleLocation(0, 0, 1)]
+            locations = [SimpleLocation(0, 0, 1)]
             strand = 1
         ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
@@ -1218,7 +1203,7 @@ class Dseqrecord(_SeqRecord):
             result += f"{s1}\n{s2}"
         else:
             result += f"{s2}\n{s1}"
-        return _pretty_str(result)
+        return pretty_str(result)
     def shifted(self, shift):
         """Circular Dseqrecord with a new origin <shift>.
@@ -1271,15 +1256,15 @@ class Dseqrecord(_SeqRecord):
             )
         ln = len(self)
         if not shift % ln:
-            return _copy.deepcopy(self)  # shift is a multiple of ln or 0
+            return copy.deepcopy(self)  # shift is a multiple of ln or 0
         else:
             shift %= ln  # now 0 < shift < ln (multiples of ln returned above)
         newseq = (self.seq[shift:] + self.seq[:shift]).looped()
-        newfeatures = _copy.deepcopy(self.features)
+        newfeatures = copy.deepcopy(self.features)
         for feature in newfeatures:
-            feature.location = _shift_location(feature.location, -shift, ln)
-        newfeatures.sort(key=_operator.attrgetter("location.start"))
-        answer = _copy.deepcopy(self)
+            feature.location = shift_location(feature.location, -shift, ln)
+        newfeatures.sort(key=operator.attrgetter("location.start"))
+        answer = copy.deepcopy(self)
         answer.features = newfeatures
         answer.seq = newseq
         return answer
@@ -1333,7 +1318,7 @@ class Dseqrecord(_SeqRecord):
         if left_cut == right_cut:
             # Not really a cut, but to handle the general case
             if left_cut is None:
-                features = _copy.deepcopy(self.features)
+                features = copy.deepcopy(self.features)
             else:
                 # The features that span the origin if shifting with left_cut, but that do not cross
                 # the cut site should be included, and if there is a feature within the cut site, it should
@@ -1356,7 +1341,7 @@ class Dseqrecord(_SeqRecord):
                 initial_shift = left_watson if left_ovhg < 0 else left_crick
                 features = self.shifted(initial_shift).features
                 # for f in features:
-                #     print(f.id, f.location, _location_boundaries(f.location))
+                #     print(f.id, f.location, location_boundaries(f.location))
                 # Here, we have done what's shown below (* indicates the origin).
                 # The features 0 and 2 have the right location for the final product:
                 #
@@ -1370,10 +1355,10 @@ class Dseqrecord(_SeqRecord):
                 features_need_transfer = [
                     f
                     for f in features
-                    if (_location_boundaries(f.location)[1] <= abs(left_ovhg))
+                    if (location_boundaries(f.location)[1] <= abs(left_ovhg))
                 ]
                 features_need_transfer = [
-                    _shift_feature(f, -abs(left_ovhg), len(self))
+                    shift_feature(f, -abs(left_ovhg), len(self))
                     for f in features_need_transfer
                 ]
@@ -1390,7 +1375,7 @@ class Dseqrecord(_SeqRecord):
                 # as the original one. However, the final product is longer because of the overhang.
                 features += [
-                    _shift_feature(f, abs(left_ovhg), len(dseq))
+                    shift_feature(f, abs(left_ovhg), len(dseq))
                     for f in features_need_transfer
                 ]
                 #                             ^                ^^^^^^^^^
@@ -1402,9 +1387,9 @@ class Dseqrecord(_SeqRecord):
                     f
                     for f in features
                     if (
-                        _location_boundaries(f.location)[1] <= len(dseq)
-                        and _location_boundaries(f.location)[0]
-                        <= _location_boundaries(f.location)[1]
+                        location_boundaries(f.location)[1] <= len(dseq)
+                        and location_boundaries(f.location)[0]
+                        <= location_boundaries(f.location)[1]
                     )
                 ]
         else:
@@ -1419,4 +1404,68 @@ class Dseqrecord(_SeqRecord):
             right_edge = right_watson if right_ovhg > 0 else right_crick
             features = self[left_edge:right_edge].features
-        return Dseqrecord(dseq, features=features)
+        # This will need to be generalised to all types of cuts
+        source = SequenceCutSource.from_parent(self, left_cut, right_cut)
+        return Dseqrecord(dseq, features=features, source=source)
+    def history(self):
+        """
+        Returns a string representation of the cloning history of the sequence.
+        Returns an empty string if the sequence has no source.
+        Check the documentation notebooks for extensive examples.
+        Returns
+        -------
+            str: A string representation of the cloning history of the sequence.
+        Examples
+        --------
+        >>> from pydna.dseqrecord import Dseqrecord
+        >>> from pydna.assembly2 import gibson_assembly
+        >>> fragments = [
+        ...    Dseqrecord("TTTTacgatAAtgctccCCCC", circular=False, name="fragment1"),
+        ...    Dseqrecord("CCCCtcatGGGG", circular=False, name="fragment2"),
+        ...    Dseqrecord("GGGGatataTTTT", circular=False, name="fragment3"),
+        ... ]
+        >>> product, *_ = gibson_assembly(fragments, limit=4)
+        >>> product.name = "product_name"
+        >>> print(product.history())
+        ╙── product_name (Dseqrecord(o34))
+            └─╼ GibsonAssemblySource
+                ├─╼ fragment1 (Dseqrecord(-21))
+                ├─╼ fragment2 (Dseqrecord(-12))
+                └─╼ fragment3 (Dseqrecord(-13))
+        """
+        if self.source is None:
+            return ""
+        return self.source.history_string(self)
+    def join(self, fragments):
+        """
+        Join an iterable of Dseqrecords with this instance as the separator.
+        Example:
+        >>> sep = Dseqrecord("a")
+        >>> joined = sep.join([Dseqrecord("A"), Dseqrecord("B"), Dseqrecord("C")])
+        >>> joined
+        Dseqrecord(-5)
+        >>> joined.seq
+        Dseq(-5)
+        AaBaC
+        TtVtG
+        """
+        it = iter(fragments)
+        try:
+            result = next(it)  # first element (no leading separator)
+        except StopIteration:
+            # Empty iterable -> return empty Dseqrecord in analogy with
+            # str.join
+            return Dseqrecord("")
+        # Interleave: result = first + sep + x + sep + y + ...
+        for x in it:
+            result = result + self + x
+        return result

pydna 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl

pydna 5.5.3-py3-none-any.whl → 5.5.5-py3-none-any.whl