PyPI - pydna - Versions diffs - 5.5.1__py3-none-any.whl → 5.5.3__py3-none-any.whl - Mend

pydna 5.5.1__py3-none-any.whl → 5.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

pydna/__init__.py +116 -134
pydna/_pretty.py +2 -14
pydna/all.py +10 -20
pydna/amplicon.py +25 -20
pydna/amplify.py +46 -26
pydna/assembly.py +50 -27
pydna/assembly2.py +2627 -0
pydna/common_sub_strings.py +2 -12
pydna/contig.py +39 -22
pydna/cre_lox.py +130 -0
pydna/crispr.py +8 -13
pydna/design.py +89 -59
pydna/download.py +10 -18
pydna/dseq.py +119 -59
pydna/dseqrecord.py +88 -45
pydna/fakeseq.py +0 -11
pydna/fusionpcr.py +3 -1
pydna/gateway.py +154 -152
pydna/gel.py +8 -13
pydna/genbank.py +33 -32
pydna/genbankfile.py +8 -13
pydna/genbankfixer.py +41 -28
pydna/genbankrecord.py +11 -14
pydna/goldengate.py +2 -2
pydna/ladders.py +4 -11
pydna/ligate.py +8 -14
pydna/parsers.py +25 -9
pydna/primer.py +3 -12
pydna/readers.py +0 -11
pydna/seq.py +21 -18
pydna/seqrecord.py +20 -20
pydna/sequence_picker.py +3 -12
pydna/sequence_regex.py +44 -0
pydna/tm.py +13 -15
pydna/types.py +41 -0
pydna/utils.py +173 -58
{pydna-5.5.1.dist-info → pydna-5.5.3.dist-info}/METADATA +22 -18
pydna-5.5.3.dist-info/RECORD +45 -0
pydna/editor.py +0 -119
pydna/myenzymes.py +0 -51
pydna/myprimers.py +0 -219
pydna-5.5.1.dist-info/RECORD +0 -44
{pydna-5.5.1.dist-info → pydna-5.5.3.dist-info}/LICENSE.txt +0 -0
{pydna-5.5.1.dist-info → pydna-5.5.3.dist-info}/WHEEL +0 -0

pydna/dseqrecord.py CHANGED Viewed

@@ -37,9 +37,9 @@ import time as _time
 import datetime as _datetime
-import logging as _logging
+# import logging as _logging
-_module_logger = _logging.getLogger("pydna." + __name__)
+# _module_logger = _logging.getLogger("pydna." + __name__)
 try:
@@ -127,6 +127,8 @@ class Dseqrecord(_SeqRecord):
     """
+    seq: _Dseq
     def __init__(
         self,
         record,
@@ -135,12 +137,12 @@ class Dseqrecord(_SeqRecord):
         n=5e-14,  # mol ( = 0.05 pmol)
         **kwargs,
     ):
-        _module_logger.info("### Dseqrecord initialized ###")
-        _module_logger.info("argument circular = %s", circular)
-        _module_logger.info("circular = %s", circular)
+        #        _module_logger.info("### Dseqrecord initialized ###")
+        #        _module_logger.info("argument circular = %s", circular)
+        #        _module_logger.info("circular = %s", circular)
         if isinstance(record, str):
-            _module_logger.info("record is a string")
+            #           _module_logger.info("record is a string")
             super().__init__(
                 _Dseq.from_string(
                     record,
@@ -157,12 +159,12 @@ class Dseqrecord(_SeqRecord):
                 record = record[:]
             elif circular is True:
                 record = record.looped()
-            _module_logger.info("record is a Dseq object")
+            #           _module_logger.info("record is a Dseq object")
             super().__init__(record, *args, **kwargs)
         # record is a Bio.Seq object ?
         elif hasattr(record, "transcribe"):
-            _module_logger.info("record is a Seq object")
+            #           _module_logger.info("record is a Seq object")
             super().__init__(
                 _Dseq(
                     str(record),
@@ -175,7 +177,7 @@ class Dseqrecord(_SeqRecord):
         # record is a Bio.SeqRecord or Dseqrecord object ?
         elif hasattr(record, "features"):
-            _module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
+            #           _module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
             for key, value in list(record.__dict__.items()):
                 setattr(self, key, value)
             self.letter_annotations = {}
@@ -256,7 +258,9 @@ class Dseqrecord(_SeqRecord):
         obj.n = n
         if circular is None:
             circular = record.annotations.get("topology") == "circular"
-        obj.seq = _Dseq.quick(str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular)
+        obj.seq = _Dseq.quick(
+            str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
+        )
         return obj
     @property
@@ -295,7 +299,9 @@ class Dseqrecord(_SeqRecord):
         """
         return super().extract_feature(n)
-    def add_feature(self, x=None, y=None, seq=None, type_="misc", strand=1, *args, **kwargs):
+    def add_feature(
+        self, x=None, y=None, seq=None, type_="misc", strand=1, *args, **kwargs
+    ):
         """Add a feature of type misc to the feature list of the sequence.
         Parameters
@@ -392,13 +398,19 @@ class Dseqrecord(_SeqRecord):
             elif five_prime[0] == "3'":
                 fn.location = fn.location + (-self.seq.ovhg)
             if fn.location.start < 0:
-                loc1 = _SimpleLocation(len(new) + fn.location.start, len(new), strand=fn.location.strand)
+                loc1 = _SimpleLocation(
+                    len(new) + fn.location.start, len(new), strand=fn.location.strand
+                )
                 loc2 = _SimpleLocation(0, fn.location.end, strand=fn.location.strand)
                 fn.location = _CompoundLocation([loc1, loc2])
             if fn.location.end > len(new):
-                loc1 = _SimpleLocation(fn.location.start, len(new), strand=fn.location.strand)
-                loc2 = _SimpleLocation(0, fn.location.end - len(new), strand=fn.location.strand)
+                loc1 = _SimpleLocation(
+                    fn.location.start, len(new), strand=fn.location.strand
+                )
+                loc2 = _SimpleLocation(
+                    0, fn.location.end - len(new), strand=fn.location.strand
+                )
                 fn.location = _CompoundLocation([loc1, loc2])
             fn.qualifiers = fo.qualifiers
@@ -428,7 +440,9 @@ class Dseqrecord(_SeqRecord):
         from pydna import _PydnaDeprecationWarning
         _warnings.warn(
-            "tolinear method is obsolete; " "please use obj[:] " "instead of obj.tolinear().",
+            "tolinear method is obsolete; "
+            "please use obj[:] "
+            "instead of obj.tolinear().",
             _PydnaDeprecationWarning,
         )
         new = _copy.copy(self)
@@ -533,13 +547,17 @@ class Dseqrecord(_SeqRecord):
             if self.seq != old_file.seq:
                 # If new sequence is different, the old file is
                 # renamed with "_OLD_" suffix:
-                oldmtime = _datetime.datetime.fromtimestamp(_os.path.getmtime(filename)).isoformat()
+                oldmtime = _datetime.datetime.fromtimestamp(
+                    _os.path.getmtime(filename)
+                ).isoformat()
                 tstmp = int(_time.time() * 1_000_000)
                 old_filename = f"{name}_OLD_{tstmp}{ext}"
                 _os.rename(filename, old_filename)
                 with open(filename, "w", encoding="utf8") as fp:
                     fp.write(self.format(f))
-                newmtime = _datetime.datetime.fromtimestamp(_os.path.getmtime(filename)).isoformat()
+                newmtime = _datetime.datetime.fromtimestamp(
+                    _os.path.getmtime(filename)
+                ).isoformat()
                 msg = f"""
                 <table style="padding:10px 10px;
                 word-break:normal;
@@ -589,7 +607,9 @@ class Dseqrecord(_SeqRecord):
                 newdescription = self.description
                 if oldstamp and newstamp:
                     if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
-                        newdescription = newdescription.replace(newstamp.group(0), oldstamp.group(0))
+                        newdescription = newdescription.replace(
+                            newstamp.group(0), oldstamp.group(0)
+                        )
                 elif oldstamp:
                     newdescription += " " + oldstamp.group(0)
                 newobj = _copy.copy(self)
@@ -616,9 +636,9 @@ class Dseqrecord(_SeqRecord):
         return s.find(o)
     def __str__(self):
-        return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(self.circular, len(self)) + _SeqRecord.__str__(
-            self
-        )
+        return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
+            self.circular, len(self)
+        ) + _SeqRecord.__str__(self)
     def __contains__(self, other):
         if other.lower() in str(self.seq).lower():
@@ -757,10 +777,16 @@ class Dseqrecord(_SeqRecord):
         return [x.annotations["filename"] for x in matching_reads]
     def __repr__(self):
-        return "Dseqrecord({}{})".format({True: "-", False: "o"}[not self.circular], len(self))
+        return "Dseqrecord({}{})".format(
+            {True: "-", False: "o"}[not self.circular], len(self)
+        )
     def _repr_pretty_(self, p, cycle):
-        p.text("Dseqrecord({}{})".format({True: "-", False: "o"}[not self.circular], len(self)))
+        p.text(
+            "Dseqrecord({}{})".format(
+                {True: "-", False: "o"}[not self.circular], len(self)
+            )
+        )
     def __add__(self, other):
         if hasattr(other, "seq") and hasattr(other.seq, "watson"):
@@ -784,7 +810,11 @@ class Dseqrecord(_SeqRecord):
     def __mul__(self, number):
         if not isinstance(number, int):
-            raise TypeError("TypeError: can't multiply Dseqrecord by non-int of type {}".format(type(number)))
+            raise TypeError(
+                "TypeError: can't multiply Dseqrecord by non-int of type {}".format(
+                    type(number)
+                )
+            )
         if self.circular:
             raise TypeError("TypeError: can't multiply circular Dseqrecord.")
         if number > 0:
@@ -821,7 +851,8 @@ class Dseqrecord(_SeqRecord):
                 for f in answer.features
                 if (
                     _location_boundaries(f.location)[1] <= answer.seq.length
-                    and _location_boundaries(f.location)[0] < _location_boundaries(f.location)[1]
+                    and _location_boundaries(f.location)[0]
+                    < _location_boundaries(f.location)[1]
                 )
             ]
@@ -1032,7 +1063,7 @@ class Dseqrecord(_SeqRecord):
             result = newseq
         else:
             result = newseq.shifted(start)
-        _module_logger.info("synced")
+        #       _module_logger.info("synced")
         return result
     def upper(self):
@@ -1118,7 +1149,10 @@ class Dseqrecord(_SeqRecord):
                         type="CDS",
                         qualifiers={
                             "note": f"{y - x}bp {(y - x) // 3}aa",
-                            "checksum": [orf.seguid() + " (DNA)", prt.seguid() + " (protein)"],
+                            "checksum": [
+                                orf.seguid() + " (DNA)",
+                                prt.seguid() + " (protein)",
+                            ],
                             "codon_start": 1,
                             "transl_table": 11,
                             "translation": str(prt.seq),
@@ -1148,7 +1182,9 @@ class Dseqrecord(_SeqRecord):
         """docstring."""
         if self.features:
             f = self.features[feature]
-            locations = sorted(self.features[feature].location.parts, key=_SimpleLocation.start.fget)
+            locations = sorted(
+                self.features[feature].location.parts, key=_SimpleLocation.start.fget
+            )
             strand = f.location.strand
         else:
             locations = [_SimpleLocation(0, 0, 1)]
@@ -1229,7 +1265,10 @@ class Dseqrecord(_SeqRecord):
         """
         if not self.circular:
-            raise TypeError("Sequence is linear, origin can only be " "shifted for circular sequences.\n")
+            raise TypeError(
+                "Sequence is linear, origin can only be "
+                "shifted for circular sequences.\n"
+            )
         ln = len(self)
         if not shift % ln:
             return _copy.deepcopy(self)  # shift is a multiple of ln or 0
@@ -1311,7 +1350,9 @@ class Dseqrecord(_SeqRecord):
                 #     000
                 #     2222
                 #
-                left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(left_cut, True)
+                left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(
+                    left_cut, True
+                )
                 initial_shift = left_watson if left_ovhg < 0 else left_crick
                 features = self.shifted(initial_shift).features
                 # for f in features:
@@ -1327,10 +1368,13 @@ class Dseqrecord(_SeqRecord):
                 #      2222
                 features_need_transfer = [
-                    f for f in features if (_location_boundaries(f.location)[1] <= abs(left_ovhg))
+                    f
+                    for f in features
+                    if (_location_boundaries(f.location)[1] <= abs(left_ovhg))
                 ]
                 features_need_transfer = [
-                    _shift_feature(f, -abs(left_ovhg), len(self)) for f in features_need_transfer
+                    _shift_feature(f, -abs(left_ovhg), len(self))
+                    for f in features_need_transfer
                 ]
                 #                                           ^                ^^^^^^^^^
@@ -1345,7 +1389,10 @@ class Dseqrecord(_SeqRecord):
                 # The features 0 and 1 would have the right location if the final sequence had the same length
                 # as the original one. However, the final product is longer because of the overhang.
-                features += [_shift_feature(f, abs(left_ovhg), len(dseq)) for f in features_need_transfer]
+                features += [
+                    _shift_feature(f, abs(left_ovhg), len(dseq))
+                    for f in features_need_transfer
+                ]
                 #                             ^                ^^^^^^^^^
                 # So we shift back by the same amount in the opposite direction, but this time we pass the
                 # length of the final product.
@@ -1356,24 +1403,20 @@ class Dseqrecord(_SeqRecord):
                     for f in features
                     if (
                         _location_boundaries(f.location)[1] <= len(dseq)
-                        and _location_boundaries(f.location)[0] <= _location_boundaries(f.location)[1]
+                        and _location_boundaries(f.location)[0]
+                        <= _location_boundaries(f.location)[1]
                     )
                 ]
         else:
-            left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(left_cut, True)
-            right_watson, right_crick, right_ovhg = self.seq.get_cut_parameters(right_cut, False)
+            left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(
+                left_cut, True
+            )
+            right_watson, right_crick, right_ovhg = self.seq.get_cut_parameters(
+                right_cut, False
+            )
             left_edge = left_crick if left_ovhg > 0 else left_watson
             right_edge = right_watson if right_ovhg > 0 else right_crick
             features = self[left_edge:right_edge].features
         return Dseqrecord(dseq, features=features)
-if __name__ == "__main__":
-    cache = _os.getenv("pydna_cache")
-    _os.environ["pydna_cache"] = "nocache"
-    import doctest
-    doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
-    # _os.environ["pydna_cache"] = cache

pydna/fakeseq.py CHANGED Viewed

@@ -44,14 +44,3 @@ class FakeSeq:
     def __str__(self) -> str:
         """docstring."""
         return self.__repr__()
-if __name__ == "__main__":
-    import os as _os
-    cached = _os.getenv("pydna_cached_funcs", "")
-    _os.environ["pydna_cached_funcs"] = ""
-    import doctest
-    doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
-    _os.environ["pydna_cached_funcs"] = cached

pydna/fusionpcr.py CHANGED Viewed

@@ -17,7 +17,9 @@ def fuse_by_pcr(fragments, limit=15):
         new = None
         for a, b in [(x, y), (x, y.rc()), (x.rc(), y)]:
             try:
-                ((s1, s2, ln), *r) = terminal_overlap(a.seq.watson.lower(), rc(b.seq.crick.lower()), limit=limit)
+                ((s1, s2, ln), *r) = terminal_overlap(
+                    a.seq.watson.lower(), rc(b.seq.crick.lower()), limit=limit
+                )
             except ValueError as err:
                 if "not enough values to unpack" not in str(err):
                     raise err

pydna/gateway.py CHANGED Viewed

@@ -1,162 +1,164 @@
-#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-# Copyright 2013-2023 by Björn Johansson.  All rights reserved.
-# This code is part of the Python-dna distribution and governed by its
-# license.  Please see the LICENSE.txt file that should have been included
-# as part of this package.
-"""Assembly of sequences by Gateway recombination.
-Given a list of sequences (Dseqrecords), all sequences are analyzed for
-presence of att(P|B|L|R)N where N is 1,2,3 or 4.
-A graph is constructed where the att sites form a nodes and
-sequences separating att sites form edges.
-The NetworkX package is used to trace linear and circular paths through the
-graph.
-"""
-# from Bio.SeqFeature import ExactPosition as _ExactPosition
-# from Bio.SeqFeature import SimpleLocation as _SimpleLocation
-# from Bio.SeqFeature import CompoundLocation as _CompoundLocation
-# from pydna.utils import rc as _rc
-# from pydna._pretty import pretty_str as _pretty_str
-# from pydna.contig import Contig as _Contig
-# from pydna.common_sub_strings import common_sub_strings
-# from pydna.dseqrecord import Dseqrecord as _Dseqrecord
-# import networkx as _nx
-# from copy import deepcopy as _deepcopy
-# import itertools as _itertools
-import logging as _logging
-_module_logger = _logging.getLogger("pydna." + __name__)
-ambiguous_dna_regex = {
-    "A": "T",
-    "C": "G",
-    "G": "C",
-    "T": "A",
-    "M": "[ACM]",
-    "R": "[AGR]",
-    "W": "[ATW]",
-    "S": "[CGS]",
-    "Y": "[CTY]",
-    "K": "[GTK]",
-    "V": "[ACGVMSR]",
-    "H": "[ACTHMYW]",
-    "D": "[AGTDRWK]",
-    "B": "[CGTBSKY]",
-    "X": "X",
-    "N": "[ACGTBDHKMNRSVWY]",
+from Bio.Seq import reverse_complement
+from pydna.dseqrecord import Dseqrecord as _Dseqrecord
+import re
+import itertools as _itertools
+from Bio.SeqFeature import SimpleLocation, SeqFeature
+from pydna.utils import shift_location
+from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
+raw_gateway_common = {
+    "attB1": "CHWVTWTGTACAAAAAANNNG",
+    "attB2": "CHWVTWTGTACAAGAAANNNG",
+    "attB3": "CHWVTWTGTATAATAAANNNG",
+    "attB4": "CHWVTWTGTATAGAAAANNNG",
+    "attB5": "CHWVTWTGTATACAAAANNNG",
+    "attL1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAAAAANNNG",
+    "attL2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAGAAANNNG",
+    "attL3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAATAAANNNG",
+    "attL4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAGAAAANNNG",
+    "attL5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATACAAAANNNG",
+    "attR1": "CHWVTWTGTACAAAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attR2": "CHWVTWTGTACAAGAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attR3": "CHWVTWTGTATAATAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attR4": "CHWVTWTGTATAGAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attR5": "CHWVTWTGTATACAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "overlap_1": "twtGTACAAAaaa",
+    "overlap_2": "twtGTACAAGaaa",
+    "overlap_3": "twtGTATAATaaa",
+    "overlap_4": "twtGTATAGAaaa",
+    "overlap_5": "twtGTATACAaaa",
 }
-atts = """
-attP1 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTACAAA AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG CMASTWT AAAGYWG
-attP2 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTACAAG AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG CMASTWT AAAGYWG
-attP3 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTATAAT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG CMASTWT AAAGYWG
-attP4 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTATAGA AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG CMASTWT AAAGYWG
-attP5 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTATACA AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG CMASTWT AAAGYWG
-attB1 CMASTWT GTACAAA AAAGYWG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attB2 CMASTWT GTACAAG AAAGYWG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attB3 CMASTWT GTATAAT AAAGYWG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attB4 CMASTWT GTATAGA AAAGYWG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attB5 CMASTWT GTATACA AAAGYWG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attR1 CMASTWT GTACAAA AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWG
-attR2 CMASTWT GTACAAG AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWG
-attR3 CMASTWT GTATAAT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWG
-attR4 CMASTWT GTATAGA AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWG
-attR5 CMASTWT GTATACA AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT AAAGYWG
-attL1 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTACAAA AAAGYWG CMASTWT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attL2 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTACAAG AAAGYWG CMASTWT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attL3 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTATAAT AAAGYWG CMASTWT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attL4 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTATAGA AAAGYWG CMASTWT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-attL5 AAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATKMTTTYTTATAATGCCMASTTT GTATACA AAAGYWG CMASTWT AAAGYWGAACGAGAAACGTAAAATGATATAAATATCAATATATTAAATTAGATTTTGCATAAAAAACAGACTACATAATRCTGTAAAACACAACATATSCAGTCAYWWTG
-"""
-retable = str.maketrans(ambiguous_dna_regex)
-for line in (line for line in atts.splitlines() if line.strip()):
-    name, *parts = line.split()
-    for part in parts:
-        part.translate(retable)
-class Gateway(object):
-    """Assembly of linear DNA fragments into linear or circular constructs.
-    The Assembly is meant to replace the Assembly method as it
-    is easier to use. Accepts a list of Dseqrecords (source fragments) to
-    initiate an Assembly object. Several methods are available for analysis
-    of overlapping sequences, graph construction and assembly.
-    Parameters
-    ----------
-    fragments : list
-        a list of Dseqrecord objects.
-    """
-    def __init__(self, molecules=None):
-        self.molecules = molecules
-"""
-Created on Sat Aug 21 15:41:42 2021
-@author: bjorn
-https://en.wikipedia.org/wiki/Cre-Lox_recombination
-13bp	      8bp	   13bp
-ATAACTTCGTATA-NNNTANNN-TATACGAAGTTAT
-Name	    13 bp  	        8 bp  	    13 bp
-            Recognition     Spacer      Recognition
-            Region          Region      Region
-Wild-Type	ATAACTTCGTATA	ATGTATGC	TATACGAAGTTAT
-lox 511	    ATAACTTCGTATA	ATGTATaC	TATACGAAGTTAT
-lox 5171	ATAACTTCGTATA	ATGTgTaC	TATACGAAGTTAT
-lox 2272	ATAACTTCGTATA	AaGTATcC	TATACGAAGTTAT
-M2	        ATAACTTCGTATA	AgaaAcca	TATACGAAGTTAT
-M3	        ATAACTTCGTATA	taaTACCA	TATACGAAGTTAT
-M7	        ATAACTTCGTATA	AgaTAGAA	TATACGAAGTTAT
-M11	        ATAACTTCGTATA	cgaTAcca	TATACGAAGTTAT
-lox 71	    TACCGTTCGTATA	NNNTANNN	TATACGAAGTTAT
-lox 66	    ATAACTTCGTATA	NNNTANNN	TATACGAACGGTA
-"""
-"""
-https://blog.addgene.org/plasmids-101-cre-lox
+raw_gateway_sites_greedy = {
+    **raw_gateway_common,
+    "attP1": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attP2": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTACAAGAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attP3": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAATAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attP4": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATAGAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+    "attP5": "VAAWWAWKRWTTTWWTTYGACTGATAGTGACCTGTWCGTYGMAACAVATTGATRAGCAATKMTTTYYTATAWTGHCMASTWTGTATACAAAAGYWGARCGAGAARCGTAARRTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATRCTGTAARACACAACATATBCAGTCV",
+}
-https://en.wikipedia.org/wiki/Cre-Lox_recombination
+raw_gateway_sites_conservative = {
+    **raw_gateway_common,
+    "attP1": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTACAAAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+    "attP2": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTACAAGAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+    "attP3": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATAATAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+    "attP4": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATAGAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+    "attP5": "AAAWWAWKRWTTTWWTTTGACTGATAGTGACCTGTTCGTTGCAACAMATTGATRAGCAATGCTTTYTTATAATGCCMASTTTGTATACAAAAGYWGAACGAGAARCGTAAARTGATATAAATATCAATATATTAAATTAGAYTTTGCATAAAAAACAGACTACATAATACTGTAAAACACAACATATSCAGTCACTATGAAYCAACTACTTAGATGGTATTAGTGACCTGTA",
+}
-13bp	      8bp	   13bp
-ATAACTTCGTATA-NNNTANNN-TATACGAAGTTAT
+gateway_sites_greedy = {
+    k: {
+        "forward_regex": compute_regex_site(v),
+        "reverse_regex": compute_regex_site(reverse_complement(v)),
+        "consensus_sequence": v,
+    }
+    for k, v in raw_gateway_sites_greedy.items()
+}
+gateway_sites_conservative = {
+    k: {
+        "forward_regex": compute_regex_site(v),
+        "reverse_regex": compute_regex_site(reverse_complement(v)),
+        "consensus_sequence": v,
+    }
+    for k, v in raw_gateway_sites_conservative.items()
+}
-Name	    13 bp  	        8 bp  	    13 bp
-            Recognition     Spacer      Recognition
-            Region          Region      Region
+# From snapgene - ask Valerie
+primer_design_attB = {
+    "attB1": "ACAAGTTTGTACAAAAAAGCAGGCT",
+    "attB2": "ACCACTTTGTACAAGAAAGCTGGGT",
+    "attB3": "ACAACTTTGTATAATAAAGTTGTA",
+    "attB4": "ACAACTTTGTATAGAAAAGTTGTA",
+    "attB5": "ACAACTTTGTATACAAAAGTTGTA",
+}
-Wild-Type	ATAACTTCGTATA	ATGTATGC	TATACGAAGTTAT
-lox511	    ATAACTTCGTATA	ATGTATaC	TATACGAAGTTAT
-lox5171	    ATAACTTCGTATA	ATGTgTaC	TATACGAAGTTAT
-lox2272	    ATAACTTCGTATA	AaGTATcC	TATACGAAGTTAT
-M2	        ATAACTTCGTATA	AgaaAcca	TATACGAAGTTAT
-M3	        ATAACTTCGTATA	taaTACCA	TATACGAAGTTAT
-M7	        ATAACTTCGTATA	AgaTAGAA	TATACGAAGTTAT
-M11	        ATAACTTCGTATA	cgaTAcca	TATACGAAGTTAT
-lox71	    TACCGTTCGTATA	NNNTANNN	TATACGAAGTTAT
-lox66	    ATAACTTCGTATA	NNNTANNN	TATACGAACGGTA
-"""
+def gateway_overlap(
+    seqx: _Dseqrecord, seqy: _Dseqrecord, reaction: str, greedy: bool
+) -> list[tuple[int, int, int]]:
+    """
+    Find gateway overlaps. If greedy is True, it uses a more greedy consensus site to find attP sites,
+    which might give false positives
+    """
+    if reaction not in ["BP", "LR"]:
+        raise ValueError(f"Invalid overlap type: {reaction}")
+    gateway_sites = gateway_sites_greedy if greedy else gateway_sites_conservative
+    out = list()
+    # Iterate over the four possible att sites
+    for num in range(1, 5):
+        # Iterate over the two possible orientations
+        # The sites have to be in the same orientation (fwd + fwd or rev + rev)
+        for pattern in ["forward_regex", "reverse_regex"]:
+            # The overlap regex is the same for all types
+            overlap_regex = gateway_sites[f"overlap_{num}"][pattern]
+            # Iterate over pairs B, P and P, B for BP and L, R and R, L for LR
+            for site_x, site_y in zip(reaction, reaction[::-1]):
+                site_x_regex = gateway_sites[f"att{site_x}{num}"][pattern]
+                matches_x = list(dseqrecord_finditer(site_x_regex, seqx))
+                if len(matches_x) == 0:
+                    continue
+                site_y_regex = gateway_sites[f"att{site_y}{num}"][pattern]
+                matches_y = list(dseqrecord_finditer(site_y_regex, seqy))
+                if len(matches_y) == 0:
+                    continue
+                for match_x, match_y in _itertools.product(matches_x, matches_y):
+                    # Find the overlap sequence within each match, and use the
+                    # core 7 pbs that are constant
+                    overlap_x = re.search(overlap_regex, match_x.group())
+                    overlap_y = re.search(overlap_regex, match_y.group())
+                    # Sanity check
+                    assert (
+                        overlap_x is not None and overlap_y is not None
+                    ), "Something went wrong, no overlap found within the matches"
+                    out.append(
+                        (
+                            match_x.start() + overlap_x.start() + 3,
+                            match_y.start() + overlap_y.start() + 3,
+                            7,
+                        )
+                    )
+    return out
+def find_gateway_sites(
+    seq: _Dseqrecord, greedy: bool
+) -> dict[str, list[SimpleLocation]]:
+    """Find all gateway sites in a sequence and return a dictionary with the name and positions of the sites."""
+    gateway_sites = gateway_sites_greedy if greedy else gateway_sites_conservative
+    out = dict()
+    for site in gateway_sites:
+        if not site.startswith("att"):
+            continue
+        for pattern in ["forward_regex", "reverse_regex"]:
+            matches = list(dseqrecord_finditer(gateway_sites[site][pattern], seq))
+            for match in matches:
+                if site not in out:
+                    out[site] = []
+                strand = 1 if pattern == "forward_regex" else -1
+                loc = SimpleLocation(match.start(), match.end(), strand)
+                loc = shift_location(loc, 0, len(seq))
+                out[site].append(loc)
+    return out
+def annotate_gateway_sites(seq: _Dseqrecord, greedy: bool) -> _Dseqrecord:
+    sites = find_gateway_sites(seq, greedy)
+    for site in sites:
+        for loc in sites[site]:
+            seq.features.append(
+                SeqFeature(loc, type="protein_bind", qualifiers={"label": [site]})
+            )
+    return seq

pydna 5.5.1__py3-none-any.whl → 5.5.3__py3-none-any.whl

pydna 5.5.1__py3-none-any.whl → 5.5.3__py3-none-any.whl