PyPI - pydna - Versions diffs - 5.5.3__py3-none-any.whl → 5.5.4__py3-none-any.whl - Mend

pydna 5.5.3 (py3-none-any.whl) → 5.5.4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

pydna/__init__.py +1 -1
pydna/assembly2.py +415 -159
pydna/dseqrecord.py +50 -2
pydna/opencloning_models.py +553 -0
pydna/types.py +5 -2
{pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/METADATA +8 -40
{pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/RECORD +9 -8
{pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/WHEEL +1 -1
{pydna-5.5.3.dist-info → pydna-5.5.4.dist-info/licenses}/LICENSE.txt +0 -0

pydna/assembly2.py CHANGED Viewed

@@ -39,9 +39,26 @@ from pydna.types import (
 from pydna.gateway import gateway_overlap, find_gateway_sites
 from pydna.cre_lox import cre_loxP_overlap
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Callable, Literal
+from pydna.opencloning_models import (
+    AssemblySource,
+    RestrictionAndLigationSource,
+    GibsonAssemblySource,
+    InFusionSource,
+    OverlapExtensionPCRLigationSource,
+    InVivoAssemblySource,
+    LigationSource,
+    GatewaySource,
+    HomologousRecombinationSource,
+    CreLoxRecombinationSource,
+    PCRSource,
+    SourceInput,
+    CRISPRSource,
+)
+from pydna.crispr import cas9
+import warnings
-if TYPE_CHECKING:
+if TYPE_CHECKING:  # pragma: no cover
     from Bio.Restriction import AbstractCut as _AbstractCut
@@ -80,15 +97,22 @@ def ends_from_cutsite(
 ) -> tuple[tuple[str, str], tuple[str, str]]:
     """Get the sticky or blunt ends created by a restriction enzyme cut.
-    Args:
-        cutsite (CutSiteType): A tuple ((cut_watson, ovhg), enzyme) describing where the cut occurs
-        seq (_Dseq): The DNA sequence being cut
+    Parameters
+    ----------
+    cutsite : CutSiteType
+        A tuple ((cut_watson, ovhg), enzyme) describing where the cut occurs
+    seq : _Dseq
+        The DNA sequence being cut
-    Raises:
-        ValueError: If cutsite is None
+    Raises
+    ------
+    ValueError
+        If cutsite is None
-    Returns:
-        tuple[tuple[str, str], tuple[str, str]]: A tuple of two tuples, each containing the type of end ('5\'', '3\'', or 'blunt')
+    Returns
+    -------
+    tuple[tuple[str, str], tuple[str, str]]
+        A tuple of two tuples, each containing the type of end ('5\'', '3\'', or 'blunt')
         and the sequence of the overhang. The first tuple is for the left end, second for the right end.
     >>> from Bio.Restriction import NotI
@@ -129,14 +153,23 @@ def restriction_ligation_overlap(
     Like in sticky and gibson, the order matters (see example below of partial overlap)
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        enzymes (RestrictionBatch): The enzymes to use
-        partial (bool): Whether to allow partial overlaps
-        allow_blunt (bool): Whether to allow blunt ends
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Parameters
+    ----------
+    seqx : _Dseqrecord
+        The first sequence
+    seqy : _Dseqrecord
+        The second sequence
+    enzymes : RestrictionBatch
+        The enzymes to use
+    partial : bool
+        Whether to allow partial overlaps
+    allow_blunt : bool
+        Whether to allow blunt ends
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     >>> from pydna.dseqrecord import Dseqrecord
     >>> from pydna.assembly2 import restriction_ligation_overlap
@@ -230,13 +263,19 @@ def blunt_overlap(
     It basically returns [(len(seqx), 0, 0)] if the right end of seqx is blunt and the
     left end of seqy is blunt (compatible with blunt ligation). Otherwise, it returns an empty list.
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        limit (int): There for compatibility, but it is ignored
+    Parameters
+    ----------
+    seqx : _Dseqrecord
+        The first sequence
+    seqy : _Dseqrecord
+        The second sequence
+    limit : int
+        There for compatibility, but it is ignored
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     >>> from pydna.assembly2 import blunt_overlap
     >>> from pydna.dseqrecord import Dseqrecord
@@ -322,25 +361,31 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
     Assembly algorithm to find terminal overlaps (e.g. for Gibson assembly).
     The order matters, we want alignments like:
-    ```
-    seqx:    oooo------xxxx
-    seqy:              xxxx------oooo
-    Product: oooo------xxxx------oooo
+    ::
-    Not like:
+        seqx:    oooo------xxxx
+        seqy:              xxxx------oooo
+        Product: oooo------xxxx------oooo
-    seqx:               oooo------xxxx
-    seqy:     xxxx------oooo
-    Product (unwanted): oooo
-    ```
+        Not like:
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        limit (int): Minimum length of the overlap
+        seqx:               oooo------xxxx
+        seqy:     xxxx------oooo
+        Product (unwanted): oooo
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Parameters
+    ----------
+    seqx : _Dseqrecord
+        The first sequence
+    seqy : _Dseqrecord
+        The second sequence
+    limit : int
+        Minimum length of the overlap
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     >>> from pydna.dseqrecord import Dseqrecord
     >>> from pydna.assembly2 import gibson_overlap
@@ -384,13 +429,19 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
     For now, if limit is 0 / False (default), only full overlaps are considered.
     Otherwise, partial overlaps are also returned.
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        limit (bool): Whether to allow partial overlaps
+    Parameters
+    ----------
+    seqx : _Dseqrecord
+        The first sequence
+    seqy : _Dseqrecord
+        The second sequence
+    limit : bool
+        Whether to allow partial overlaps
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     Ligation of fully overlapping sticky ends, note how the order matters
@@ -520,14 +571,21 @@ def primer_template_overlap(
     If seqx is a template and seqy is a primer, it represents the binding of a reverse primer,
     where the primer has been passed as its reverse complement (see examples).
-    Args:
-        seqx (_Dseqrecord | _Primer): The primer
-        seqy (_Dseqrecord | _Primer): The template
-        limit (int): Minimum length of the overlap
-        mismatches (int): Maximum number of mismatches (only substitutions, no deletion or insertion)
+    Parameters
+    ----------
+    seqx : _Dseqrecord | _Primer
+        The primer
+    seqy : _Dseqrecord | _Primer
+        The template
+    limit : int
+        Minimum length of the overlap
+    mismatches : int
+        Maximum number of mismatches (only substitutions, no deletion or insertion)
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the primer and the template
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the primer and the template
     >>> from pydna.dseqrecord import Dseqrecord
     >>> from pydna.primer import Primer
@@ -537,7 +595,7 @@ def primer_template_overlap(
     >>> primer_template_overlap(primer, template, limit=8, mismatches=0)
     [(0, 2, 8)]
-    This actually represents the binding of the primer `GCTGCTAA` (reverse complement)
+    This actually represents the binding of the primer ``GCTGCTAA`` (reverse complement)
     >>> primer_template_overlap(template, primer, limit=8, mismatches=0)
     [(2, 0, 8)]
     >>> primer_template_overlap(primer, template.reverse_complement(), limit=8, mismatches=0)
@@ -702,7 +760,7 @@ def assembly2str(assembly: EdgeRepresentationAssembly) -> str:
     ('1[8:14]:2[1:7]', '2[10:17]:3[1:8]')
     The reason for this is that by default, a feature '[8:14]' when present in a tuple
-    is printed to the console as `SimpleLocation(ExactPosition(8), ExactPosition(14), strand=1)` (very long).
+    is printed to the console as ``SimpleLocation(ExactPosition(8), ExactPosition(14), strand=1)`` (very long).
     """
     return str(tuple(f"{u}{lu}:{v}{lv}" for u, v, lu, lv in assembly))
@@ -791,7 +849,7 @@ def assemble(
     out_dseqrecord = _Dseqrecord(subfragments[0])
     for fragment, overlap in zip(subfragments[1:], fragment_overlaps):
-        # Shift the features of the right fragment to the left by `overlap`
+        # Shift the features of the right fragment to the left by ``overlap``
         new_features = [
             f._shift(len(out_dseqrecord) - overlap) for f in fragment.features
         ]
@@ -808,22 +866,25 @@ def assemble(
         # Special case for blunt circularisation
         if overlap == 0:
-            return out_dseqrecord.looped()
-        # Remove trailing overlap
-        out_dseqrecord = _Dseqrecord(
-            fill_dseq(out_dseqrecord.seq)[:-overlap],
-            features=out_dseqrecord.features,
-            circular=True,
-        )
-        for feature in out_dseqrecord.features:
-            start, end = _location_boundaries(feature.location)
-            if start >= len(out_dseqrecord) or end > len(out_dseqrecord):
-                # Wrap around the origin
-                feature.location = _shift_location(
-                    feature.location, 0, len(out_dseqrecord)
-                )
+            out_dseqrecord = out_dseqrecord.looped()
+        else:
+            # Remove trailing overlap
+            out_dseqrecord = _Dseqrecord(
+                fill_dseq(out_dseqrecord.seq)[:-overlap],
+                features=out_dseqrecord.features,
+                circular=True,
+            )
+            for feature in out_dseqrecord.features:
+                start, end = _location_boundaries(feature.location)
+                if start >= len(out_dseqrecord) or end > len(out_dseqrecord):
+                    # Wrap around the origin
+                    feature.location = _shift_location(
+                        feature.location, 0, len(out_dseqrecord)
+                    )
+    out_dseqrecord.source = AssemblySource.from_subfragment_representation(
+        subfragment_representation, fragments, is_circular
+    )
     return out_dseqrecord
@@ -916,30 +977,29 @@ def get_assembly_subfragments(
     Subfragments are the slices of the fragments that are joined together
-    For example:
-    ```
-      --A--
-    TACGTAAT
-      --B--
-     TCGTAACGA
-    Gives: TACGTAA / CGTAACGA
-    ```
-    To reproduce:
-    ```
-    a = Dseqrecord('TACGTAAT')
-    b = Dseqrecord('TCGTAACGA')
-    f = Assembly([a, b], limit=5)
-    a0 = f.get_linear_assemblies()[0]
-    print(assembly2str(a0))
-    a0_subfragment_rep =edge_representation2subfragment_representation(a0, False)
-    for f in get_assembly_subfragments([a, b], a0_subfragment_rep):
-        print(f.seq)
-    # prints TACGTAA and CGTAACGA
-    ```
-    Subfragments: `cccccgtatcgtgt`, `atcgtgtactgtcatattc`
+    For example::
+          --A--
+        TACGTAAT
+          --B--
+         TCGTAACGA
+        Gives: TACGTAA / CGTAACGA
+    To reproduce::
+        a = Dseqrecord('TACGTAAT')
+        b = Dseqrecord('TCGTAACGA')
+        f = Assembly([a, b], limit=5)
+        a0 = f.get_linear_assemblies()[0]
+        print(assembly2str(a0))
+        a0_subfragment_rep =edge_representation2subfragment_representation(a0, False)
+        for f in get_assembly_subfragments([a, b], a0_subfragment_rep):
+            print(f.seq)
+        # prints TACGTAA and CGTAACGA
+    Subfragments: ``cccccgtatcgtgt``, ``atcgtgtactgtcatattc``
     """
     subfragments = list()
     for node, start_location, end_location in subfragment_representation:
@@ -1028,33 +1088,38 @@ class Assembly:
     The assembly contains a directed graph, where nodes represent fragments and
     edges represent overlaps between fragments:
     - The node keys are integers, representing the index of the fragment in the
-    input list of fragments. The sign of the node key represents the orientation
-    of the fragment, positive for forward orientation, negative for reverse orientation.
+      input list of fragments. The sign of the node key represents the orientation
+      of the fragment, positive for forward orientation, negative for reverse orientation.
     - The edges contain the locations of the overlaps in the fragments. For an edge (u, v, key):
         - u and v are the nodes connected by the edge.
         - key is a string that represents the location of the overlap. In the format:
-        'u[start:end](strand):v[start:end](strand)'.
+          'u[start:end](strand):v[start:end](strand)'.
         - Edges have a 'locations' attribute, which is a list of two FeatureLocation objects,
-        representing the location of the overlap in the u and v fragment, respectively.
+          representing the location of the overlap in the u and v fragment, respectively.
         - You can think of an edge as a representation of the join of two fragments.
     If fragment 1 and 2 share a subsequence of 6bp, [8:14] in fragment 1 and [1:7] in fragment 2,
     there will be 4 edges representing that overlap in the graph, for all possible
     orientations of the fragments (see add_edges_from_match for details):
-    - `(1, 2, '1[8:14]:2[1:7]')`
-    - `(2, 1, '2[1:7]:1[8:14]')`
-    - `(-1, -2, '-1[0:6]:-2[10:16]')`
-    - `(-2, -1, '-2[10:16]:-1[0:6]')`
+    - ``(1, 2, '1[8:14]:2[1:7]')``
+    - ``(2, 1, '2[1:7]:1[8:14]')``
+    - ``(-1, -2, '-1[0:6]:-2[10:16]')``
+    - ``(-2, -1, '-2[10:16]:-1[0:6]')``
     An assembly can be thought of as a tuple of graph edges, but instead of representing them with node indexes and keys, we represent them
     as u, v, locu, locv, where u and v are the nodes connected by the edge, and locu and locv are the locations of the overlap in the first
     and second fragment. Assemblies are then represented as:
     - Linear: ((1, 2, [8:14], [1:7]), (2, 3, [10:17], [1:8]))
     - Circular: ((1, 2, [8:14], [1:7]), (2, 3, [10:17], [1:8]), (3, 1, [12:17], [1:6]))
     Note that the first and last fragment are the same in a circular assembly.
     The following constraints are applied to remove duplicate assemblies:
     - Circular assemblies: the first subfragment is not reversed, and has the smallest index in the input fragment list.
       use_fragment_order is ignored.
     - Linear assemblies:
@@ -1065,7 +1130,7 @@ class Assembly:
     frags : list
         A list of Dseqrecord objects.
     limit : int, optional
-        The shortest shared homology to be considered, this is passed as the third argument to the `algorithm` function.
+        The shortest shared homology to be considered, this is passed as the third argument to the ``algorithm`` function.
         For certain algorithms, this might be ignored.
     algorithm : function, optional
         The algorithm used to determine the shared sequences. It's a function that takes two Dseqrecord objects as inputs,
@@ -1232,11 +1297,12 @@ class Assembly:
         first: _Dseqrecord,
         secnd: _Dseqrecord,
     ):
-        """Add edges to the graph from a match returned by the `algorithm` function (see pydna.common_substrings). For
+        """Add edges to the graph from a match returned by the ``algorithm`` function (see pydna.common_substrings). For
         format of edges (see documentation of the Assembly class).
-        Matches are directional, because not all `algorithm` functions return the same match for (u,v) and (v,u). For example,
+        Matches are directional, because not all ``algorithm`` functions return the same match for (u,v) and (v,u). For example,
         homologous recombination does but sticky end ligation does not. The function returns two edges:
         - Fragments in the orientation they were passed, with locations of the match (u, v, loc_u, loc_v)
         - Reverse complement of the fragments with inverted order, with flipped locations (-v, -u, flip(loc_v), flip(loc_u)).
@@ -1446,17 +1512,18 @@ class Assembly:
         Here we check if one of the joins between fragments represents the edges of an insertion assembly
         The fragment must be linear, and the join must be as indicated below
-        ```
-        --------         -------           Fragment 1
-            ||            ||
-            xxxxxxxx      ||               Fragment 2
-                  ||      ||
-                  oooooooooo               Fragment 3
-        ```
+        ::
+            --------         -------           Fragment 1
+                ||            ||
+                xxxxxxxx      ||               Fragment 2
+                      ||      ||
+                      oooooooooo               Fragment 3
         The above example will be [(1, 2, [4:6], [0:2]), (2, 3, [6:8], [0:2]), (3, 1, [8:10], [9:11])]
         These could be returned in any order by simple_cycles, so we sort the edges so that the first
-        and last `u` and `v` match the fragment that gets the insertion (1 in the example above).
+        and last ``u`` and ``v`` match the fragment that gets the insertion (1 in the example above).
         """
         edge_pair_index = list()
@@ -1637,8 +1704,8 @@ class Assembly:
     def get_locations_on_fragments(self) -> dict[int, dict[str, list[Location]]]:
         """Get a dictionary where the keys are the nodes in the graph, and the values are dictionaries with keys
-        `left`, `right`, containing (for each fragment) the locations where the fragment is joined to another fragment on its left
-        and right side. The values in `left` and `right` are often the same, except in restriction-ligation with partial overlap enabled,
+        ``left``, ``right``, containing (for each fragment) the locations where the fragment is joined to another fragment on its left
+        and right side. The values in ``left`` and ``right`` are often the same, except in restriction-ligation with partial overlap enabled,
         where we can end up with a situation like this:
         GGTCTCCCCAATT and aGGTCTCCAACCAA as fragments
@@ -1651,13 +1718,14 @@ class Assembly:
         aGGTCTCCxxCCAATT
         tCCAGAGGTTGGxxAA
-        Would return
-        {
-            1: {'left': [7:9], 'right': [9:11]},
-            2: {'left': [8:10], 'right': [10:12]},
-            -1: {'left': [2:4], 'right': [4:6]},
-            -2: {'left': [2:4], 'right': [4:6]}
-        }
+        Would return::
+            {
+                1: {'left': [7:9], 'right': [9:11]},
+                2: {'left': [8:10], 'right': [10:12]},
+                -1: {'left': [2:4], 'right': [4:6]},
+                -2: {'left': [2:4], 'right': [4:6]}
+            }
         """
@@ -1686,10 +1754,10 @@ class Assembly:
         and prevent including partially digested fragments. For example, imagine the following fragment being an input for a digestion
         and ligation assembly, where the enzyme cuts at the sites indicated by the vertical lines:
-        ```
-                 x       y       z
-          -------|-------|-------|---------
-        ```
+        ::
+                     x       y       z
+              -------|-------|-------|---------
         We would only want assemblies that contain subfragments start-x, x-y, y-z, z-end, and not start-x, y-end, for instance.
         The latter would indicate that the fragment was partially digested.
@@ -1750,8 +1818,8 @@ class Assembly:
 class PCRAssembly(Assembly):
     """
-    An assembly that represents a PCR, where `fragments` is a list of primer, template, primer (in that order).
-    It always uses the `primer_template_overlap` algorithm and accepts the `mismatches` argument to indicate
+    An assembly that represents a PCR, where ``fragments`` is a list of primer, template, primer (in that order).
+    It always uses the ``primer_template_overlap`` algorithm and accepts the ``mismatches`` argument to indicate
     the number of mismatches allowed in the overlap. Only supports substitution mismatches, not indels.
     """
@@ -1959,6 +2027,21 @@ def common_function_assembly_products(
     return [assemble(frags, a) for a in output_assemblies]
+def _recast_sources(
+    products: list[_Dseqrecord], source_cls, **extra_fields
+) -> list[_Dseqrecord]:
+    """Recast the ``source`` of each product to ``source_cls`` with optional extras.
+    This avoids repeating the same for-loop across many assembly functions.
+    """
+    for prod in products:
+        prod.source = source_cls(
+            **prod.source.model_dump(),
+            **extra_fields,
+        )
+    return products
 def gibson_assembly(
     frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
 ) -> list[_Dseqrecord]:
@@ -1978,9 +2061,11 @@ def gibson_assembly(
     list[_Dseqrecord]
         List of assembled DNA molecules
     """
-    return common_function_assembly_products(
+    products = common_function_assembly_products(
         frags, limit, gibson_overlap, circular_only
     )
+    return _recast_sources(products, GibsonAssemblySource)
 def in_fusion_assembly(
@@ -2003,7 +2088,9 @@ def in_fusion_assembly(
     list[_Dseqrecord]
         List of assembled DNA molecules
     """
-    return gibson_assembly(frags, limit)
+    products = gibson_assembly(frags, limit)
+    return _recast_sources(products, InFusionSource)
 def fusion_pcr_assembly(
@@ -2026,7 +2113,8 @@ def fusion_pcr_assembly(
     list[_Dseqrecord]
         List of assembled DNA molecules
     """
-    return gibson_assembly(frags, limit)
+    products = gibson_assembly(frags, limit)
+    return _recast_sources(products, OverlapExtensionPCRLigationSource)
 def in_vivo_assembly(
@@ -2048,9 +2136,10 @@ def in_vivo_assembly(
     list[_Dseqrecord]
         List of assembled DNA molecules
     """
-    return common_function_assembly_products(
+    products = common_function_assembly_products(
         frags, limit, common_sub_strings, circular_only
     )
+    return _recast_sources(products, InVivoAssemblySource)
 def restriction_ligation_assembly(
@@ -2060,9 +2149,10 @@ def restriction_ligation_assembly(
     circular_only: bool = False,
 ) -> list[_Dseqrecord]:
     """Returns the products for restriction ligation assembly:
-    * Finds cutsites in the fragments
-    * Finds all products that could be assembled by ligating the fragments based on those cutsites
-    * Will NOT return products that combine an existing end with an end generated by the same enzyme (see example below)
+    - Finds cutsites in the fragments
+    - Finds all products that could be assembled by ligating the fragments based on those cutsites
+    - Will NOT return products that combine an existing end with an end generated by the same enzyme (see example below)
     Parameters
     ----------
@@ -2083,9 +2173,9 @@ def restriction_ligation_assembly(
     Examples
     --------
     In the example below, we plan to assemble a plasmid from a backbone and an insert, using the EcoRI and SalI enzymes.
-    Note how 2 circular products are returned, one contains the insert (`acgt`)
-    and the desired part of the backbone (`cccccc`), the other contains the
-    reversed insert (`tgga`) and the cut-out part of the backbone (`aaa`).
+    Note how 2 circular products are returned, one contains the insert (``acgt``)
+    and the desired part of the backbone (``cccccc``), the other contains the
+    reversed insert (``tgga``) and the cut-out part of the backbone (``aaa``).
     >>> from pydna.assembly2 import restriction_ligation_assembly
     >>> from pydna.dseqrecord import Dseqrecord
@@ -2119,11 +2209,16 @@ def restriction_ligation_assembly(
     TTAAGtttC
     """
-    def algo(x, y, _l):
+    def algorithm_fn(x, y, _l):
         # By default, we allow blunt ends
         return restriction_ligation_overlap(x, y, enzymes, False, allow_blunt)
-    return common_function_assembly_products(frags, None, algo, circular_only)
+    products = common_function_assembly_products(
+        frags, None, algorithm_fn, circular_only
+    )
+    return _recast_sources(
+        products, RestrictionAndLigationSource, restriction_enzymes=enzymes
+    )
 def golden_gate_assembly(
@@ -2134,7 +2229,7 @@ def golden_gate_assembly(
 ) -> list[_Dseqrecord]:
     """Returns the products for Golden Gate assembly. This is the same as
     restriction ligation assembly, but with a different name. Check the documentation
-    for `restriction_ligation_assembly` for more details.
+    for ``restriction_ligation_assembly`` for more details.
     Parameters
     ----------
@@ -2154,7 +2249,7 @@ def golden_gate_assembly(
     Examples
     --------
-    See the example for `restriction_ligation_assembly`.
+    See the example for ``restriction_ligation_assembly``.
     """
     return restriction_ligation_assembly(frags, enzymes, allow_blunt, circular_only)
@@ -2168,7 +2263,7 @@ def ligation_assembly(
     """Returns the products for ligation assembly, as inputs pass the fragments (digested if needed) that
     will be ligated.
-    For most cases, you probably should use `restriction_ligation_assembly` instead.
+    For most cases, you probably should use ``restriction_ligation_assembly`` instead.
     Parameters
     ----------
@@ -2215,11 +2310,14 @@ def ligation_assembly(
         return sticky_end_sub_strings(x, y, allow_partial_overlap)
     if allow_blunt:
-        algo = combine_algorithms(sticky_end_algorithm, blunt_overlap)
+        algorithm_fn = combine_algorithms(sticky_end_algorithm, blunt_overlap)
     else:
-        algo = sticky_end_algorithm
+        algorithm_fn = sticky_end_algorithm
-    return common_function_assembly_products(frags, None, algo, circular_only)
+    products = common_function_assembly_products(
+        frags, None, algorithm_fn, circular_only
+    )
+    return _recast_sources(products, LigationSource)
 def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
@@ -2236,7 +2334,7 @@ def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
 def gateway_assembly(
     frags: list[_Dseqrecord],
-    reaction_type: str,
+    reaction_type: Literal["BP", "LR"],
     greedy: bool = False,
     circular_only: bool = False,
     multi_site_only: bool = False,
@@ -2247,8 +2345,8 @@ def gateway_assembly(
     ----------
     frags : list[_Dseqrecord]
         List of DNA fragments to assemble
-    reaction_type : str
-        Type of Gateway reaction, either 'BP' or 'LR'
+    reaction_type : Literal['BP', 'LR']
+        Type of Gateway reaction
     greedy : bool, optional
         If True, use greedy gateway consensus sites, by default False
     circular_only : bool, optional
@@ -2288,9 +2386,9 @@ def gateway_assembly(
     >>> len(products_LR)
     2
-    Now let's understand the `multi_site_only` parameter. Let's consider a case where we are swapping fragments
+    Now let's understand the ``multi_site_only`` parameter. Let's consider a case where we are swapping fragments
     between two plasmids using an LR reaction. Experimentally, we expect to obtain two plasmids, resulting from the
-    swapping between the two att sites. That's what we get if we set `multi_site_only` to True.
+    swapping between the two att sites. That's what we get if we set ``multi_site_only`` to True.
     >>> attL2 = 'aaataatgattttattttgactgatagtgacctgttcgttgcaacaaattgataagcaatgctttcttataatgccaactttgtacaagaaagctg'
     >>> attR2 = 'accactttgtacaagaaagctgaacgagaaacgtaaaatgatataaatatcaatatattaaattagattttgcataaaaaacagactacataatactgtaaaacacaacatatccagtcactatg'
@@ -2300,7 +2398,7 @@ def gateway_assembly(
     >>> len(products)
     2
-    However, if we set `multi_site_only` to False, we get 4 products, which also include the intermediate products
+    However, if we set ``multi_site_only`` to False, we get 4 products, which also include the intermediate products
     where the two plasmids are combined into a single one through recombination of a single att site. This is an
     intermediate of the reaction, and typically we don't want it:
@@ -2316,13 +2414,19 @@ def gateway_assembly(
             f"Invalid reaction type: {reaction_type}, can only be BP or LR"
         )
-    def algo(x, y, _l):
+    def algorithm_fn(x, y, _l):
         return gateway_overlap(x, y, reaction_type, greedy)
     filter_results_function = None if not multi_site_only else assembly_is_multi_site
     products = common_function_assembly_products(
-        frags, None, algo, circular_only, filter_results_function
+        frags, None, algorithm_fn, circular_only, filter_results_function
+    )
+    products = _recast_sources(
+        products,
+        GatewaySource,
+        reaction_type=reaction_type,
+        greedy=greedy,
     )
     if len(products) == 0:
@@ -2479,7 +2583,10 @@ def homologous_recombination_integration(
     """
     fragments = common_handle_insertion_fragments(genome, inserts)
-    return common_function_integration_products(fragments, limit, common_sub_strings)
+    products = common_function_integration_products(
+        fragments, limit, common_sub_strings
+    )
+    return _recast_sources(products, HomologousRecombinationSource)
 def homologous_recombination_excision(
@@ -2515,7 +2622,8 @@ def homologous_recombination_excision(
     >>> products
     [Dseqrecord(o25), Dseqrecord(-32)]
     """
-    return common_function_excision_products(genome, limit, common_sub_strings)
+    products = common_function_excision_products(genome, limit, common_sub_strings)
+    return _recast_sources(products, HomologousRecombinationSource)
 def cre_lox_integration(
@@ -2524,7 +2632,7 @@ def cre_lox_integration(
     """Returns the products resulting from the integration of an insert (or inserts joined
     through cre-lox recombination among them) into the genome through cre-lox integration.
-    Also works with lox66 and lox71 (see `pydna.cre_lox` for more details).
+    Also works with lox66 and lox71 (see ``pydna.cre_lox`` for more details).
     Parameters
     ----------
@@ -2574,7 +2682,8 @@ def cre_lox_integration(
     """
     fragments = common_handle_insertion_fragments(genome, inserts)
-    return common_function_integration_products(fragments, None, cre_loxP_overlap)
+    products = common_function_integration_products(fragments, None, cre_loxP_overlap)
+    return _recast_sources(products, CreLoxRecombinationSource)
 def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
@@ -2624,4 +2733,151 @@ def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
     >>> res2
     [Dseqrecord(o39), Dseqrecord(-45)]
     """
-    return common_function_excision_products(genome, None, cre_loxP_overlap)
+    products = common_function_excision_products(genome, None, cre_loxP_overlap)
+    return _recast_sources(products, CreLoxRecombinationSource)
+def crispr_integration(
+    genome: _Dseqrecord,
+    inserts: list[_Dseqrecord],
+    guides: list[_Primer],
+    limit: int = 40,
+) -> list[_Dseqrecord]:
+    """
+    Return the homologous recombination integration products supported by Cas9 guides.
+    Candidate products are obtained from ``homologous_recombination_integration``.
+    A candidate is kept only if every guide has at least one Cas9 cut inside its
+    repair region, i.e. the span going from the start of the ``right_location`` of
+    the first source input to the end of the ``left_location`` of the third one.
+    The guides are appended to the source inputs of every kept product.
+    Parameters
+    ----------
+    genome : _Dseqrecord
+        Target genome sequence
+    inserts : list[_Dseqrecord]
+        DNA fragment(s) to insert
+    guides : list[_Primer]
+        List of guide RNAs as Primer objects. This may change in the future.
+    limit : int, optional
+        Minimum overlap length required, by default 40
+    Returns
+    -------
+    list[_Dseqrecord]
+        List of integrated DNA molecules
+    Raises
+    ------
+    ValueError
+        If ``guides`` is empty, if a guide has no Cas9 cutsite in ``genome``, or if,
+        for a candidate in which every guide cuts inside the repair region, at
+        least one guide also cuts outside of it.
+    Warns
+    -----
+    UserWarning
+        If at least one candidate product was discarded.
+    Examples
+    --------
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.assembly2 import crispr_integration
+    >>> from pydna.primer import Primer
+    >>> genome = Dseqrecord("aaccggttcaatgcaaacagtaatgatggatgacattcaaagcac", name="genome")
+    >>> insert = Dseqrecord("aaccggttAAAAAAAAAttcaaagcac", name="insert")
+    >>> guide = Primer("ttcaatgcaaacagtaatga", name="guide")
+    >>> product, *_ = crispr_integration(genome, [insert], [guide], 8)
+    >>> product
+    Dseqrecord(-27)
+    """
+    if len(guides) == 0:
+        raise ValueError("At least one guide RNA is required for CRISPR integration")
+    # Every possible homologous recombination integration is a candidate product
+    candidates = homologous_recombination_integration(genome, inserts, limit)
+    # Cut positions of each guide on the genome, in the same order as ``guides``
+    cuts_per_guide = []
+    for guide in guides:
+        cutsites = genome.seq.get_cutsites(cas9(str(guide.seq)))
+        if len(cutsites) == 0:
+            raise ValueError(
+                f"Could not find Cas9 cutsite in the target sequence using the guide: {guide.name}"
+            )
+        # Each cutsite unpacks as (cut, enzyme); only the position of the cut is kept
+        cuts_per_guide.append([cut[0] for (cut, _) in cutsites])
+    # Check if the candidate products contain the guide cuts inside their repair
+    # region, and attach the guides to the ones that do. This is very important!
+    kept = []
+    for candidate in candidates:
+        # The second element of source.input is conventionally the insert/repair
+        # fragment; the first and third are the two bits of the genome
+        region_start = _location_boundaries(candidate.source.input[0].right_location)[0]
+        region_end = _location_boundaries(candidate.source.input[2].left_location)[1]
+        repair_region = create_location(region_start, region_end, len(genome))
+        inside = [[c for c in cuts if c in repair_region] for cuts in cuts_per_guide]
+        any_inside = [len(found) != 0 for found in inside]
+        fully_inside = [
+            len(found) == len(cuts) for found, cuts in zip(inside, cuts_per_guide)
+        ]
+        if not all(any_inside):
+            # At least one guide never cuts in the repair region: discard candidate
+            continue
+        guides_used = [g for g, ok in zip(guides, fully_inside) if ok]
+        # Add the used guides to the product <----- VERY IMPORTANT!
+        candidate.source.input.extend([SourceInput(sequence=g) for g in guides_used])
+        kept.append(candidate)
+        # NOTE(review): raising right after recording the product means the filter
+        # on guides_used can never drop a guide for a product that is returned;
+        # confirm whether a warning was intended here instead of an error.
+        if not all(fully_inside):
+            raise ValueError(
+                "Some guides cut outside the repair region, please check the guides"
+            )
+    if len(kept) != len(candidates):
+        warnings.warn(
+            "Some recombination products were discarded because they had off-target cuts",
+            category=UserWarning,
+            stacklevel=2,
+        )
+    return _recast_sources(kept, CRISPRSource)
+def pcr_assembly(
+    template: _Dseqrecord,
+    fwd_primer: _Primer,
+    rvs_primer: _Primer,
+    add_primer_features: bool = False,
+    limit: int = 14,
+    mismatches: int = 0,
+) -> list[_Dseqrecord]:
+    """Return the linear products of a PCR, computed with ``PCRAssembly``.
+    The fragments given to ``PCRAssembly`` are the forward primer, the template and
+    the reverse primer, in that order, and its ``limit`` is ``limit + mismatches``.
+    Parameters
+    ----------
+    template : _Dseqrecord
+        Template sequence
+    fwd_primer : _Primer
+        Forward primer
+    rvs_primer : _Primer
+        Reverse primer
+    add_primer_features : bool, optional
+        If True, add primer features to the product, by default False
+    limit : int, optional
+        Minimum overlap length required, by default 14
+    mismatches : int, optional
+        Maximum number of mismatches, by default 0
+    Returns
+    -------
+    list[_Dseqrecord]
+        List of assembled DNA molecules
+    """
+    primers_and_template = [fwd_primer, template, rvs_primer]
+    assembly = PCRAssembly(
+        primers_and_template,
+        limit=limit + mismatches,
+        mismatches=mismatches,
+    )
+    amplicons = assembly.assemble_linear()
+    same_primer = str(fwd_primer.seq).upper() == str(rvs_primer.seq).upper()
+    if same_primer:
+        # Both primers are the same (case-insensitive): remove duplicates by keeping
+        # only products whose second source input is not reverse complemented
+        # (presumably the template -- confirm against PCRAssembly)
+        amplicons = [
+            amplicon
+            for amplicon in amplicons
+            if not amplicon.source.input[1].reverse_complemented
+        ]
+    if add_primer_features:
+        amplicons = [
+            annotate_primer_binding_sites(amplicon, primers_and_template)
+            for amplicon in amplicons
+        ]
+    return _recast_sources(amplicons, PCRSource, add_primer_features=add_primer_features)

pydna 5.5.3__py3-none-any.whl → 5.5.4__py3-none-any.whl

pydna 5.5.3py3-none-any.whl → 5.5.4py3-none-any.whl