PyPI - pydna - Versions diffs - 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl - Mend

pydna 5.5.3-py3-none-any.whl → 5.5.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

pydna/__init__.py +24 -193
pydna/_pretty.py +8 -8
pydna/_thermodynamic_data.py +3 -3
pydna/alphabet.py +995 -0
pydna/amplicon.py +19 -24
pydna/amplify.py +75 -95
pydna/assembly.py +64 -81
pydna/assembly2.py +650 -405
pydna/codon.py +4 -4
pydna/common_sub_strings.py +6 -8
pydna/contig.py +203 -10
pydna/design.py +176 -60
pydna/download.py +6 -15
pydna/dseq.py +1794 -718
pydna/dseqrecord.py +220 -171
pydna/gateway.py +6 -6
pydna/gel.py +5 -5
pydna/genbank.py +43 -46
pydna/genbankfixer.py +89 -92
pydna/ladders.py +11 -12
pydna/oligonucleotide_hybridization.py +124 -0
pydna/opencloning_models.py +680 -0
pydna/parsers.py +45 -32
pydna/primer.py +4 -4
pydna/primer_screen.py +833 -0
pydna/readers.py +14 -9
pydna/seq.py +137 -47
pydna/seqrecord.py +54 -62
pydna/sequence_picker.py +2 -5
pydna/sequence_regex.py +6 -6
pydna/tm.py +17 -17
pydna/types.py +21 -18
pydna/utils.py +97 -75
{pydna-5.5.3.dist-info → pydna-5.5.5.dist-info}/METADATA +14 -46
pydna-5.5.5.dist-info/RECORD +43 -0
{pydna-5.5.3.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
pydna/conftest.py +0 -42
pydna/genbankfile.py +0 -42
pydna/genbankrecord.py +0 -168
pydna/goldengate.py +0 -45
pydna/ligate.py +0 -62
pydna/user_cloning.py +0 -29
pydna-5.5.3.dist-info/RECORD +0 -45
{pydna-5.5.3.dist-info → pydna-5.5.5.dist-info/licenses}/LICENSE.txt +0 -0

pydna/assembly2.py CHANGED Viewed

@@ -4,29 +4,29 @@ Improved implementation of the assembly module. To see a list of issues with the
 see [issues tagged with fixed-with-new-assembly-model](https://github.com/pydna-group/pydna/issues?q=is%3Aissue%20state%3Aopen%20label%3Afixed-with-new-assembly-model)
 """
-import networkx as _nx
-import itertools as _itertools
+import networkx as nx
+import itertools
 from Bio.SeqFeature import SimpleLocation, Location
-from Bio.Seq import reverse_complement
 from Bio.Restriction.Restriction import RestrictionBatch
 import regex
 import copy
 from pydna.utils import (
-    shift_location as _shift_location,
+    shift_location,
     flatten,
-    location_boundaries as _location_boundaries,
-    locations_overlap as _locations_overlap,
+    location_boundaries,
+    locations_overlap,
     sum_is_sticky,
     limit_iterator,
     create_location,
 )
-from pydna._pretty import pretty_str as _pretty_str
+from pydna._pretty import pretty_str as ps
 from pydna.common_sub_strings import common_sub_strings as common_sub_strings_str
-from pydna.dseqrecord import Dseqrecord as _Dseqrecord
-from pydna.dseq import Dseq as _Dseq
-from pydna.primer import Primer as _Primer
-from pydna.seqrecord import SeqRecord as _SeqRecord
+from pydna.dseqrecord import Dseqrecord
+from pydna.dseq import Dseq
+from pydna.primer import Primer
+from pydna.seqrecord import SeqRecord
 from pydna.types import (
     CutSiteType,
     # TODO: allow user to enforce multi-site
@@ -38,11 +38,29 @@ from pydna.types import (
 )
 from pydna.gateway import gateway_overlap, find_gateway_sites
 from pydna.cre_lox import cre_loxP_overlap
+from pydna.alphabet import anneal_strands
+from typing import TYPE_CHECKING, Callable, Literal
+from pydna.opencloning_models import (
+    AssemblySource,
+    RestrictionAndLigationSource,
+    GibsonAssemblySource,
+    InFusionSource,
+    OverlapExtensionPCRLigationSource,
+    InVivoAssemblySource,
+    LigationSource,
+    GatewaySource,
+    HomologousRecombinationSource,
+    CreLoxRecombinationSource,
+    PCRSource,
+    SourceInput,
+    CRISPRSource,
+)
+from pydna.crispr import cas9
+import warnings
-from typing import TYPE_CHECKING, Callable
-if TYPE_CHECKING:
-    from Bio.Restriction import AbstractCut as _AbstractCut
+if TYPE_CHECKING:  # pragma: no cover
+    from Bio.Restriction import AbstractCut
 def gather_overlapping_locations(
@@ -54,45 +72,52 @@ def gather_overlapping_locations(
     the output will be [(loc1, loc2), (loc3,)].
     """
     # Make a graph with all the locations as nodes
-    G = _nx.Graph()
+    G = nx.Graph()
     for i, loc in enumerate(locs):
         G.add_node(i, location=loc)
     # Add edges between nodes that overlap
     for i in range(len(locs)):
         for j in range(i + 1, len(locs)):
-            if _locations_overlap(locs[i], locs[j], fragment_length):
+            if locations_overlap(locs[i], locs[j], fragment_length):
                 G.add_edge(i, j)
     # Get groups of overlapping locations
     groups = list()
-    for loc_set in _nx.connected_components(G):
+    for loc_set in nx.connected_components(G):
         groups.append(tuple(locs[i] for i in loc_set))
     # Sort by location of the first element in each group (does not matter which since they are overlapping)
-    groups.sort(key=lambda x: _location_boundaries(x[0])[0])
+    groups.sort(key=lambda x: location_boundaries(x[0])[0])
     return groups
 def ends_from_cutsite(
-    cutsite: CutSiteType, seq: _Dseq
+    cutsite: CutSiteType, seq: Dseq
 ) -> tuple[tuple[str, str], tuple[str, str]]:
     """Get the sticky or blunt ends created by a restriction enzyme cut.
-    Args:
-        cutsite (CutSiteType): A tuple ((cut_watson, ovhg), enzyme) describing where the cut occurs
-        seq (_Dseq): The DNA sequence being cut
+    Parameters
+    ----------
+    cutsite : CutSiteType
+        A tuple ((cut_watson, ovhg), enzyme) describing where the cut occurs
+    seq : _Dseq
+        The DNA sequence being cut
-    Raises:
-        ValueError: If cutsite is None
+    Raises
+    ------
+    ValueError
+        If cutsite is None
-    Returns:
-        tuple[tuple[str, str], tuple[str, str]]: A tuple of two tuples, each containing the type of end ('5\'', '3\'', or 'blunt')
+    Returns
+    -------
+    tuple[tuple[str, str], tuple[str, str]]
+        A tuple of two tuples, each containing the type of end ('5\'', '3\'', or 'blunt')
         and the sequence of the overhang. The first tuple is for the left end, second for the right end.
     >>> from Bio.Restriction import NotI
-    >>> x = _Dseq("ctcgGCGGCCGCcagcggccg")
+    >>> x = Dseq("ctcgGCGGCCGCcagcggccg")
     >>> x.get_cutsites(NotI)
     [((6, -4), NotI)]
     >>> ends_from_cutsite(x.get_cutsites(NotI)[0], x)
@@ -119,8 +144,8 @@ def ends_from_cutsite(
 def restriction_ligation_overlap(
-    seqx: _Dseqrecord,
-    seqy: _Dseqrecord,
+    seqx: Dseqrecord,
+    seqy: Dseqrecord,
     enzymes=RestrictionBatch,
     partial=False,
     allow_blunt=False,
@@ -129,14 +154,23 @@ def restriction_ligation_overlap(
     Like in sticky and gibson, the order matters (see example below of partial overlap)
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        enzymes (RestrictionBatch): The enzymes to use
-        partial (bool): Whether to allow partial overlaps
-        allow_blunt (bool): Whether to allow blunt ends
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Parameters
+    ----------
+    seqx : Dseqrecord
+        The first sequence
+    seqy : Dseqrecord
+        The second sequence
+    enzymes : RestrictionBatch
+        The enzymes to use
+    partial : bool
+        Whether to allow partial overlaps
+    allow_blunt : bool
+        Whether to allow blunt ends
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     >>> from pydna.dseqrecord import Dseqrecord
     >>> from pydna.assembly2 import restriction_ligation_overlap
@@ -178,7 +212,7 @@ def restriction_ligation_overlap(
     #     if not seqy.circular:
     #         cuts_y.append(((0, 0), None))
     matches = list()
-    for cut_x, cut_y in _itertools.product(cuts_x, cuts_y):
+    for cut_x, cut_y in itertools.product(cuts_x, cuts_y):
         # A blunt end
         if allow_blunt and cut_x[0][1] == cut_y[0][1] == 0:
             matches.append((cut_x[0][0], cut_y[0][0], 0))
@@ -222,7 +256,7 @@ def combine_algorithms(*algorithms: AssemblyAlgorithmType) -> AssemblyAlgorithmT
 def blunt_overlap(
-    seqx: _Dseqrecord, seqy: _Dseqrecord, limit=None
+    seqx: Dseqrecord, seqy: Dseqrecord, limit=None
 ) -> list[SequenceOverlap]:
     """
     Assembly algorithm to find blunt overlaps. Used for blunt ligation.
@@ -230,13 +264,19 @@ def blunt_overlap(
     It basically returns [(len(seqx), 0, 0)] if the right end of seqx is blunt and the
     left end of seqy is blunt (compatible with blunt ligation). Otherwise, it returns an empty list.
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        limit (int): There for compatibility, but it is ignored
+    Parameters
+    ----------
+    seqx : Dseqrecord
+        The first sequence
+    seqy : Dseqrecord
+        The second sequence
+    limit : int
+        There for compatibility, but it is ignored
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     >>> from pydna.assembly2 import blunt_overlap
     >>> from pydna.dseqrecord import Dseqrecord
@@ -254,7 +294,7 @@ def blunt_overlap(
 def common_sub_strings(
-    seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25
+    seqx: Dseqrecord, seqy: Dseqrecord, limit=25
 ) -> list[SequenceOverlap]:
     """
     Assembly algorithm to find common substrings of length == limit. see the docs of
@@ -317,30 +357,36 @@ def common_sub_strings(
     return [r for r in results if r not in shifted_matches]
-def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
+def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
     """
     Assembly algorithm to find terminal overlaps (e.g. for Gibson assembly).
     The order matters, we want alignments like:
-    ```
-    seqx:    oooo------xxxx
-    seqy:              xxxx------oooo
-    Product: oooo------xxxx------oooo
+    ::
-    Not like:
+        seqx:    oooo------xxxx
+        seqy:              xxxx------oooo
+        Product: oooo------xxxx------oooo
-    seqx:               oooo------xxxx
-    seqy:     xxxx------oooo
-    Product (unwanted): oooo
-    ```
+        Not like:
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        limit (int): Minimum length of the overlap
+        seqx:               oooo------xxxx
+        seqy:     xxxx------oooo
+        Product (unwanted): oooo
+    Parameters
+    ----------
+    seqx : Dseqrecord
+        The first sequence
+    seqy : Dseqrecord
+        The second sequence
+    limit : int
+        Minimum length of the overlap
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     >>> from pydna.dseqrecord import Dseqrecord
     >>> from pydna.assembly2 import gibson_overlap
@@ -357,9 +403,9 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
     # This is only relevant for linear fragments, so we don't need to worry about
     # shifting locations for circular fragments.
     trim_x_left = -seqx.seq.ovhg if seqx.seq.ovhg < 0 else 0
-    trim_x_right = seqx.seq.watson_ovhg() if seqx.seq.watson_ovhg() < 0 else None
+    trim_x_right = seqx.seq.watson_ovhg if seqx.seq.watson_ovhg < 0 else None
     trim_y_left = -seqy.seq.ovhg if seqy.seq.ovhg < 0 else 0
-    trim_y_right = seqy.seq.watson_ovhg() if seqy.seq.watson_ovhg() < 0 else None
+    trim_y_right = seqy.seq.watson_ovhg if seqy.seq.watson_ovhg < 0 else None
     stringx = str(seqx.seq[trim_x_left:trim_x_right]).upper()
     stringy = str(seqy.seq[trim_y_left:trim_y_right]).upper()
@@ -377,20 +423,26 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
     return [tuple(m) for m in matches]
-def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = False):
+def sticky_end_sub_strings(seqx: Dseqrecord, seqy: Dseqrecord, limit: bool = False):
     """
     Assembly algorithm for ligation of sticky ends.
     For now, if limit 0 / False (default) only full overlaps are considered.
     Otherwise, partial overlaps are also returned.
-    Args:
-        seqx (_Dseqrecord): The first sequence
-        seqy (_Dseqrecord): The second sequence
-        limit (bool): Whether to allow partial overlaps
+    Parameters
+    ----------
+    seqx : Dseqrecord
+        The first sequence
+    seqy : Dseqrecord
+        The second sequence
+    limit : bool
+        Whether to allow partial overlaps
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the two sequences
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the two sequences
     Ligation of fully overlapping sticky ends, note how the order matters
@@ -415,6 +467,7 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
     [(4, 0, 2)]
     """
     overlap = sum_is_sticky(
         seqx.seq.three_prime_end(), seqy.seq.five_prime_end(), limit
     )
@@ -424,7 +477,7 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
 def zip_match_leftwards(
-    seqx: _SeqRecord, seqy: _SeqRecord, match: SequenceOverlap
+    seqx: SeqRecord, seqy: SeqRecord, match: SequenceOverlap
 ) -> SequenceOverlap:
     """
     Starting from the rightmost edge of the match, return a new match encompassing the max
@@ -432,15 +485,15 @@ def zip_match_leftwards(
     than the limit or a shorter match if there are mismatches. This is convenient to maintain
     as many features as possible. It is used in PCR assembly.
-    >>> seq = _Dseqrecord('AAAAACGTCCCGT')
-    >>> primer = _Dseqrecord('ACGTCCCGT')
+    >>> seq = Dseqrecord('AAAAACGTCCCGT')
+    >>> primer = Dseqrecord('ACGTCCCGT')
     >>> match = (13, 9, 0) # an empty match at the end of each
     >>> zip_match_leftwards(seq, primer, match)
     (4, 0, 9)
     Works in circular molecules if the match spans the origin:
-    >>> seq = _Dseqrecord('TCCCGTAAAAACG', circular=True)
-    >>> primer = _Dseqrecord('ACGTCCCGT')
+    >>> seq = Dseqrecord('TCCCGTAAAAACG', circular=True)
+    >>> primer = Dseqrecord('ACGTCCCGT')
     >>> match = (6, 9, 0)
     >>> zip_match_leftwards(seq, primer, match)
     (10, 0, 9)
@@ -461,11 +514,11 @@ def zip_match_leftwards(
     # For those cases we shift by length, then go back
     end_on_x = match[0] + match[2]
-    if isinstance(seqx, _Dseqrecord) and seqx.circular and end_on_x <= len(seqx):
+    if isinstance(seqx, Dseqrecord) and seqx.circular and end_on_x <= len(seqx):
         end_on_x += len(seqx)
     end_on_y = match[1] + match[2]
-    if isinstance(seqy, _Dseqrecord) and seqy.circular and end_on_y <= len(seqy):
+    if isinstance(seqy, Dseqrecord) and seqy.circular and end_on_y <= len(seqy):
         end_on_y += len(seqy)
     count = 0
@@ -482,7 +535,7 @@ def zip_match_leftwards(
 def zip_match_rightwards(
-    seqx: _Dseqrecord, seqy: _Dseqrecord, match: SequenceOverlap
+    seqx: Dseqrecord, seqy: Dseqrecord, match: SequenceOverlap
 ) -> SequenceOverlap:
     """Same as zip_match_leftwards, but towards the right."""
@@ -498,19 +551,19 @@ def zip_match_rightwards(
     return (start_on_x, start_on_y, count)
-def seqrecord2_uppercase_DNA_string(seqr: _SeqRecord) -> str:
+def seqrecord2_uppercase_DNA_string(seqr: SeqRecord) -> str:
     """
     Transform a Dseqrecord to a sequence string where U is replaced by T, everything is upper case and
     circular sequences are repeated twice. This is used for PCR, to support primers with U's (e.g. for USER cloning).
     """
     out = str(seqr.seq).upper().replace("U", "T")
-    if isinstance(seqr, _Dseqrecord) and seqr.circular:
+    if isinstance(seqr, Dseqrecord) and seqr.circular:
         return out * 2
     return out
 def primer_template_overlap(
-    seqx: _Dseqrecord | _Primer, seqy: _Dseqrecord | _Primer, limit=25, mismatches=0
+    seqx: Dseqrecord | Primer, seqy: Dseqrecord | Primer, limit=25, mismatches=0
 ) -> list[SequenceOverlap]:
     """
     Assembly algorithm to find overlaps between a primer and a template. It accepts mismatches.
@@ -520,14 +573,21 @@ def primer_template_overlap(
     If seqx is a template and seqy is a primer, it represents the binding of a reverse primer,
     where the primer has been passed as its reverse complement (see examples).
-    Args:
-        seqx (_Dseqrecord | _Primer): The primer
-        seqy (_Dseqrecord | _Primer): The template
-        limit (int): Minimum length of the overlap
-        mismatches (int): Maximum number of mismatches (only substitutions, no deletion or insertion)
+    Parameters
+    ----------
+    seqx : Dseqrecord | Primer
+        The primer
+    seqy : Dseqrecord | Primer
+        The template
+    limit : int
+        Minimum length of the overlap
+    mismatches : int
+        Maximum number of mismatches (only substitutions, no deletion or insertion)
-    Returns:
-        list[SequenceOverlap]: A list of overlaps between the primer and the template
+    Returns
+    -------
+    list[SequenceOverlap]
+        A list of overlaps between the primer and the template
     >>> from pydna.dseqrecord import Dseqrecord
     >>> from pydna.primer import Primer
@@ -537,7 +597,7 @@ def primer_template_overlap(
     >>> primer_template_overlap(primer, template, limit=8, mismatches=0)
     [(0, 2, 8)]
-    This actually represents the binding of the primer `GCTGCTAA` (reverse complement)
+    This actually represents the binding of the primer ``GCTGCTAA`` (reverse complement)
     >>> primer_template_overlap(template, primer, limit=8, mismatches=0)
     [(2, 0, 8)]
     >>> primer_template_overlap(primer, template.reverse_complement(), limit=8, mismatches=0)
@@ -546,11 +606,11 @@ def primer_template_overlap(
     []
     """
-    if isinstance(seqx, _Primer) and isinstance(seqy, _Dseqrecord):
+    if isinstance(seqx, Primer) and isinstance(seqy, Dseqrecord):
         primer = seqx
         template = seqy
         reverse_primer = False
-    elif isinstance(seqx, _Dseqrecord) and isinstance(seqy, _Primer):
+    elif isinstance(seqx, Dseqrecord) and isinstance(seqy, Primer):
         primer = seqy
         template = seqx
         reverse_primer = True
@@ -604,45 +664,8 @@ def primer_template_overlap(
     return list(sorted(out))
-def fill_left(seq: _Dseq) -> _Dseq:
-    """Fill the left overhang of a sequence with the complementary sequence."""
-    new_watson = seq.watson
-    new_crick = seq.crick
-    # Watson 5' overhang
-    if seq.ovhg < 0:
-        new_crick = new_crick + reverse_complement(seq.watson[: -seq.ovhg])
-    # Crick 5' overhang
-    elif seq.ovhg > 0:
-        new_watson = reverse_complement(seq.crick[-seq.ovhg :]) + new_watson
-    return _Dseq(new_watson, new_crick, 0)
-def fill_right(seq: _Dseq) -> _Dseq:
-    """Fill the right overhang of a sequence with the complementary sequence."""
-    new_watson = seq.watson
-    new_crick = seq.crick
-    # Watson 3' overhang
-    watson_ovhg = seq.watson_ovhg()
-    if watson_ovhg < 0:
-        new_watson = new_watson + reverse_complement(seq.crick[:-watson_ovhg])
-    # Crick 3' overhang
-    elif watson_ovhg > 0:
-        new_crick = reverse_complement(seq.watson[-watson_ovhg:]) + new_crick
-    return _Dseq(new_watson, new_crick, seq.ovhg)
-def fill_dseq(seq: _Dseq) -> _Dseq:
-    """Fill the overhangs of a sequence with the complementary sequence."""
-    return fill_left(fill_right(seq))
 def reverse_complement_assembly(
-    assembly: EdgeRepresentationAssembly, fragments: list[_Dseqrecord]
+    assembly: EdgeRepresentationAssembly, fragments: list[Dseqrecord]
 ) -> EdgeRepresentationAssembly:
     """Complement an assembly, i.e. reverse the order of the fragments and the orientation of the overlaps."""
     new_assembly = list()
@@ -656,7 +679,7 @@ def reverse_complement_assembly(
 def filter_linear_subassemblies(
     linear_assemblies: list[EdgeRepresentationAssembly],
     circular_assemblies: list[EdgeRepresentationAssembly],
-    fragments: list[_Dseqrecord],
+    fragments: list[Dseqrecord],
 ) -> list[EdgeRepresentationAssembly]:
     """Remove linear assemblies which are sub-assemblies of circular assemblies"""
     all_circular_assemblies = circular_assemblies + [
@@ -702,7 +725,7 @@ def assembly2str(assembly: EdgeRepresentationAssembly) -> str:
     ('1[8:14]:2[1:7]', '2[10:17]:3[1:8]')
     The reason for this is that by default, a feature '[8:14]' when present in a tuple
-    is printed to the console as `SimpleLocation(ExactPosition(8), ExactPosition(14), strand=1)` (very long).
+    is printed to the console as ``SimpleLocation(ExactPosition(8), ExactPosition(14), strand=1)`` (very long).
     """
     return str(tuple(f"{u}{lu}:{v}{lv}" for u, v, lu, lv in assembly))
@@ -715,7 +738,7 @@ def assembly2str_tuple(assembly: EdgeRepresentationAssembly) -> str:
 def assembly_has_mismatches(
-    fragments: list[_Dseqrecord], assembly: EdgeRepresentationAssembly
+    fragments: list[Dseqrecord], assembly: EdgeRepresentationAssembly
 ) -> bool:
     """Check if an assembly has mismatches. This should never happen and if so it returns an error."""
     for u, v, loc_u, loc_v in assembly:
@@ -731,7 +754,7 @@ def assembly_has_mismatches(
 def assembly_is_circular(
-    assembly: EdgeRepresentationAssembly, fragments: list[_Dseqrecord]
+    assembly: EdgeRepresentationAssembly, fragments: list[Dseqrecord]
 ) -> bool:
     """
     Based on the topology of the locations of an assembly, determine if it is circular.
@@ -740,22 +763,22 @@ def assembly_is_circular(
     if assembly[0][0] != assembly[-1][1]:
         return False
     elif (
-        isinstance(fragments[abs(assembly[0][0]) - 1], _Dseqrecord)
+        isinstance(fragments[abs(assembly[0][0]) - 1], Dseqrecord)
         and fragments[abs(assembly[0][0]) - 1].circular
     ):
         return True
     else:
         return (
-            _location_boundaries(assembly[0][2])[0]
-            > _location_boundaries(assembly[-1][3])[0]
+            location_boundaries(assembly[0][2])[0]
+            > location_boundaries(assembly[-1][3])[0]
         )
 def assemble(
-    fragments: list[_Dseqrecord],
+    fragments: list[Dseqrecord],
     assembly: EdgeRepresentationAssembly,
     is_insertion: bool = False,
-) -> _Dseqrecord:
+) -> Dseqrecord:
     """Generate a Dseqrecord from an assembly and a list of fragments."""
     if is_insertion:
@@ -772,14 +795,15 @@ def assemble(
         u, v, loc_u, loc_v = asm_edge
         f_u = fragments[u - 1] if u > 0 else fragments[-u - 1].reverse_complement()
         f_v = fragments[v - 1] if v > 0 else fragments[-v - 1].reverse_complement()
-        seq_u = str(loc_u.extract(f_u).seq).upper()
-        seq_v = str(loc_v.extract(f_v).seq).upper()
-        if seq_u != seq_v:
+        seq_u = str(loc_u.extract(f_u).seq)
+        seq_v = str(loc_v.extract(f_v).seq.rc())
+        # Test if seq_u and seq_v anneal
+        if not anneal_strands(seq_u, seq_v):
             raise ValueError("Mismatch in assembly")
     # We transform into Dseqrecords (for primers)
     dseqr_fragments = [
-        f if isinstance(f, _Dseqrecord) else _Dseqrecord(f) for f in fragments
+        f if isinstance(f, Dseqrecord) else Dseqrecord(f) for f in fragments
     ]
     subfragments = get_assembly_subfragments(
         dseqr_fragments, subfragment_representation
@@ -787,49 +811,33 @@ def assemble(
     # Length of the overlaps between consecutive assembly fragments
     fragment_overlaps = [len(e[-1]) for e in assembly]
+    out_dseqrecord = subfragments.pop(0)
-    out_dseqrecord = _Dseqrecord(subfragments[0])
+    for fragment, overlap in zip(subfragments, fragment_overlaps):
+        out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_right()
+        out_dseqrecord.seq = out_dseqrecord.seq.exo1_end(overlap)
+        fragment.seq = fragment.seq.cast_to_ds_left()
+        fragment.seq = fragment.seq.exo1_front(overlap)
+        out_dseqrecord += fragment
-    for fragment, overlap in zip(subfragments[1:], fragment_overlaps):
-        # Shift the features of the right fragment to the left by `overlap`
-        new_features = [
-            f._shift(len(out_dseqrecord) - overlap) for f in fragment.features
-        ]
-        # Join the left sequence including the overlap with the right sequence without the overlap
-        # we use fill_right / fill_left so that it works for ligation of sticky ends
-        out_dseqrecord = _Dseqrecord(
-            fill_right(out_dseqrecord.seq) + fill_left(fragment.seq)[overlap:],
-            features=out_dseqrecord.features + new_features,
-        )
-    # For circular assemblies, close the loop and wrap origin-spanning features
+    # For circular assemblies, process the fragment and loop
     if is_circular:
+        out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_left()
+        out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_right()
         overlap = fragment_overlaps[-1]
+        out_dseqrecord.seq = out_dseqrecord.seq.exo1_front(overlap)
+        out_dseqrecord.seq = out_dseqrecord.seq.exo1_end(overlap)
+        out_dseqrecord = out_dseqrecord.looped()
-        # Special case for blunt circularisation
-        if overlap == 0:
-            return out_dseqrecord.looped()
-        # Remove trailing overlap
-        out_dseqrecord = _Dseqrecord(
-            fill_dseq(out_dseqrecord.seq)[:-overlap],
-            features=out_dseqrecord.features,
-            circular=True,
-        )
-        for feature in out_dseqrecord.features:
-            start, end = _location_boundaries(feature.location)
-            if start >= len(out_dseqrecord) or end > len(out_dseqrecord):
-                # Wrap around the origin
-                feature.location = _shift_location(
-                    feature.location, 0, len(out_dseqrecord)
-                )
+    out_dseqrecord.source = AssemblySource.from_subfragment_representation(
+        subfragment_representation, fragments, is_circular
+    )
     return out_dseqrecord
 def annotate_primer_binding_sites(
-    input_dseqr: _Dseqrecord, fragments: list[_Dseqrecord]
-) -> _Dseqrecord:
+    input_dseqr: Dseqrecord, fragments: list[Dseqrecord]
+) -> Dseqrecord:
     """Annotate the primer binding sites in a Dseqrecord."""
     fwd, _, rvs = fragments
     start_rvs = len(input_dseqr) - len(rvs)
@@ -909,37 +917,36 @@ def subfragment_representation2edge_representation(
 def get_assembly_subfragments(
-    fragments: list[_Dseqrecord],
+    fragments: list[Dseqrecord],
     subfragment_representation: SubFragmentRepresentationAssembly,
-) -> list[_Dseqrecord]:
+) -> list[Dseqrecord]:
     """From the fragment representation returned by edge_representation2subfragment_representation, get the subfragments that are joined together.
     Subfragments are the slices of the fragments that are joined together
-    For example:
-    ```
-      --A--
-    TACGTAAT
-      --B--
-     TCGTAACGA
-    Gives: TACGTAA / CGTAACGA
-    ```
-    To reproduce:
-    ```
-    a = Dseqrecord('TACGTAAT')
-    b = Dseqrecord('TCGTAACGA')
-    f = Assembly([a, b], limit=5)
-    a0 = f.get_linear_assemblies()[0]
-    print(assembly2str(a0))
-    a0_subfragment_rep =edge_representation2subfragment_representation(a0, False)
-    for f in get_assembly_subfragments([a, b], a0_subfragment_rep):
-        print(f.seq)
-    # prints TACGTAA and CGTAACGA
-    ```
-    Subfragments: `cccccgtatcgtgt`, `atcgtgtactgtcatattc`
+    For example::
+          --A--
+        TACGTAAT
+          --B--
+         TCGTAACGA
+        Gives: TACGTAA / CGTAACGA
+    To reproduce::
+        a = Dseqrecord('TACGTAAT')
+        b = Dseqrecord('TCGTAACGA')
+        f = Assembly([a, b], limit=5)
+        a0 = f.get_linear_assemblies()[0]
+        print(assembly2str(a0))
+        a0_subfragment_rep =edge_representation2subfragment_representation(a0, False)
+        for f in get_assembly_subfragments([a, b], a0_subfragment_rep):
+            print(f.seq)
+        # prints TACGTAA and CGTAACGA
+    Subfragments: ``cccccgtatcgtgt``, ``atcgtgtactgtcatattc``
     """
     subfragments = list()
     for node, start_location, end_location in subfragment_representation:
@@ -953,19 +960,26 @@ def get_assembly_subfragments(
 def extract_subfragment(
-    seq: _Dseqrecord, start_location: Location, end_location: Location
-) -> _Dseqrecord:
+    seq: Dseqrecord, start_location: Location | None, end_location: Location | None
+) -> Dseqrecord:
     """Extract a subfragment from a sequence for an assembly, given the start and end locations of the subfragment."""
-    start = 0 if start_location is None else _location_boundaries(start_location)[0]
-    end = None if end_location is None else _location_boundaries(end_location)[1]
+    if seq.circular and (start_location is None or end_location is None):
+        raise ValueError(
+            "Start and end locations cannot be None for circular sequences"
+        )
+        # This could be used to have consistent behaviour for circular sequences, where the start is arbitrary. However,
+        # they should never get None, so this is not used.
+        # if start_location is None:
+        #     start_location = end_location
+        # elif end_location is None:
+        #     end_location = start_location
+    start = 0 if start_location is None else location_boundaries(start_location)[0]
+    end = None if end_location is None else location_boundaries(end_location)[1]
     # Special case, some of it could be handled by better Dseqrecord slicing in the future
-    if (
-        seq.circular
-        and start_location is not None
-        and end_location is not None
-        and _locations_overlap(start_location, end_location, len(seq))
-    ):
+    if seq.circular and locations_overlap(start_location, end_location, len(seq)):
         # The overhang is different for origin-spanning features, for instance
         # for a feature join{[12:13], [0:3]} in a sequence of length 13, the overhang
         # is -4, not 9
@@ -975,7 +989,7 @@ def extract_subfragment(
             ovhg = 0
         dummy_cut = ((start, ovhg), None)
         open_seq = seq.apply_cut(dummy_cut, dummy_cut)
-        return _Dseqrecord(fill_dseq(open_seq.seq), features=open_seq.features)
+        return Dseqrecord(open_seq.seq.cast_to_ds(), features=open_seq.features)
     return seq[start:end]
@@ -1028,33 +1042,38 @@ class Assembly:
     The assembly contains a directed graph, where nodes represent fragments and
     edges represent overlaps between fragments:
     - The node keys are integers, representing the index of the fragment in the
-    input list of fragments. The sign of the node key represents the orientation
-    of the fragment, positive for forward orientation, negative for reverse orientation.
+      input list of fragments. The sign of the node key represents the orientation
+      of the fragment, positive for forward orientation, negative for reverse orientation.
     - The edges contain the locations of the overlaps in the fragments. For an edge (u, v, key):
         - u and v are the nodes connected by the edge.
         - key is a string that represents the location of the overlap. In the format:
-        'u[start:end](strand):v[start:end](strand)'.
+          'u[start:end](strand):v[start:end](strand)'.
         - Edges have a 'locations' attribute, which is a list of two FeatureLocation objects,
-        representing the location of the overlap in the u and v fragment, respectively.
+          representing the location of the overlap in the u and v fragment, respectively.
         - You can think of an edge as a representation of the join of two fragments.
     If fragment 1 and 2 share a subsequence of 6bp, [8:14] in fragment 1 and [1:7] in fragment 2,
     there will be 4 edges representing that overlap in the graph, for all possible
     orientations of the fragments (see add_edges_from_match for details):
-    - `(1, 2, '1[8:14]:2[1:7]')`
-    - `(2, 1, '2[1:7]:1[8:14]')`
-    - `(-1, -2, '-1[0:6]:-2[10:16]')`
-    - `(-2, -1, '-2[10:16]:-1[0:6]')`
+    - ``(1, 2, '1[8:14]:2[1:7]')``
+    - ``(2, 1, '2[1:7]:1[8:14]')``
+    - ``(-1, -2, '-1[0:6]:-2[10:16]')``
+    - ``(-2, -1, '-2[10:16]:-1[0:6]')``
     An assembly can be thought of as a tuple of graph edges, but instead of representing them with node indexes and keys, we represent them
     as u, v, locu, locv, where u and v are the nodes connected by the edge, and locu and locv are the locations of the overlap in the first
     and second fragment. Assemblies are then represented as:
     - Linear: ((1, 2, [8:14], [1:7]), (2, 3, [10:17], [1:8]))
     - Circular: ((1, 2, [8:14], [1:7]), (2, 3, [10:17], [1:8]), (3, 1, [12:17], [1:6]))
     Note that the first and last fragment are the same in a circular assembly.
     The following constraints are applied to remove duplicate assemblies:
     - Circular assemblies: the first subfragment is not reversed, and has the smallest index in the input fragment list.
       use_fragment_order is ignored.
     - Linear assemblies:
@@ -1065,7 +1084,7 @@ class Assembly:
     frags : list
         A list of Dseqrecord objects.
     limit : int, optional
-        The shortest shared homology to be considered, this is passed as the third argument to the `algorithm` function.
+        The shortest shared homology to be considered, this is passed as the third argument to the ``algorithm`` function.
         For certain algorithms, this might be ignored.
     algorithm : function, optional
         The algorithm used to determine the shared sequences. It's a function that takes two Dseqrecord objects as inputs,
@@ -1113,14 +1132,15 @@ class Assembly:
     def __init__(
         self,
-        frags: list[_Dseqrecord],
+        frags: list[Dseqrecord],
         limit: int = 25,
         algorithm: AssemblyAlgorithmType = common_sub_strings,
         use_fragment_order: bool = True,
         use_all_fragments: bool = False,
     ):
         # TODO: allow for the same fragment to be included more than once?
-        self.G = _nx.MultiDiGraph()
+        self.G = nx.MultiDiGraph()
         # Add positive and negative nodes for forward and reverse fragments
         self.G.add_nodes_from((i + 1, {"seq": f}) for (i, f) in enumerate(frags))
         self.G.add_nodes_from(
@@ -1128,12 +1148,12 @@ class Assembly:
         )
         # Iterate over all possible combinations of fragments
-        fragment_pairs = _itertools.combinations(
+        fragment_pairs = itertools.combinations(
             filter(lambda x: x > 0, self.G.nodes), 2
         )
         for i, j in fragment_pairs:
             # All the relative orientations of the fragments in the pair
-            for u, v in _itertools.product([i, -i], [j, -j]):
+            for u, v in itertools.product([i, -i], [j, -j]):
                 u_seq = self.G.nodes[u]["seq"]
                 v_seq = self.G.nodes[v]["seq"]
                 matches = algorithm(u_seq, v_seq, limit)
@@ -1151,7 +1171,7 @@ class Assembly:
     @classmethod
     def assembly_is_valid(
         cls,
-        fragments: list[_Dseqrecord | _Primer],
+        fragments: list[Dseqrecord | Primer],
         assembly: EdgeRepresentationAssembly,
         is_circular: bool,
         use_all_fragments: bool,
@@ -1167,6 +1187,23 @@ class Assembly:
         if len(assembly) == 0:
             return False
+        # Topology check -> Circular sequences cannot be first or last in a linear assembly.
+        # For example, let's imagine aACGTc (linear) and gACGTc (circular).
+        # It should not be possible to join them into a linear assembly. It's similar if we
+        # think of a restriction-ligation assembly, example: aGAATTCc (linear) and gGAATTCc
+        # (circular).
+        # A linear product can be generated where the circular molecule is cut open, so that
+        # one end joins the linear molecule and the other end is free, but for now it's not a
+        # relevant product and it's excluded.
+        first_fragment = fragments[abs(assembly[0][0]) - 1]
+        last_fragment = fragments[abs(assembly[-1][1]) - 1]
+        if not is_circular and (
+            isinstance(first_fragment, Dseqrecord)
+            and first_fragment.circular
+            or (isinstance(last_fragment, Dseqrecord) and last_fragment.circular)
+        ):
+            return False
         if use_all_fragments and len(fragments) != len(
             set(flatten(map(abs, e[:2]) for e in assembly))
         ):
@@ -1204,8 +1241,8 @@ class Assembly:
             # Incompatible as described in figure above
             fragment = fragments[abs(v1) - 1]
             if (
-                isinstance(fragment, _Primer) or not fragment.circular
-            ) and _location_boundaries(start_location)[1] >= _location_boundaries(
+                isinstance(fragment, Primer) or not fragment.circular
+            ) and location_boundaries(start_location)[1] >= location_boundaries(
                 end_location
             )[
                 1
@@ -1229,14 +1266,15 @@ class Assembly:
         match: SequenceOverlap,
         u: int,
         v: int,
-        first: _Dseqrecord,
-        secnd: _Dseqrecord,
+        first: Dseqrecord,
+        secnd: Dseqrecord,
     ):
-        """Add edges to the graph from a match returned by the `algorithm` function (see pydna.common_substrings). For
+        """Add edges to the graph from a match returned by the ``algorithm`` function (see pydna.common_sub_strings). For
         format of edges (see documentation of the Assembly class).
-        Matches are directional, because not all `algorithm` functions return the same match for (u,v) and (v,u). For example,
+        Matches are directional, because not all ``algorithm`` functions return the same match for (u,v) and (v,u). For example,
         homologous recombination does but sticky end ligation does not. The function returns two edges:
         - Fragments in the orientation they were passed, with locations of the match (u, v, loc_u, loc_v)
         - Reverse complement of the fragments with inverted order, with flipped locations (-v, -u, flip(loc_v), flip(loc_u)).
@@ -1248,10 +1286,10 @@ class Assembly:
         else:
             # We use shift_location with 0 to wrap origin-spanning features
             locs = [
-                _shift_location(
+                shift_location(
                     SimpleLocation(x_start, x_start + length), 0, len(first)
                 ),
-                _shift_location(
+                shift_location(
                     SimpleLocation(y_start, y_start + length), 0, len(secnd)
                 ),
             ]
@@ -1286,7 +1324,7 @@ class Assembly:
         """
         # Copy the graph since we will add the begin and end mock nodes
-        G = _nx.MultiDiGraph(self.G)
+        G = nx.MultiDiGraph(self.G)
         G.add_nodes_from(["begin", "end"])
         if self.use_fragment_order:
@@ -1324,7 +1362,7 @@ class Assembly:
     def node_path2assembly_list(
         self, cycle: list[int], circular: bool
     ) -> list[EdgeRepresentationAssembly]:
-        """Convert a node path in the format [1, 2, 3] (as returned by _nx.cycles.simple_cycles) to a list of all
+        """Convert a node path in the format [1, 2, 3] (as returned by networkx.cycles.simple_cycles) to a list of all
           possible assemblies.
         There may be multiple assemblies for a given node path, if there are several edges connecting two nodes,
@@ -1338,11 +1376,11 @@ class Assembly:
             combine.append([(u, v, key) for key in self.G[u][v]])
         return [
             tuple(map(self.format_assembly_edge, x))
-            for x in _itertools.product(*combine)
+            for x in itertools.product(*combine)
         ]
     def get_unique_linear_paths(
-        self, G_with_begin_end: _nx.MultiDiGraph, max_paths=10000
+        self, G_with_begin_end: nx.MultiDiGraph, max_paths=10000
     ) -> list[list[int]]:
         """Get unique linear paths from the graph, removing those that contain the same node twice."""
         # We remove the begin and end nodes, and get all paths without edges
@@ -1353,8 +1391,8 @@ class Assembly:
         node_paths = [
             x[1:-1]
             for x in limit_iterator(
-                _nx.all_simple_paths(
-                    _nx.DiGraph(G_with_begin_end),
+                nx.all_simple_paths(
+                    nx.DiGraph(G_with_begin_end),
                     "begin",
                     "end",
                     cutoff=(len(self.fragments) + 1),
@@ -1403,7 +1441,7 @@ class Assembly:
         sorted_cycles = map(
             circular_permutation_min_abs,
             limit_iterator(
-                _nx.cycles.simple_cycles(self.G, length_bound=len(self.fragments)),
+                nx.cycles.simple_cycles(self.G, length_bound=len(self.fragments)),
                 10000,
             ),
         )
@@ -1446,17 +1484,18 @@ class Assembly:
         Here we check if one of the joins between fragments represents the edges of an insertion assembly
         The fragment must be linear, and the join must be as indicated below
-        ```
-        --------         -------           Fragment 1
-            ||            ||
-            xxxxxxxx      ||               Fragment 2
-                  ||      ||
-                  oooooooooo               Fragment 3
-        ```
+        ::
+            --------         -------           Fragment 1
+                ||            ||
+                xxxxxxxx      ||               Fragment 2
+                      ||      ||
+                      oooooooooo               Fragment 3
         The above example will be [(1, 2, [4:6], [0:2]), (2, 3, [6:8], [0:2]), (3, 1, [8:10], [9:11])]
         These could be returned in any order by simple_cycles, so we sort the edges so that the first
-        and last `u` and `v` match the fragment that gets the insertion (1 in the example above).
+        and last ``u`` and ``v`` match the fragment that gets the insertion (1 in the example above).
         """
         edge_pair_index = list()
@@ -1467,8 +1506,8 @@ class Assembly:
             fragment = self.fragments[abs(v1) - 1]
             # Find the pair of edges that should be last and first ((3, 1, [8:10], [9:11]), (1, 2, [4:6], [0:2])) in
             # the example above. Only one of the pairs of edges should satisfy this condition for the topology to make sense.
-            left_of_insertion = _location_boundaries(start_location)[0]
-            right_of_insertion = _location_boundaries(end_location)[0]
+            left_of_insertion = location_boundaries(start_location)[0]
+            right_of_insertion = location_boundaries(end_location)[0]
             if not fragment.circular and (
                 right_of_insertion >= left_of_insertion
                 # The below condition is for single-site integration.
@@ -1480,7 +1519,7 @@ class Assembly:
                 #
                 # The locations of homology on the genome are [0:10] and [2:12], so not identical
                 # but they overlap.
-                or _locations_overlap(start_location, end_location, len(fragment))
+                or locations_overlap(start_location, end_location, len(fragment))
             ):
                 edge_pair_index.append(i)
@@ -1511,13 +1550,13 @@ class Assembly:
         fragment1 = self.fragments[abs(f1) - 1]
         fragment2 = self.fragments[abs(f2) - 1]
-        if not _locations_overlap(
+        if not locations_overlap(
             loc_f1_1, loc_f1_2, len(fragment1)
-        ) or not _locations_overlap(loc_f2_2, loc_f2_1, len(fragment2)):
+        ) or not locations_overlap(loc_f2_2, loc_f2_1, len(fragment2)):
             return same_assembly
         # Sort to make compatible with insertion assembly
-        if _location_boundaries(loc_f1_1)[0] > _location_boundaries(loc_f1_2)[0]:
+        if location_boundaries(loc_f1_1)[0] > location_boundaries(loc_f1_2)[0]:
             new_assembly = same_assembly[::-1]
         else:
             new_assembly = same_assembly[:]
@@ -1530,10 +1569,10 @@ class Assembly:
         fragment2 = self.fragments[abs(f2) - 1]
         # Extract boundaries
-        f2_1_start, _ = _location_boundaries(loc_f2_1)
-        f2_2_start, f2_2_end = _location_boundaries(loc_f2_2)
-        f1_1_start, _ = _location_boundaries(loc_f1_1)
-        f1_2_start, f1_2_end = _location_boundaries(loc_f1_2)
+        f2_1_start, _ = location_boundaries(loc_f2_1)
+        f2_2_start, f2_2_end = location_boundaries(loc_f2_2)
+        f1_1_start, _ = location_boundaries(loc_f1_1)
+        f1_2_start, f1_2_end = location_boundaries(loc_f1_2)
         overlap_diff = len(fragment1[f1_1_start:f1_2_end]) - len(
             fragment2[f2_1_start:f2_2_end]
@@ -1573,7 +1612,7 @@ class Assembly:
                 "only_adjacent_edges not implemented for insertion assemblies"
             )
-        cycles = limit_iterator(_nx.cycles.simple_cycles(self.G), 10000)
+        cycles = limit_iterator(nx.cycles.simple_cycles(self.G), 10000)
         # We apply constraints already here because sometimes the combinatorial explosion is too large
         if self.use_all_fragments:
@@ -1592,7 +1631,7 @@ class Assembly:
             )
         # We find cycles first
-        iterator = limit_iterator(_nx.cycles.simple_cycles(self.G), 10000)
+        iterator = limit_iterator(nx.cycles.simple_cycles(self.G), 10000)
         assemblies = sum(
             map(lambda x: self.node_path2assembly_list(x, True), iterator), []
         )
@@ -1616,29 +1655,27 @@ class Assembly:
     def assemble_linear(
         self, only_adjacent_edges: bool = False, max_assemblies: int = 50
-    ) -> list[_Dseqrecord]:
+    ) -> list[Dseqrecord]:
         """Assemble linear constructs, from assemblies returned by self.get_linear_assemblies."""
         assemblies = self.get_linear_assemblies(only_adjacent_edges, max_assemblies)
         return [assemble(self.fragments, a) for a in assemblies]
     def assemble_circular(
         self, only_adjacent_edges: bool = False, max_assemblies: int = 50
-    ) -> list[_Dseqrecord]:
+    ) -> list[Dseqrecord]:
         """Assemble circular constructs, from assemblies returned by self.get_circular_assemblies."""
         assemblies = self.get_circular_assemblies(only_adjacent_edges, max_assemblies)
         return [assemble(self.fragments, a) for a in assemblies]
-    def assemble_insertion(
-        self, only_adjacent_edges: bool = False
-    ) -> list[_Dseqrecord]:
+    def assemble_insertion(self, only_adjacent_edges: bool = False) -> list[Dseqrecord]:
         """Assemble insertion constructs, from assemblies returned by self.get_insertion_assemblies."""
         assemblies = self.get_insertion_assemblies(only_adjacent_edges)
         return [assemble(self.fragments, a, is_insertion=True) for a in assemblies]
     def get_locations_on_fragments(self) -> dict[int, dict[str, list[Location]]]:
         """Get a dictionary where the keys are the nodes in the graph, and the values are dictionaries with keys
-        `left`, `right`, containing (for each fragment) the locations where the fragment is joined to another fragment on its left
-        and right side. The values in `left` and `right` are often the same, except in restriction-ligation with partial overlap enabled,
+        ``left``, ``right``, containing (for each fragment) the locations where the fragment is joined to another fragment on its left
+        and right side. The values in ``left`` and ``right`` are often the same, except in restriction-ligation with partial overlap enabled,
         where we can end up with a situation like this:
         GGTCTCCCCAATT and aGGTCTCCAACCAA as fragments
@@ -1651,13 +1688,14 @@ class Assembly:
         aGGTCTCCxxCCAATT
         tCCAGAGGTTGGxxAA
-        Would return
-        {
-            1: {'left': [7:9], 'right': [9:11]},
-            2: {'left': [8:10], 'right': [10:12]},
-            -1: {'left': [2:4], 'right': [4:6]},
-            -2: {'left': [2:4], 'right': [4:6]}
-        }
+        Would return::
+            {
+                1: {'left': [7:9], 'right': [9:11]},
+                2: {'left': [8:10], 'right': [10:12]},
+                -1: {'left': [2:4], 'right': [4:6]},
+                -2: {'left': [2:4], 'right': [4:6]}
+            }
         """
@@ -1671,10 +1709,10 @@ class Assembly:
                         if edge_location not in this_dict[key]:
                             this_dict[key].append(edge_location)
             this_dict["left"] = sorted(
-                this_dict["left"], key=lambda x: _location_boundaries(x)[0]
+                this_dict["left"], key=lambda x: location_boundaries(x)[0]
             )
             this_dict["right"] = sorted(
-                this_dict["right"], key=lambda x: _location_boundaries(x)[0]
+                this_dict["right"], key=lambda x: location_boundaries(x)[0]
             )
             locations_on_fragments[node] = this_dict
@@ -1686,10 +1724,10 @@ class Assembly:
         and prevent including partially digested fragments. For example, imagine the following fragment being an input for a digestion
         and ligation assembly, where the enzyme cuts at the sites indicated by the vertical lines:
-        ```
-                 x       y       z
-          -------|-------|-------|---------
-        ```
+        ::
+                     x       y       z
+              -------|-------|-------|---------
         We would only want assemblies that contain subfragments start-x, x-y, y-z, z-end, and not start-x, y-end, for instance.
         The latter would indicate that the fragment was partially digested.
@@ -1721,7 +1759,7 @@ class Assembly:
             pairs = list()
             for pair in zip(left, right):
-                pairs += list(_itertools.product(*pair))
+                pairs += list(itertools.product(*pair))
             allowed_location_pairs[node] = pairs
         fragment_assembly = edge_representation2subfragment_representation(
@@ -1734,7 +1772,7 @@ class Assembly:
     def __repr__(self):
         # https://pyformat.info
-        return _pretty_str(
+        return ps(
             "Assembly\n"
             "fragments..: {sequences}\n"
             "limit(bp)..: {limit}\n"
@@ -1750,12 +1788,12 @@ class Assembly:
 class PCRAssembly(Assembly):
     """
-    An assembly that represents a PCR, where `fragments` is a list of primer, template, primer (in that order).
-    It always uses the `primer_template_overlap` algorithm and accepts the `mismatches` argument to indicate
+    An assembly that represents a PCR, where ``fragments`` is a list of primer, template, primer (in that order).
+    It always uses the ``primer_template_overlap`` algorithm and accepts the ``mismatches`` argument to indicate
     the number of mismatches allowed in the overlap. Only supports substitution mismatches, not indels.
     """
-    def __init__(self, frags: list[_Dseqrecord | _Primer], limit=25, mismatches=0):
+    def __init__(self, frags: list[Dseqrecord | Primer], limit=25, mismatches=0):
         value_error = ValueError(
             "PCRAssembly assembly must be initialised with a list/tuple of primer, template, primer"
@@ -1765,15 +1803,15 @@ class PCRAssembly(Assembly):
         # Validate the inputs: should be a series of primer, template, primer
         wrong_fragment_class = (
-            not isinstance(frags[0], _Primer),
-            isinstance(frags[1], _Primer),
-            not isinstance(frags[2], _Primer),
+            not isinstance(frags[0], Primer),
+            isinstance(frags[1], Primer),
+            not isinstance(frags[2], Primer),
         )
         if any(wrong_fragment_class):
             raise value_error
         # TODO: allow for the same fragment to be included more than once?
-        self.G = _nx.MultiDiGraph()
+        self.G = nx.MultiDiGraph()
         # Add positive and negative nodes for forward and reverse fragments
         self.G.add_nodes_from((i + 1, {"seq": f}) for (i, f) in enumerate(frags))
         self.G.add_nodes_from(
@@ -1786,8 +1824,8 @@ class PCRAssembly(Assembly):
             # primer, template, primer
             p1, t, p2 = (i + 1, i + 2, i + 3)
             primer_ids += [p1, p2]
-            pairs += list(_itertools.product([p1, p2], [t, -t]))
-            pairs += list(_itertools.product([t, -t], [-p1, -p2]))
+            pairs += list(itertools.product([p1, p2], [t, -t]))
+            pairs += list(itertools.product([t, -t], [-p1, -p2]))
         for u, v in pairs:
             u_seq = self.G.nodes[u]["seq"]
@@ -1826,20 +1864,33 @@ class PCRAssembly(Assembly):
             "get_insertion_assemblies not implemented for PCR assemblies"
         )
+    def assemble_linear(
+        self, only_adjacent_edges: bool = False, max_assemblies: int = 50
+    ) -> list[Dseqrecord]:
+        """
+        Overrides the parent method to ensure that the 5' of the crick strand of the product matches the
+        sequence of the reverse primer. This is important when using primers with dUTP (for USER cloning).
+        """
+        results = super().assemble_linear(only_adjacent_edges, max_assemblies)
+        for result in results:
+            rp = self.fragments[2]
+            result.seq = result.seq[: -len(rp)] + Dseq(str(rp.seq.rc()))
+        return results
 class SingleFragmentAssembly(Assembly):
     """
     An assembly that represents the circularisation or splicing of a single fragment.
     """
-    def __init__(self, frags: [_Dseqrecord], limit=25, algorithm=common_sub_strings):
+    def __init__(self, frags: [Dseqrecord], limit=25, algorithm=common_sub_strings):
         if len(frags) != 1:
             raise ValueError(
                 "SingleFragmentAssembly assembly must be initialised with a single fragment"
             )
         # TODO: allow for the same fragment to be included more than once?
-        self.G = _nx.MultiDiGraph()
+        self.G = nx.MultiDiGraph()
         frag = frags[0]
         # Add positive and negative nodes for forward and reverse fragments
         self.G.add_node(1, seq=frag)
@@ -1890,8 +1941,8 @@ class SingleFragmentAssembly(Assembly):
             if x[0][2] == x[0][3]:
                 return False
             # We don't want to get overlap only (e.g. GAATTCcatGAATTC giving GAATTC)
-            left_start, _ = _location_boundaries(x[0][2])
-            _, right_end = _location_boundaries(x[0][3])
+            left_start, _ = location_boundaries(x[0][2])
+            _, right_end = location_boundaries(x[0][3])
             if left_start == 0 and right_end == len(self.fragments[0]):
                 return False
             return True
@@ -1914,18 +1965,19 @@ class SingleFragmentAssembly(Assembly):
 def common_function_assembly_products(
-    frags: list[_Dseqrecord],
+    frags: list[Dseqrecord],
     limit: int | None,
     algorithm: Callable,
     circular_only: bool,
     filter_results_function: Callable | None = None,
-) -> list[_Dseqrecord]:
+    only_adjacent_edges: bool = False,
+) -> list[Dseqrecord]:
     """Common function to avoid code duplication. Could be simplified further
     once SingleFragmentAssembly and Assembly are merged.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
     limit : int or None
         Minimum overlap length required, or None if not applicable
@@ -1933,10 +1985,14 @@ def common_function_assembly_products(
         Function that determines valid overlaps between fragments
     circular_only : bool
         If True, only return circular assemblies
+    filter_results_function : Callable or None
+        Function that filters the results
+    only_adjacent_edges : bool
+        If True, only return assemblies that use only adjacent edges
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
     """
     if len(frags) == 1:
@@ -1945,10 +2001,10 @@ def common_function_assembly_products(
         asm = Assembly(
             frags, limit, algorithm, use_fragment_order=False, use_all_fragments=True
         )
-    output_assemblies = asm.get_circular_assemblies()
+    output_assemblies = asm.get_circular_assemblies(only_adjacent_edges)
     if not circular_only and len(frags) > 1:
         output_assemblies += filter_linear_subassemblies(
-            asm.get_linear_assemblies(), output_assemblies, frags
+            asm.get_linear_assemblies(only_adjacent_edges), output_assemblies, frags
         )
     if not circular_only and len(frags) == 1:
         output_assemblies += asm.get_insertion_assemblies()
@@ -1959,14 +2015,29 @@ def common_function_assembly_products(
     return [assemble(frags, a) for a in output_assemblies]
+def _recast_sources(
+    products: list[Dseqrecord], source_cls, **extra_fields
+) -> list[Dseqrecord]:
+    """Recast the `source` of each product to `source_cls` with optional extras.
+    This avoids repeating the same for-loop across many assembly functions.
+    """
+    for prod in products:
+        prod.source = source_cls(
+            **prod.source.to_unserialized_dict(),
+            **extra_fields,
+        )
+    return products
 def gibson_assembly(
-    frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
-) -> list[_Dseqrecord]:
+    frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
+) -> list[Dseqrecord]:
     """Returns the products for Gibson assembly.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
     limit : int, optional
         Minimum overlap length required, by default 25
@@ -1975,23 +2046,25 @@ def gibson_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
     """
-    return common_function_assembly_products(
+    products = common_function_assembly_products(
         frags, limit, gibson_overlap, circular_only
     )
+    return _recast_sources(products, GibsonAssemblySource)
 def in_fusion_assembly(
-    frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
-) -> list[_Dseqrecord]:
+    frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
+) -> list[Dseqrecord]:
     """Returns the products for in-fusion assembly. This is the same as Gibson
     assembly, but with a different name.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
     limit : int, optional
         Minimum overlap length required, by default 25
@@ -2000,21 +2073,23 @@ def in_fusion_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
     """
-    return gibson_assembly(frags, limit)
+    products = gibson_assembly(frags, limit)
+    return _recast_sources(products, InFusionSource)
 def fusion_pcr_assembly(
-    frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
-) -> list[_Dseqrecord]:
+    frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
+) -> list[Dseqrecord]:
     """Returns the products for fusion PCR assembly. This is the same as Gibson
     assembly, but with a different name.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
     limit : int, optional
         Minimum overlap length required, by default 25
@@ -2023,20 +2098,21 @@ def fusion_pcr_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
     """
-    return gibson_assembly(frags, limit)
+    products = gibson_assembly(frags, limit)
+    return _recast_sources(products, OverlapExtensionPCRLigationSource)
 def in_vivo_assembly(
-    frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
-) -> list[_Dseqrecord]:
+    frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
+) -> list[Dseqrecord]:
     """Returns the products for in vivo assembly (IVA), which relies on homologous recombination between the fragments.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
     limit : int, optional
         Minimum overlap length required, by default 25
@@ -2045,30 +2121,32 @@ def in_vivo_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
     """
-    return common_function_assembly_products(
+    products = common_function_assembly_products(
         frags, limit, common_sub_strings, circular_only
     )
+    return _recast_sources(products, InVivoAssemblySource)
 def restriction_ligation_assembly(
-    frags: list[_Dseqrecord],
-    enzymes: list["_AbstractCut"],
+    frags: list[Dseqrecord],
+    enzymes: list["AbstractCut"],
     allow_blunt: bool = True,
     circular_only: bool = False,
-) -> list[_Dseqrecord]:
+) -> list[Dseqrecord]:
     """Returns the products for restriction ligation assembly:
-    * Finds cutsites in the fragments
-    * Finds all products that could be assembled by ligating the fragments based on those cutsites
-    * Will NOT return products that combine an existing end with an end generated by the same enzyme (see example below)
+    - Finds cutsites in the fragments
+    - Finds all products that could be assembled by ligating the fragments based on those cutsites
+    - Will NOT return products that combine an existing end with an end generated by the same enzyme (see example below)
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
-    enzymes : list[_AbstractCut]
+    enzymes : list[AbstractCut]
         List of restriction enzymes to use
     allow_blunt : bool, optional
         If True, allow blunt end ligations, by default True
@@ -2077,15 +2155,15 @@ def restriction_ligation_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
     Examples
     --------
     In the example below, we plan to assemble a plasmid from a backbone and an insert, using the EcoRI and SalI enzymes.
-    Note how 2 circular products are returned, one contains the insert (`acgt`)
-    and the desired part of the backbone (`cccccc`), the other contains the
-    reversed insert (`tgga`) and the cut-out part of the backbone (`aaa`).
+    Note how 2 circular products are returned, one contains the insert (``acgt``)
+    and the desired part of the backbone (``cccccc``), the other contains the
+    reversed insert (``tgga``) and the cut-out part of the backbone (``aaa``).
     >>> from pydna.assembly2 import restriction_ligation_assembly
     >>> from pydna.dseqrecord import Dseqrecord
@@ -2119,28 +2197,33 @@ def restriction_ligation_assembly(
     TTAAGtttC
     """
-    def algo(x, y, _l):
+    def algorithm_fn(x, y, _l):
         # By default, we allow blunt ends
         return restriction_ligation_overlap(x, y, enzymes, False, allow_blunt)
-    return common_function_assembly_products(frags, None, algo, circular_only)
+    products = common_function_assembly_products(
+        frags, None, algorithm_fn, circular_only, only_adjacent_edges=True
+    )
+    return _recast_sources(
+        products, RestrictionAndLigationSource, restriction_enzymes=enzymes
+    )
 def golden_gate_assembly(
-    frags: list[_Dseqrecord],
-    enzymes: list["_AbstractCut"],
+    frags: list[Dseqrecord],
+    enzymes: list["AbstractCut"],
     allow_blunt: bool = True,
     circular_only: bool = False,
-) -> list[_Dseqrecord]:
+) -> list[Dseqrecord]:
     """Returns the products for Golden Gate assembly. This is the same as
     restriction ligation assembly, but with a different name. Check the documentation
-    for `restriction_ligation_assembly` for more details.
+    for ``restriction_ligation_assembly`` for more details.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
-    enzymes : list[_AbstractCut]
+    enzymes : list[AbstractCut]
         List of restriction enzymes to use
     allow_blunt : bool, optional
         If True, allow blunt end ligations, by default True
@@ -2149,30 +2232,30 @@ def golden_gate_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
     Examples
     --------
-    See the example for `restriction_ligation_assembly`.
+    See the example for ``restriction_ligation_assembly``.
     """
     return restriction_ligation_assembly(frags, enzymes, allow_blunt, circular_only)
 def ligation_assembly(
-    frags: list[_Dseqrecord],
+    frags: list[Dseqrecord],
     allow_blunt: bool = False,
     allow_partial_overlap: bool = False,
     circular_only: bool = False,
-) -> list[_Dseqrecord]:
+) -> list[Dseqrecord]:
     """Returns the products for ligation assembly, as inputs pass the fragments (digested if needed) that
     will be ligated.
-    For most cases, you probably should use `restriction_ligation_assembly` instead.
+    For most cases, you probably should use ``restriction_ligation_assembly`` instead.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
     allow_blunt : bool, optional
         If True, allow blunt end ligations, by default False
@@ -2183,7 +2266,7 @@ def ligation_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
@@ -2215,11 +2298,14 @@ def ligation_assembly(
         return sticky_end_sub_strings(x, y, allow_partial_overlap)
     if allow_blunt:
-        algo = combine_algorithms(sticky_end_algorithm, blunt_overlap)
+        algorithm_fn = combine_algorithms(sticky_end_algorithm, blunt_overlap)
     else:
-        algo = sticky_end_algorithm
+        algorithm_fn = sticky_end_algorithm
-    return common_function_assembly_products(frags, None, algo, circular_only)
+    products = common_function_assembly_products(
+        frags, None, algorithm_fn, circular_only
+    )
+    return _recast_sources(products, LigationSource)
 def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
@@ -2235,20 +2321,20 @@ def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
 def gateway_assembly(
-    frags: list[_Dseqrecord],
-    reaction_type: str,
+    frags: list[Dseqrecord],
+    reaction_type: Literal["BP", "LR"],
     greedy: bool = False,
     circular_only: bool = False,
     multi_site_only: bool = False,
-) -> list[_Dseqrecord]:
+) -> list[Dseqrecord]:
     """Returns the products for Gateway assembly / Gateway cloning.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to assemble
-    reaction_type : str
-        Type of Gateway reaction, either 'BP' or 'LR'
+    reaction_type : Literal['BP', 'LR']
+        Type of Gateway reaction
     greedy : bool, optional
         If True, use greedy gateway consensus sites, by default False
     circular_only : bool, optional
@@ -2261,7 +2347,7 @@ def gateway_assembly(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of assembled DNA molecules
@@ -2288,9 +2374,9 @@ def gateway_assembly(
     >>> len(products_LR)
     2
-    Now let's understand the `multi_site_only` parameter. Let's consider a case where we are swapping fragments
+    Now let's understand the ``multi_site_only`` parameter. Let's consider a case where we are swapping fragments
     between two plasmids using an LR reaction. Experimentally, we expect to obtain two plasmids, resulting from the
-    swapping between the two att sites. That's what we get if we set `multi_site_only` to True.
+    swapping between the two att sites. That's what we get if we set ``multi_site_only`` to True.
     >>> attL2 = 'aaataatgattttattttgactgatagtgacctgttcgttgcaacaaattgataagcaatgctttcttataatgccaactttgtacaagaaagctg'
     >>> attR2 = 'accactttgtacaagaaagctgaacgagaaacgtaaaatgatataaatatcaatatattaaattagattttgcataaaaaacagactacataatactgtaaaacacaacatatccagtcactatg'
@@ -2300,7 +2386,7 @@ def gateway_assembly(
     >>> len(products)
     2
-    However, if we set `multi_site_only` to False, we get 4 products, which also include the intermediate products
+    However, if we set ``multi_site_only`` to False, we get 4 products, which also include the intermediate products
     where the two plasmids are combined into a single one through recombination of a single att site. This is an
     intermediate of the reaction, and typically we don't want it:
@@ -2316,13 +2402,19 @@ def gateway_assembly(
             f"Invalid reaction type: {reaction_type}, can only be BP or LR"
         )
-    def algo(x, y, _l):
+    def algorithm_fn(x, y, _l):
         return gateway_overlap(x, y, reaction_type, greedy)
     filter_results_function = None if not multi_site_only else assembly_is_multi_site
     products = common_function_assembly_products(
-        frags, None, algo, circular_only, filter_results_function
+        frags, None, algorithm_fn, circular_only, filter_results_function
+    )
+    products = _recast_sources(
+        products,
+        GatewaySource,
+        reaction_type=reaction_type,
+        greedy=greedy,
     )
     if len(products) == 0:
@@ -2342,13 +2434,13 @@ def gateway_assembly(
 def common_function_integration_products(
-    frags: list[_Dseqrecord], limit: int | None, algorithm: Callable
-) -> list[_Dseqrecord]:
+    frags: list[Dseqrecord], limit: int | None, algorithm: Callable
+) -> list[Dseqrecord]:
     """Common function to avoid code duplication for integration products.
     Parameters
     ----------
-    frags : list[_Dseqrecord]
+    frags : list[Dseqrecord]
         List of DNA fragments to integrate
     limit : int or None
         Minimum overlap length required, or None if not applicable
@@ -2357,7 +2449,7 @@ def common_function_integration_products(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of integrated DNA molecules
     """
     if len(frags) == 1:
@@ -2378,27 +2470,27 @@ def common_function_integration_products(
 def common_handle_insertion_fragments(
-    genome: _Dseqrecord, inserts: list[_Dseqrecord]
-) -> list[_Dseqrecord]:
+    genome: Dseqrecord, inserts: list[Dseqrecord]
+) -> list[Dseqrecord]:
     """Common function to handle / validate insertion fragments.
     Parameters
     ----------
-    genome : _Dseqrecord
+    genome : Dseqrecord
         Target genome sequence
-    inserts : list[_Dseqrecord] or _Dseqrecord
+    inserts : list[Dseqrecord] or Dseqrecord
         DNA fragment(s) to insert
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List containing genome and insert fragments
     """
-    if not isinstance(genome, _Dseqrecord):
+    if not isinstance(genome, Dseqrecord):
         raise ValueError("Genome must be a Dseqrecord object")
     if not isinstance(inserts, list) or not all(
-        isinstance(f, _Dseqrecord) for f in inserts
+        isinstance(f, Dseqrecord) for f in inserts
     ):
         raise ValueError("Inserts must be a list of Dseqrecord objects")
@@ -2409,13 +2501,13 @@ def common_handle_insertion_fragments(
 def common_function_excision_products(
-    genome: _Dseqrecord, limit: int | None, algorithm: Callable
-) -> list[_Dseqrecord]:
+    genome: Dseqrecord, limit: int | None, algorithm: Callable
+) -> list[Dseqrecord]:
     """Common function to avoid code duplication for excision products.
     Parameters
     ----------
-    genome : _Dseqrecord
+    genome : Dseqrecord
         Target genome sequence
     limit : int or None
         Minimum overlap length required, or None if not applicable
@@ -2424,7 +2516,7 @@ def common_function_excision_products(
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of excised DNA molecules
     """
     asm = SingleFragmentAssembly([genome], limit, algorithm)
@@ -2432,25 +2524,25 @@ def common_function_excision_products(
 def homologous_recombination_integration(
-    genome: _Dseqrecord,
-    inserts: list[_Dseqrecord],
+    genome: Dseqrecord,
+    inserts: list[Dseqrecord],
     limit: int = 40,
-) -> list[_Dseqrecord]:
+) -> list[Dseqrecord]:
     """Returns the products resulting from the integration of an insert (or inserts joined
     through in vivo recombination) into the genome through homologous recombination.
     Parameters
     ----------
-    genome : _Dseqrecord
+    genome : Dseqrecord
         Target genome sequence
-    inserts : list[_Dseqrecord]
+    inserts : list[Dseqrecord]
         DNA fragment(s) to insert
     limit : int, optional
         Minimum homology length required, by default 40
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of integrated DNA molecules
@@ -2479,25 +2571,28 @@ def homologous_recombination_integration(
     """
     fragments = common_handle_insertion_fragments(genome, inserts)
-    return common_function_integration_products(fragments, limit, common_sub_strings)
+    products = common_function_integration_products(
+        fragments, limit, common_sub_strings
+    )
+    return _recast_sources(products, HomologousRecombinationSource)
 def homologous_recombination_excision(
-    genome: _Dseqrecord, limit: int = 40
-) -> list[_Dseqrecord]:
+    genome: Dseqrecord, limit: int = 40
+) -> list[Dseqrecord]:
     """Returns the products resulting from the excision of a fragment from the genome through
     homologous recombination.
     Parameters
     ----------
-    genome : _Dseqrecord
+    genome : Dseqrecord
         Target genome sequence
     limit : int, optional
         Minimum homology length required, by default 40
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List containing excised plasmid and remaining genome sequence
     Examples
@@ -2515,27 +2610,28 @@ def homologous_recombination_excision(
     >>> products
     [Dseqrecord(o25), Dseqrecord(-32)]
     """
-    return common_function_excision_products(genome, limit, common_sub_strings)
+    products = common_function_excision_products(genome, limit, common_sub_strings)
+    return _recast_sources(products, HomologousRecombinationSource)
 def cre_lox_integration(
-    genome: _Dseqrecord, inserts: list[_Dseqrecord]
-) -> list[_Dseqrecord]:
+    genome: Dseqrecord, inserts: list[Dseqrecord]
+) -> list[Dseqrecord]:
     """Returns the products resulting from the integration of an insert (or inserts joined
     through cre-lox recombination among them) into the genome through cre-lox integration.
-    Also works with lox66 and lox71 (see `pydna.cre_lox` for more details).
+    Also works with lox66 and lox71 (see ``pydna.cre_lox`` for more details).
     Parameters
     ----------
-    genome : _Dseqrecord
+    genome : Dseqrecord
         Target genome sequence
-    inserts : list[_Dseqrecord] or _Dseqrecord
+    inserts : list[Dseqrecord] or Dseqrecord
         DNA fragment(s) to insert
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List of integrated DNA molecules
     Examples
@@ -2574,20 +2670,21 @@ def cre_lox_integration(
     """
     fragments = common_handle_insertion_fragments(genome, inserts)
-    return common_function_integration_products(fragments, None, cre_loxP_overlap)
+    products = common_function_integration_products(fragments, None, cre_loxP_overlap)
+    return _recast_sources(products, CreLoxRecombinationSource)
-def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
+def cre_lox_excision(genome: Dseqrecord) -> list[Dseqrecord]:
     """Returns the products for CRE-lox excision.
     Parameters
     ----------
-    genome : _Dseqrecord
+    genome : Dseqrecord
         Target genome sequence
     Returns
     -------
-    list[_Dseqrecord]
+    list[Dseqrecord]
         List containing excised plasmid and remaining genome sequence
     Examples
@@ -2624,4 +2721,152 @@ def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
     >>> res2
     [Dseqrecord(o39), Dseqrecord(-45)]
     """
-    return common_function_excision_products(genome, None, cre_loxP_overlap)
+    products = common_function_excision_products(genome, None, cre_loxP_overlap)
+    return _recast_sources(products, CreLoxRecombinationSource)
+def crispr_integration(
+    genome: Dseqrecord,
+    inserts: list[Dseqrecord],
+    guides: list[Primer],
+    limit: int = 40,
+) -> list[Dseqrecord]:
+    """
+    Returns the products for CRISPR integration.
+    Candidate products are computed with ``homologous_recombination_integration``
+    and a candidate is kept only if every guide has at least one Cas9 cut inside
+    the repair region of that candidate. The guides whose cuts all fall inside the
+    repair region are appended to ``product.source.input`` of each kept product.
+    Parameters
+    ----------
+    genome : Dseqrecord
+        Target genome sequence
+    inserts : list[Dseqrecord]
+        DNA fragment(s) to insert
+    guides : list[Primer]
+        List of guide RNAs as Primer objects. This may change in the future.
+    limit : int, optional
+        Minimum overlap length required, by default 40
+    Returns
+    -------
+    list[Dseqrecord]
+        List of integrated DNA molecules
+    Raises
+    ------
+    ValueError
+        If ``guides`` is empty, if a guide has no Cas9 cutsite in ``genome``, or if,
+        for a kept product, some guide also has cuts outside the repair region.
+    Warns
+    -----
+    UserWarning
+        If at least one recombination product was discarded.
+    Examples
+    --------
+    >>> from pydna.dseqrecord import Dseqrecord
+    >>> from pydna.assembly2 import crispr_integration
+    >>> from pydna.primer import Primer
+    >>> genome = Dseqrecord("aaccggttcaatgcaaacagtaatgatggatgacattcaaagcac", name="genome")
+    >>> insert = Dseqrecord("aaccggttAAAAAAAAAttcaaagcac", name="insert")
+    >>> guide = Primer("ttcaatgcaaacagtaatga", name="guide")
+    >>> product, *_ = crispr_integration(genome, [insert], [guide], 8)
+    >>> product
+    Dseqrecord(-27)
+    """
+    if len(guides) == 0:
+        raise ValueError("At least one guide RNA is required for CRISPR integration")
+    # Get all the possible products from the homologous recombination integration
+    products = homologous_recombination_integration(genome, inserts, limit)
+    # Verify that the guides cut in the region that will be repaired
+    # First we collect the positions where the guides cut
+    # (guide_cuts[k] holds the cut positions found in the genome for guides[k])
+    guide_cuts = []
+    for guide in guides:
+        enzyme = cas9(str(guide.seq))
+        possible_cuts = genome.seq.get_cutsites(enzyme)
+        if len(possible_cuts) == 0:
+            raise ValueError(
+                f"Could not find Cas9 cutsite in the target sequence using the guide: {guide.name}"
+            )
+        # Keep only the position of the cut
+        possible_cuts = [cut[0] for (cut, _) in possible_cuts]
+        guide_cuts.append(possible_cuts)
+    # Then, we check if the possible homologous recombination products contain the cuts
+    # from the guides inside the repair region.
+    # We also add the used guides to each product. This is very important!
+    valid_products = []
+    for i, product in enumerate(products):
+        # The second element of product.source.input is conventionally the insert/repair fragment
+        # The other two (first and third) are the two bits of the genome
+        repair_start = location_boundaries(product.source.input[0].right_location)[0]
+        # Here we do +1 because the position of the cut marks the boundary (e.g. 0:10, 10:20 if a cut is at pos 10)
+        repair_end = location_boundaries(product.source.input[2].left_location)[1] + 1
+        repair_location = create_location(repair_start, repair_end, len(genome))
+        # One entry per guide: whether any / all of its cuts fall inside the repair region
+        some_cuts_inside_repair = []
+        all_cuts_inside_repair = []
+        for cut_group in guide_cuts:
+            cuts_in_repair = [cut for cut in cut_group if cut in repair_location]
+            some_cuts_inside_repair.append(len(cuts_in_repair) != 0)
+            all_cuts_inside_repair.append(len(cuts_in_repair) == len(cut_group))
+        # Keep the product only if every guide cuts at least once inside the repair region
+        if all(some_cuts_inside_repair):
+            # The ``i`` below is local to the comprehension and indexes guides, not products
+            used_guides = [g for i, g in enumerate(guides) if all_cuts_inside_repair[i]]
+            # Add the used guides to the product <----- VERY IMPORTANT!
+            product.source.input.extend([SourceInput(sequence=g) for g in used_guides])
+            valid_products.append(product)
+            # NOTE(review): this raises after the product has already been mutated and
+            # appended, and it makes the used_guides filter above redundant (whenever
+            # this does not raise, all guides are used) -- confirm whether a warning
+            # was intended here instead of an exception.
+            if not all(all_cuts_inside_repair):
+                raise ValueError(
+                    "Some guides cut outside the repair region, please check the guides"
+                )
+    # Products that failed the check above were dropped; tell the caller
+    if len(valid_products) != len(products):
+        warnings.warn(
+            "Some recombination products were discarded because they had off-target cuts",
+            category=UserWarning,
+            stacklevel=2,
+        )
+    # presumably re-types each product's source as CRISPRSource -- verify against _recast_sources
+    return _recast_sources(valid_products, CRISPRSource)
+def pcr_assembly(
+    template: Dseqrecord,
+    fwd_primer: Primer,
+    rvs_primer: Primer,
+    add_primer_features: bool = False,
+    limit: int = 14,
+    mismatches: int = 0,
+) -> list[Dseqrecord]:
+    """Returns the products for PCR assembly.
+    The fragments ``[fwd_primer, template, rvs_primer]`` are passed to
+    ``PCRAssembly`` and its linear assemblies are returned.
+    Parameters
+    ----------
+    template : Dseqrecord
+        Template sequence
+    fwd_primer : Primer
+        Forward primer
+    rvs_primer : Primer
+        Reverse primer
+    add_primer_features : bool, optional
+        If True, add primer features to the product, by default False
+    limit : int, optional
+        Minimum overlap length required, by default 14. The value handed to
+        ``PCRAssembly`` as ``limit`` is ``limit + mismatches``.
+    mismatches : int, optional
+        Maximum number of mismatches, by default 0
+    Returns
+    -------
+    list[Dseqrecord]
+        List of assembled DNA molecules
+    """
+    # The limit given to PCRAssembly is widened by the number of allowed mismatches
+    minimal_annealing = limit + mismatches
+    # Order matters: index 1 of this list is the template
+    fragments = [fwd_primer, template, rvs_primer]
+    asm = PCRAssembly(
+        fragments,
+        limit=minimal_annealing,
+        mismatches=mismatches,
+    )
+    products = asm.assemble_linear()
+    # If both primers are the same (case-insensitive), remove duplicates by keeping
+    # only the products whose source.input[1] is not flagged as reverse complemented
+    # (presumably input[1] is the template entry -- verify against PCRAssembly)
+    if str(fwd_primer.seq).upper() == str(rvs_primer.seq).upper():
+        products = [p for p in products if not p.source.input[1].reverse_complemented]
+    if add_primer_features:
+        products = [annotate_primer_binding_sites(prod, fragments) for prod in products]
+    # presumably re-types each product's source as PCRSource -- verify against _recast_sources
+    return _recast_sources(products, PCRSource, add_primer_features=add_primer_features)

pydna 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl

pydna 5.5.3py3-none-any.whl → 5.5.5py3-none-any.whl