pydna 5.5.4__py3-none-any.whl → 5.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/assembly2.py CHANGED
@@ -4,29 +4,29 @@ Improved implementation of the assembly module. To see a list of issues with the
4
4
  see [issues tagged with fixed-with-new-assembly-model](https://github.com/pydna-group/pydna/issues?q=is%3Aissue%20state%3Aopen%20label%3Afixed-with-new-assembly-model)
5
5
  """
6
6
 
7
- import networkx as _nx
8
- import itertools as _itertools
7
+ import networkx as nx
8
+ import itertools
9
9
  from Bio.SeqFeature import SimpleLocation, Location
10
- from Bio.Seq import reverse_complement
10
+
11
11
  from Bio.Restriction.Restriction import RestrictionBatch
12
12
  import regex
13
13
  import copy
14
14
 
15
15
  from pydna.utils import (
16
- shift_location as _shift_location,
16
+ shift_location,
17
17
  flatten,
18
- location_boundaries as _location_boundaries,
19
- locations_overlap as _locations_overlap,
18
+ location_boundaries,
19
+ locations_overlap,
20
20
  sum_is_sticky,
21
21
  limit_iterator,
22
22
  create_location,
23
23
  )
24
- from pydna._pretty import pretty_str as _pretty_str
24
+ from pydna._pretty import pretty_str as ps
25
25
  from pydna.common_sub_strings import common_sub_strings as common_sub_strings_str
26
- from pydna.dseqrecord import Dseqrecord as _Dseqrecord
27
- from pydna.dseq import Dseq as _Dseq
28
- from pydna.primer import Primer as _Primer
29
- from pydna.seqrecord import SeqRecord as _SeqRecord
26
+ from pydna.dseqrecord import Dseqrecord
27
+ from pydna.dseq import Dseq
28
+ from pydna.primer import Primer
29
+ from pydna.seqrecord import SeqRecord
30
30
  from pydna.types import (
31
31
  CutSiteType,
32
32
  # TODO: allow user to enforce multi-site
@@ -38,6 +38,7 @@ from pydna.types import (
38
38
  )
39
39
  from pydna.gateway import gateway_overlap, find_gateway_sites
40
40
  from pydna.cre_lox import cre_loxP_overlap
41
+ from pydna.alphabet import anneal_strands
41
42
 
42
43
  from typing import TYPE_CHECKING, Callable, Literal
43
44
  from pydna.opencloning_models import (
@@ -59,7 +60,7 @@ from pydna.crispr import cas9
59
60
  import warnings
60
61
 
61
62
  if TYPE_CHECKING: # pragma: no cover
62
- from Bio.Restriction import AbstractCut as _AbstractCut
63
+ from Bio.Restriction import AbstractCut
63
64
 
64
65
 
65
66
  def gather_overlapping_locations(
@@ -71,29 +72,29 @@ def gather_overlapping_locations(
71
72
  the output will be [(loc1, loc2), (loc3,)].
72
73
  """
73
74
  # Make a graph with all the locations as nodes
74
- G = _nx.Graph()
75
+ G = nx.Graph()
75
76
  for i, loc in enumerate(locs):
76
77
  G.add_node(i, location=loc)
77
78
 
78
79
  # Add edges between nodes that overlap
79
80
  for i in range(len(locs)):
80
81
  for j in range(i + 1, len(locs)):
81
- if _locations_overlap(locs[i], locs[j], fragment_length):
82
+ if locations_overlap(locs[i], locs[j], fragment_length):
82
83
  G.add_edge(i, j)
83
84
 
84
85
  # Get groups of overlapping locations
85
86
  groups = list()
86
- for loc_set in _nx.connected_components(G):
87
+ for loc_set in nx.connected_components(G):
87
88
  groups.append(tuple(locs[i] for i in loc_set))
88
89
 
89
90
  # Sort by location of the first element in each group (does not matter which since they are overlapping)
90
- groups.sort(key=lambda x: _location_boundaries(x[0])[0])
91
+ groups.sort(key=lambda x: location_boundaries(x[0])[0])
91
92
 
92
93
  return groups
93
94
 
94
95
 
95
96
  def ends_from_cutsite(
96
- cutsite: CutSiteType, seq: _Dseq
97
+ cutsite: CutSiteType, seq: Dseq
97
98
  ) -> tuple[tuple[str, str], tuple[str, str]]:
98
99
  """Get the sticky or blunt ends created by a restriction enzyme cut.
99
100
 
@@ -116,7 +117,7 @@ def ends_from_cutsite(
116
117
  and the sequence of the overhang. The first tuple is for the left end, second for the right end.
117
118
 
118
119
  >>> from Bio.Restriction import NotI
119
- >>> x = _Dseq("ctcgGCGGCCGCcagcggccg")
120
+ >>> x = Dseq("ctcgGCGGCCGCcagcggccg")
120
121
  >>> x.get_cutsites(NotI)
121
122
  [((6, -4), NotI)]
122
123
  >>> ends_from_cutsite(x.get_cutsites(NotI)[0], x)
@@ -143,8 +144,8 @@ def ends_from_cutsite(
143
144
 
144
145
 
145
146
  def restriction_ligation_overlap(
146
- seqx: _Dseqrecord,
147
- seqy: _Dseqrecord,
147
+ seqx: Dseqrecord,
148
+ seqy: Dseqrecord,
148
149
  enzymes=RestrictionBatch,
149
150
  partial=False,
150
151
  allow_blunt=False,
@@ -155,9 +156,9 @@ def restriction_ligation_overlap(
155
156
 
156
157
  Parameters
157
158
  ----------
158
- seqx : _Dseqrecord
159
+ seqx : Dseqrecord
159
160
  The first sequence
160
- seqy : _Dseqrecord
161
+ seqy : Dseqrecord
161
162
  The second sequence
162
163
  enzymes : RestrictionBatch
163
164
  The enzymes to use
@@ -211,7 +212,7 @@ def restriction_ligation_overlap(
211
212
  # if not seqy.circular:
212
213
  # cuts_y.append(((0, 0), None))
213
214
  matches = list()
214
- for cut_x, cut_y in _itertools.product(cuts_x, cuts_y):
215
+ for cut_x, cut_y in itertools.product(cuts_x, cuts_y):
215
216
  # A blunt end
216
217
  if allow_blunt and cut_x[0][1] == cut_y[0][1] == 0:
217
218
  matches.append((cut_x[0][0], cut_y[0][0], 0))
@@ -255,7 +256,7 @@ def combine_algorithms(*algorithms: AssemblyAlgorithmType) -> AssemblyAlgorithmT
255
256
 
256
257
 
257
258
  def blunt_overlap(
258
- seqx: _Dseqrecord, seqy: _Dseqrecord, limit=None
259
+ seqx: Dseqrecord, seqy: Dseqrecord, limit=None
259
260
  ) -> list[SequenceOverlap]:
260
261
  """
261
262
  Assembly algorithm to find blunt overlaps. Used for blunt ligation.
@@ -265,9 +266,9 @@ def blunt_overlap(
265
266
 
266
267
  Parameters
267
268
  ----------
268
- seqx : _Dseqrecord
269
+ seqx : Dseqrecord
269
270
  The first sequence
270
- seqy : _Dseqrecord
271
+ seqy : Dseqrecord
271
272
  The second sequence
272
273
  limit : int
273
274
  There for compatibility, but it is ignored
@@ -293,7 +294,7 @@ def blunt_overlap(
293
294
 
294
295
 
295
296
  def common_sub_strings(
296
- seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25
297
+ seqx: Dseqrecord, seqy: Dseqrecord, limit=25
297
298
  ) -> list[SequenceOverlap]:
298
299
  """
299
300
  Assembly algorithm to find common substrings of length == limit. see the docs of
@@ -356,7 +357,18 @@ def common_sub_strings(
356
357
  return [r for r in results if r not in shifted_matches]
357
358
 
358
359
 
359
- def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
360
+ def _get_trim_end_info(
361
+ end_info: tuple[str, str], trim_ends: str, is_five_prime: bool
362
+ ) -> int | None:
363
+ """Utility function to get the trim information for terminal_overlap."""
364
+ if end_info[0] == trim_ends:
365
+ return len(end_info[1]) if is_five_prime else len(end_info[1]) * -1
366
+ return 0 if is_five_prime else None
367
+
368
+
369
+ def terminal_overlap(
370
+ seqx: Dseqrecord, seqy: Dseqrecord, limit=25, trim_ends: None | str = None
371
+ ):
360
372
  """
361
373
  Assembly algorithm to find terminal overlaps (e.g. for Gibson assembly).
362
374
  The order matters, we want alignments like:
@@ -375,12 +387,15 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
375
387
 
376
388
  Parameters
377
389
  ----------
378
- seqx : _Dseqrecord
390
+ seqx : Dseqrecord
379
391
  The first sequence
380
- seqy : _Dseqrecord
392
+ seqy : Dseqrecord
381
393
  The second sequence
382
394
  limit : int
383
395
  Minimum length of the overlap
396
+ trim_ends : str or None
397
+ The ends to trim, either "5'" or "3'".
398
+ If None, no trimming is done.
384
399
 
385
400
  Returns
386
401
  -------
@@ -388,32 +403,64 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
388
403
  A list of overlaps between the two sequences
389
404
 
390
405
  >>> from pydna.dseqrecord import Dseqrecord
391
- >>> from pydna.assembly2 import gibson_overlap
406
+ >>> from pydna.assembly2 import terminal_overlap
392
407
  >>> x = Dseqrecord("ttactaAAAAAA")
393
408
  >>> y = Dseqrecord("AAAAAAcgcacg")
394
- >>> gibson_overlap(x, y, limit=5)
409
+ >>> terminal_overlap(x, y, limit=5)
395
410
  [(6, 0, 6), (7, 0, 5)]
396
- >>> gibson_overlap(y, x, limit=5)
411
+ >>> terminal_overlap(y, x, limit=5)
412
+ []
413
+
414
+ Trimming the ends:
415
+ >>> from pydna.dseq import Dseq
416
+ >>> from pydna.dseqrecord import Dseqrecord
417
+ >>> from pydna.assembly2 import terminal_overlap
418
+ >>> x = Dseqrecord(Dseq.from_full_sequence_and_overhangs("aaaACGT", 0, 3))
419
+ >>> y = Dseqrecord(Dseq.from_full_sequence_and_overhangs("ACGTccc", 3, 0))
420
+ >>> terminal_overlap(x, y, limit=4)
421
+ [(3, 0, 4)]
422
+ >>> terminal_overlap(x, y, limit=4, trim_ends="5'")
423
+ [(3, 0, 4)]
424
+ >>> terminal_overlap(x, y, limit=4, trim_ends="3'")
397
425
  []
398
426
  """
399
427
 
400
- # Because Gibson enzymes remove 5' overhangs, we remove them from the sequence
401
- # when looking for homology, then we shift the location of the second fragment accordingly.
402
- # This is only relevant for linear fragments, so we don't need to worry about
403
- # shifting locations for circular fragments.
404
- trim_x_left = -seqx.seq.ovhg if seqx.seq.ovhg < 0 else 0
405
- trim_x_right = seqx.seq.watson_ovhg() if seqx.seq.watson_ovhg() < 0 else None
406
- trim_y_left = -seqy.seq.ovhg if seqy.seq.ovhg < 0 else 0
407
- trim_y_right = seqy.seq.watson_ovhg() if seqy.seq.watson_ovhg() < 0 else None
408
-
409
- stringx = str(seqx.seq[trim_x_left:trim_x_right]).upper()
410
- stringy = str(seqy.seq[trim_y_left:trim_y_right]).upper()
428
+ if trim_ends is not None and trim_ends not in ["5'", "3'"]:
429
+ raise ValueError("trim_ends must be '5' or '3'")
430
+
431
+ if trim_ends is None:
432
+ trim_x_left, trim_x_right, trim_y_left, trim_y_right = (0, None, 0, None)
433
+ stringx = str(seqx.seq).upper()
434
+ stringy = str(seqy.seq).upper()
435
+ else:
436
+ trim_x_right = _get_trim_end_info(
437
+ seqx.seq.three_prime_end(), trim_ends, is_five_prime=False
438
+ )
439
+ trim_y_left = _get_trim_end_info(
440
+ seqy.seq.five_prime_end(), trim_ends, is_five_prime=True
441
+ )
442
+
443
+ # I actually don't think these two are needed, since only the terminal
444
+ # join between x_right and y_left is tested, but maybe there is some edge-case
445
+ # that I am missing, so keeping them just in case.
446
+ trim_x_left = _get_trim_end_info(
447
+ seqx.seq.five_prime_end(), trim_ends, is_five_prime=True
448
+ )
449
+ trim_y_right = _get_trim_end_info(
450
+ seqy.seq.three_prime_end(), trim_ends, is_five_prime=False
451
+ )
452
+
453
+ stringx = str(seqx.seq[trim_x_left:trim_x_right]).upper()
454
+ stringy = str(seqy.seq[trim_y_left:trim_y_right]).upper()
455
+
411
456
  # We have to convert to list because we need to modify the matches
412
457
  matches = [
413
458
  list(m)
414
459
  for m in common_sub_strings_str(stringx, stringy, limit)
415
460
  if (m[1] == 0 and m[0] + m[2] == len(stringx))
416
461
  ]
462
+
463
+ # Shift the matches if the left end has been trimmed
417
464
  for match in matches:
418
465
  match[0] += trim_x_left
419
466
  match[1] += trim_y_left
@@ -422,7 +469,32 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
422
469
  return [tuple(m) for m in matches]
423
470
 
424
471
 
425
- def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = False):
472
+ def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
473
+ """
474
+ Assembly algorithm to find terminal overlaps for Gibson assembly.
475
+ It is a wrapper around terminal_overlap with trim_ends="5'".
476
+ """
477
+
478
+ return terminal_overlap(seqx, seqy, limit, trim_ends="5'")
479
+
480
+
481
+ def in_fusion_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
482
+ """
483
+ Assembly algorithm to find terminal overlaps for in-fusion assembly.
484
+ It is a wrapper around terminal_overlap with trim_ends="3'".
485
+ """
486
+ return terminal_overlap(seqx, seqy, limit, trim_ends="3'")
487
+
488
+
489
+ def pcr_fusion_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
490
+ """
491
+ Assembly algorithm to find terminal overlaps for PCR fusion assembly.
492
+ It is a wrapper around terminal_overlap with trim_ends=None.
493
+ """
494
+ return terminal_overlap(seqx, seqy, limit, trim_ends=None)
495
+
496
+
497
+ def sticky_end_sub_strings(seqx: Dseqrecord, seqy: Dseqrecord, limit: bool = False):
426
498
  """
427
499
  Assembly algorithm for ligation of sticky ends.
428
500
 
@@ -431,9 +503,9 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
431
503
 
432
504
  Parameters
433
505
  ----------
434
- seqx : _Dseqrecord
506
+ seqx : Dseqrecord
435
507
  The first sequence
436
- seqy : _Dseqrecord
508
+ seqy : Dseqrecord
437
509
  The second sequence
438
510
  limit : bool
439
511
  Whether to allow partial overlaps
@@ -466,6 +538,7 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
466
538
  [(4, 0, 2)]
467
539
 
468
540
  """
541
+
469
542
  overlap = sum_is_sticky(
470
543
  seqx.seq.three_prime_end(), seqy.seq.five_prime_end(), limit
471
544
  )
@@ -475,7 +548,7 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
475
548
 
476
549
 
477
550
  def zip_match_leftwards(
478
- seqx: _SeqRecord, seqy: _SeqRecord, match: SequenceOverlap
551
+ seqx: SeqRecord, seqy: SeqRecord, match: SequenceOverlap
479
552
  ) -> SequenceOverlap:
480
553
  """
481
554
  Starting from the rightmost edge of the match, return a new match encompassing the max
@@ -483,15 +556,15 @@ def zip_match_leftwards(
483
556
  than the limit or a shorter match if there are mismatches. This is convenient to maintain
484
557
  as many features as possible. It is used in PCR assembly.
485
558
 
486
- >>> seq = _Dseqrecord('AAAAACGTCCCGT')
487
- >>> primer = _Dseqrecord('ACGTCCCGT')
559
+ >>> seq = Dseqrecord('AAAAACGTCCCGT')
560
+ >>> primer = Dseqrecord('ACGTCCCGT')
488
561
  >>> match = (13, 9, 0) # an empty match at the end of each
489
562
  >>> zip_match_leftwards(seq, primer, match)
490
563
  (4, 0, 9)
491
564
 
492
565
  Works in circular molecules if the match spans the origin:
493
- >>> seq = _Dseqrecord('TCCCGTAAAAACG', circular=True)
494
- >>> primer = _Dseqrecord('ACGTCCCGT')
566
+ >>> seq = Dseqrecord('TCCCGTAAAAACG', circular=True)
567
+ >>> primer = Dseqrecord('ACGTCCCGT')
495
568
  >>> match = (6, 9, 0)
496
569
  >>> zip_match_leftwards(seq, primer, match)
497
570
  (10, 0, 9)
@@ -512,11 +585,11 @@ def zip_match_leftwards(
512
585
  # For those cases we shift by length, then go back
513
586
 
514
587
  end_on_x = match[0] + match[2]
515
- if isinstance(seqx, _Dseqrecord) and seqx.circular and end_on_x <= len(seqx):
588
+ if isinstance(seqx, Dseqrecord) and seqx.circular and end_on_x <= len(seqx):
516
589
  end_on_x += len(seqx)
517
590
 
518
591
  end_on_y = match[1] + match[2]
519
- if isinstance(seqy, _Dseqrecord) and seqy.circular and end_on_y <= len(seqy):
592
+ if isinstance(seqy, Dseqrecord) and seqy.circular and end_on_y <= len(seqy):
520
593
  end_on_y += len(seqy)
521
594
 
522
595
  count = 0
@@ -533,7 +606,7 @@ def zip_match_leftwards(
533
606
 
534
607
 
535
608
  def zip_match_rightwards(
536
- seqx: _Dseqrecord, seqy: _Dseqrecord, match: SequenceOverlap
609
+ seqx: Dseqrecord, seqy: Dseqrecord, match: SequenceOverlap
537
610
  ) -> SequenceOverlap:
538
611
  """Same as zip_match_leftwards, but towards the right."""
539
612
 
@@ -549,19 +622,19 @@ def zip_match_rightwards(
549
622
  return (start_on_x, start_on_y, count)
550
623
 
551
624
 
552
- def seqrecord2_uppercase_DNA_string(seqr: _SeqRecord) -> str:
625
+ def seqrecord2_uppercase_DNA_string(seqr: SeqRecord) -> str:
553
626
  """
554
627
  Transform a Dseqrecord to a sequence string where U is replaced by T, everything is upper case and
555
628
  circular sequences are repeated twice. This is used for PCR, to support primers with U's (e.g. for USER cloning).
556
629
  """
557
630
  out = str(seqr.seq).upper().replace("U", "T")
558
- if isinstance(seqr, _Dseqrecord) and seqr.circular:
631
+ if isinstance(seqr, Dseqrecord) and seqr.circular:
559
632
  return out * 2
560
633
  return out
561
634
 
562
635
 
563
636
  def primer_template_overlap(
564
- seqx: _Dseqrecord | _Primer, seqy: _Dseqrecord | _Primer, limit=25, mismatches=0
637
+ seqx: Dseqrecord | Primer, seqy: Dseqrecord | Primer, limit=25, mismatches=0
565
638
  ) -> list[SequenceOverlap]:
566
639
  """
567
640
  Assembly algorithm to find overlaps between a primer and a template. It accepts mismatches.
@@ -573,9 +646,9 @@ def primer_template_overlap(
573
646
 
574
647
  Parameters
575
648
  ----------
576
- seqx : _Dseqrecord | _Primer
649
+ seqx : Dseqrecord | Primer
577
650
  The primer
578
- seqy : _Dseqrecord | _Primer
651
+ seqy : Dseqrecord | Primer
579
652
  The template
580
653
  limit : int
581
654
  Minimum length of the overlap
@@ -604,11 +677,11 @@ def primer_template_overlap(
604
677
  []
605
678
  """
606
679
 
607
- if isinstance(seqx, _Primer) and isinstance(seqy, _Dseqrecord):
680
+ if isinstance(seqx, Primer) and isinstance(seqy, Dseqrecord):
608
681
  primer = seqx
609
682
  template = seqy
610
683
  reverse_primer = False
611
- elif isinstance(seqx, _Dseqrecord) and isinstance(seqy, _Primer):
684
+ elif isinstance(seqx, Dseqrecord) and isinstance(seqy, Primer):
612
685
  primer = seqy
613
686
  template = seqx
614
687
  reverse_primer = True
@@ -662,45 +735,8 @@ def primer_template_overlap(
662
735
  return list(sorted(out))
663
736
 
664
737
 
665
- def fill_left(seq: _Dseq) -> _Dseq:
666
- """Fill the left overhang of a sequence with the complementary sequence."""
667
- new_watson = seq.watson
668
- new_crick = seq.crick
669
-
670
- # Watson 5' overhang
671
- if seq.ovhg < 0:
672
- new_crick = new_crick + reverse_complement(seq.watson[: -seq.ovhg])
673
- # Crick 5' overhang
674
- elif seq.ovhg > 0:
675
- new_watson = reverse_complement(seq.crick[-seq.ovhg :]) + new_watson
676
-
677
- return _Dseq(new_watson, new_crick, 0)
678
-
679
-
680
- def fill_right(seq: _Dseq) -> _Dseq:
681
- """Fill the right overhang of a sequence with the complementary sequence."""
682
- new_watson = seq.watson
683
- new_crick = seq.crick
684
-
685
- # Watson 3' overhang
686
- watson_ovhg = seq.watson_ovhg()
687
- if watson_ovhg < 0:
688
- new_watson = new_watson + reverse_complement(seq.crick[:-watson_ovhg])
689
-
690
- # Crick 3' overhang
691
- elif watson_ovhg > 0:
692
- new_crick = reverse_complement(seq.watson[-watson_ovhg:]) + new_crick
693
-
694
- return _Dseq(new_watson, new_crick, seq.ovhg)
695
-
696
-
697
- def fill_dseq(seq: _Dseq) -> _Dseq:
698
- """Fill the overhangs of a sequence with the complementary sequence."""
699
- return fill_left(fill_right(seq))
700
-
701
-
702
738
  def reverse_complement_assembly(
703
- assembly: EdgeRepresentationAssembly, fragments: list[_Dseqrecord]
739
+ assembly: EdgeRepresentationAssembly, fragments: list[Dseqrecord]
704
740
  ) -> EdgeRepresentationAssembly:
705
741
  """Complement an assembly, i.e. reverse the order of the fragments and the orientation of the overlaps."""
706
742
  new_assembly = list()
@@ -714,7 +750,7 @@ def reverse_complement_assembly(
714
750
  def filter_linear_subassemblies(
715
751
  linear_assemblies: list[EdgeRepresentationAssembly],
716
752
  circular_assemblies: list[EdgeRepresentationAssembly],
717
- fragments: list[_Dseqrecord],
753
+ fragments: list[Dseqrecord],
718
754
  ) -> list[EdgeRepresentationAssembly]:
719
755
  """Remove linear assemblies which are sub-assemblies of circular assemblies"""
720
756
  all_circular_assemblies = circular_assemblies + [
@@ -773,7 +809,7 @@ def assembly2str_tuple(assembly: EdgeRepresentationAssembly) -> str:
773
809
 
774
810
 
775
811
  def assembly_has_mismatches(
776
- fragments: list[_Dseqrecord], assembly: EdgeRepresentationAssembly
812
+ fragments: list[Dseqrecord], assembly: EdgeRepresentationAssembly
777
813
  ) -> bool:
778
814
  """Check if an assembly has mismatches. This should never happen and if so it returns an error."""
779
815
  for u, v, loc_u, loc_v in assembly:
@@ -789,7 +825,7 @@ def assembly_has_mismatches(
789
825
 
790
826
 
791
827
  def assembly_is_circular(
792
- assembly: EdgeRepresentationAssembly, fragments: list[_Dseqrecord]
828
+ assembly: EdgeRepresentationAssembly, fragments: list[Dseqrecord]
793
829
  ) -> bool:
794
830
  """
795
831
  Based on the topology of the locations of an assembly, determine if it is circular.
@@ -798,22 +834,22 @@ def assembly_is_circular(
798
834
  if assembly[0][0] != assembly[-1][1]:
799
835
  return False
800
836
  elif (
801
- isinstance(fragments[abs(assembly[0][0]) - 1], _Dseqrecord)
837
+ isinstance(fragments[abs(assembly[0][0]) - 1], Dseqrecord)
802
838
  and fragments[abs(assembly[0][0]) - 1].circular
803
839
  ):
804
840
  return True
805
841
  else:
806
842
  return (
807
- _location_boundaries(assembly[0][2])[0]
808
- > _location_boundaries(assembly[-1][3])[0]
843
+ location_boundaries(assembly[0][2])[0]
844
+ > location_boundaries(assembly[-1][3])[0]
809
845
  )
810
846
 
811
847
 
812
848
  def assemble(
813
- fragments: list[_Dseqrecord],
849
+ fragments: list[Dseqrecord],
814
850
  assembly: EdgeRepresentationAssembly,
815
851
  is_insertion: bool = False,
816
- ) -> _Dseqrecord:
852
+ ) -> Dseqrecord:
817
853
  """Generate a Dseqrecord from an assembly and a list of fragments."""
818
854
 
819
855
  if is_insertion:
@@ -830,14 +866,15 @@ def assemble(
830
866
  u, v, loc_u, loc_v = asm_edge
831
867
  f_u = fragments[u - 1] if u > 0 else fragments[-u - 1].reverse_complement()
832
868
  f_v = fragments[v - 1] if v > 0 else fragments[-v - 1].reverse_complement()
833
- seq_u = str(loc_u.extract(f_u).seq).upper()
834
- seq_v = str(loc_v.extract(f_v).seq).upper()
835
- if seq_u != seq_v:
869
+ seq_u = str(loc_u.extract(f_u).seq)
870
+ seq_v = str(loc_v.extract(f_v).seq.rc())
871
+ # Test if seq_u and seq_v anneal
872
+ if not anneal_strands(seq_u, seq_v):
836
873
  raise ValueError("Mismatch in assembly")
837
874
 
838
875
  # We transform into Dseqrecords (for primers)
839
876
  dseqr_fragments = [
840
- f if isinstance(f, _Dseqrecord) else _Dseqrecord(f) for f in fragments
877
+ f if isinstance(f, Dseqrecord) else Dseqrecord(f) for f in fragments
841
878
  ]
842
879
  subfragments = get_assembly_subfragments(
843
880
  dseqr_fragments, subfragment_representation
@@ -845,42 +882,23 @@ def assemble(
845
882
 
846
883
  # Length of the overlaps between consecutive assembly fragments
847
884
  fragment_overlaps = [len(e[-1]) for e in assembly]
885
+ out_dseqrecord = subfragments.pop(0)
848
886
 
849
- out_dseqrecord = _Dseqrecord(subfragments[0])
887
+ for fragment, overlap in zip(subfragments, fragment_overlaps):
888
+ out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_right()
889
+ out_dseqrecord.seq = out_dseqrecord.seq.exo1_end(overlap)
890
+ fragment.seq = fragment.seq.cast_to_ds_left()
891
+ fragment.seq = fragment.seq.exo1_front(overlap)
892
+ out_dseqrecord += fragment
850
893
 
851
- for fragment, overlap in zip(subfragments[1:], fragment_overlaps):
852
- # Shift the features of the right fragment to the left by ``overlap``
853
- new_features = [
854
- f._shift(len(out_dseqrecord) - overlap) for f in fragment.features
855
- ]
856
- # Join the left sequence including the overlap with the right sequence without the overlap
857
- # we use fill_right / fill_left so that it works for ligation of sticky ends
858
- out_dseqrecord = _Dseqrecord(
859
- fill_right(out_dseqrecord.seq) + fill_left(fragment.seq)[overlap:],
860
- features=out_dseqrecord.features + new_features,
861
- )
862
-
863
- # For circular assemblies, close the loop and wrap origin-spanning features
894
+ # For circular assemblies, process both ends of the joined product using the last overlap, then circularise it
864
895
  if is_circular:
896
+ out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_left()
897
+ out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_right()
865
898
  overlap = fragment_overlaps[-1]
866
-
867
- # Special case for blunt circularisation
868
- if overlap == 0:
869
- out_dseqrecord = out_dseqrecord.looped()
870
- else:
871
- # Remove trailing overlap
872
- out_dseqrecord = _Dseqrecord(
873
- fill_dseq(out_dseqrecord.seq)[:-overlap],
874
- features=out_dseqrecord.features,
875
- circular=True,
876
- )
877
- for feature in out_dseqrecord.features:
878
- start, end = _location_boundaries(feature.location)
879
- if start >= len(out_dseqrecord) or end > len(out_dseqrecord):
880
- # Wrap around the origin
881
- feature.location = _shift_location(
882
- feature.location, 0, len(out_dseqrecord)
883
- )
899
+ out_dseqrecord.seq = out_dseqrecord.seq.exo1_front(overlap)
900
+ out_dseqrecord.seq = out_dseqrecord.seq.exo1_end(overlap)
901
+ out_dseqrecord = out_dseqrecord.looped()
884
902
 
885
903
  out_dseqrecord.source = AssemblySource.from_subfragment_representation(
886
904
  subfragment_representation, fragments, is_circular
@@ -889,8 +907,8 @@ def assemble(
889
907
 
890
908
 
891
909
  def annotate_primer_binding_sites(
892
- input_dseqr: _Dseqrecord, fragments: list[_Dseqrecord]
893
- ) -> _Dseqrecord:
910
+ input_dseqr: Dseqrecord, fragments: list[Dseqrecord]
911
+ ) -> Dseqrecord:
894
912
  """Annotate the primer binding sites in a Dseqrecord."""
895
913
  fwd, _, rvs = fragments
896
914
  start_rvs = len(input_dseqr) - len(rvs)
@@ -970,9 +988,9 @@ def subfragment_representation2edge_representation(
970
988
 
971
989
 
972
990
  def get_assembly_subfragments(
973
- fragments: list[_Dseqrecord],
991
+ fragments: list[Dseqrecord],
974
992
  subfragment_representation: SubFragmentRepresentationAssembly,
975
- ) -> list[_Dseqrecord]:
993
+ ) -> list[Dseqrecord]:
976
994
  """From the fragment representation returned by edge_representation2subfragment_representation, get the subfragments that are joined together.
977
995
 
978
996
  Subfragments are the slices of the fragments that are joined together
@@ -1013,19 +1031,26 @@ def get_assembly_subfragments(
1013
1031
 
1014
1032
 
1015
1033
  def extract_subfragment(
1016
- seq: _Dseqrecord, start_location: Location, end_location: Location
1017
- ) -> _Dseqrecord:
1034
+ seq: Dseqrecord, start_location: Location | None, end_location: Location | None
1035
+ ) -> Dseqrecord:
1018
1036
  """Extract a subfragment from a sequence for an assembly, given the start and end locations of the subfragment."""
1019
- start = 0 if start_location is None else _location_boundaries(start_location)[0]
1020
- end = None if end_location is None else _location_boundaries(end_location)[1]
1037
+
1038
+ if seq.circular and (start_location is None or end_location is None):
1039
+ raise ValueError(
1040
+ "Start and end locations cannot be None for circular sequences"
1041
+ )
1042
+ # This could be used to have consistent behaviour for circular sequences, where the start is arbitrary. However,
1043
+ # they should never get None, so this is not used.
1044
+ # if start_location is None:
1045
+ # start_location = end_location
1046
+ # elif end_location is None:
1047
+ # end_location = start_location
1048
+
1049
+ start = 0 if start_location is None else location_boundaries(start_location)[0]
1050
+ end = None if end_location is None else location_boundaries(end_location)[1]
1021
1051
 
1022
1052
  # Special case, some of it could be handled by better Dseqrecord slicing in the future
1023
- if (
1024
- seq.circular
1025
- and start_location is not None
1026
- and end_location is not None
1027
- and _locations_overlap(start_location, end_location, len(seq))
1028
- ):
1053
+ if seq.circular and locations_overlap(start_location, end_location, len(seq)):
1029
1054
  # The overhang is different for origin-spanning features, for instance
1030
1055
  # for a feature join{[12:13], [0:3]} in a sequence of length 13, the overhang
1031
1056
  # is -4, not 9
@@ -1035,7 +1060,7 @@ def extract_subfragment(
1035
1060
  ovhg = 0
1036
1061
  dummy_cut = ((start, ovhg), None)
1037
1062
  open_seq = seq.apply_cut(dummy_cut, dummy_cut)
1038
- return _Dseqrecord(fill_dseq(open_seq.seq), features=open_seq.features)
1063
+ return Dseqrecord(open_seq.seq.cast_to_ds(), features=open_seq.features)
1039
1064
 
1040
1065
  return seq[start:end]
1041
1066
 
@@ -1178,14 +1203,15 @@ class Assembly:
1178
1203
 
1179
1204
  def __init__(
1180
1205
  self,
1181
- frags: list[_Dseqrecord],
1206
+ frags: list[Dseqrecord],
1182
1207
  limit: int = 25,
1183
1208
  algorithm: AssemblyAlgorithmType = common_sub_strings,
1184
1209
  use_fragment_order: bool = True,
1185
1210
  use_all_fragments: bool = False,
1186
1211
  ):
1212
+
1187
1213
  # TODO: allow for the same fragment to be included more than once?
1188
- self.G = _nx.MultiDiGraph()
1214
+ self.G = nx.MultiDiGraph()
1189
1215
  # Add positive and negative nodes for forward and reverse fragments
1190
1216
  self.G.add_nodes_from((i + 1, {"seq": f}) for (i, f) in enumerate(frags))
1191
1217
  self.G.add_nodes_from(
@@ -1193,12 +1219,12 @@ class Assembly:
1193
1219
  )
1194
1220
 
1195
1221
  # Iterate over all possible combinations of fragments
1196
- fragment_pairs = _itertools.combinations(
1222
+ fragment_pairs = itertools.combinations(
1197
1223
  filter(lambda x: x > 0, self.G.nodes), 2
1198
1224
  )
1199
1225
  for i, j in fragment_pairs:
1200
1226
  # All the relative orientations of the fragments in the pair
1201
- for u, v in _itertools.product([i, -i], [j, -j]):
1227
+ for u, v in itertools.product([i, -i], [j, -j]):
1202
1228
  u_seq = self.G.nodes[u]["seq"]
1203
1229
  v_seq = self.G.nodes[v]["seq"]
1204
1230
  matches = algorithm(u_seq, v_seq, limit)
@@ -1216,7 +1242,7 @@ class Assembly:
1216
1242
  @classmethod
1217
1243
  def assembly_is_valid(
1218
1244
  cls,
1219
- fragments: list[_Dseqrecord | _Primer],
1245
+ fragments: list[Dseqrecord | Primer],
1220
1246
  assembly: EdgeRepresentationAssembly,
1221
1247
  is_circular: bool,
1222
1248
  use_all_fragments: bool,
@@ -1232,6 +1258,23 @@ class Assembly:
1232
1258
  if len(assembly) == 0:
1233
1259
  return False
1234
1260
 
1261
+ # Topology check -> Circular sequences cannot be first or last in a linear assembly.
1262
+ # For example, let's imagine aACGTc (linear) and gACGTc (circular).
1263
+ # It should not be possible to join them into a linear assembly. It's similar if we
1264
+ # think of a restriction-ligation assembly, example: aGAATTCc (linear) and gGAATTCc
1265
+ # (circular).
1266
+ # A linear product could be generated where the circular molecule is cut open: on one end
1267
+ # it joins the linear molecule and on the other it is free. For now that is not considered a
1268
+ # relevant product, so it is excluded.
1269
+ first_fragment = fragments[abs(assembly[0][0]) - 1]
1270
+ last_fragment = fragments[abs(assembly[-1][1]) - 1]
1271
+ if not is_circular and (
1272
+ isinstance(first_fragment, Dseqrecord)
1273
+ and first_fragment.circular
1274
+ or (isinstance(last_fragment, Dseqrecord) and last_fragment.circular)
1275
+ ):
1276
+ return False
1277
+
1235
1278
  if use_all_fragments and len(fragments) != len(
1236
1279
  set(flatten(map(abs, e[:2]) for e in assembly))
1237
1280
  ):
@@ -1269,8 +1312,8 @@ class Assembly:
1269
1312
  # Incompatible as described in figure above
1270
1313
  fragment = fragments[abs(v1) - 1]
1271
1314
  if (
1272
- isinstance(fragment, _Primer) or not fragment.circular
1273
- ) and _location_boundaries(start_location)[1] >= _location_boundaries(
1315
+ isinstance(fragment, Primer) or not fragment.circular
1316
+ ) and location_boundaries(start_location)[1] >= location_boundaries(
1274
1317
  end_location
1275
1318
  )[
1276
1319
  1
@@ -1294,8 +1337,8 @@ class Assembly:
1294
1337
  match: SequenceOverlap,
1295
1338
  u: int,
1296
1339
  v: int,
1297
- first: _Dseqrecord,
1298
- secnd: _Dseqrecord,
1340
+ first: Dseqrecord,
1341
+ secnd: Dseqrecord,
1299
1342
  ):
1300
1343
  """Add edges to the graph from a match returned by the ``algorithm`` function (see pydna.common_substrings). For
1301
1344
  format of edges (see documentation of the Assembly class).
@@ -1314,10 +1357,10 @@ class Assembly:
1314
1357
  else:
1315
1358
  # We use shift_location with 0 to wrap origin-spanning features
1316
1359
  locs = [
1317
- _shift_location(
1360
+ shift_location(
1318
1361
  SimpleLocation(x_start, x_start + length), 0, len(first)
1319
1362
  ),
1320
- _shift_location(
1363
+ shift_location(
1321
1364
  SimpleLocation(y_start, y_start + length), 0, len(secnd)
1322
1365
  ),
1323
1366
  ]
@@ -1352,7 +1395,7 @@ class Assembly:
1352
1395
  """
1353
1396
 
1354
1397
  # Copy the graph since we will add the begin and end mock nodes
1355
- G = _nx.MultiDiGraph(self.G)
1398
+ G = nx.MultiDiGraph(self.G)
1356
1399
  G.add_nodes_from(["begin", "end"])
1357
1400
 
1358
1401
  if self.use_fragment_order:
@@ -1390,7 +1433,7 @@ class Assembly:
1390
1433
  def node_path2assembly_list(
1391
1434
  self, cycle: list[int], circular: bool
1392
1435
  ) -> list[EdgeRepresentationAssembly]:
1393
- """Convert a node path in the format [1, 2, 3] (as returned by _nx.cycles.simple_cycles) to a list of all
1436
+ """Convert a node path in the format [1, 2, 3] (as returned by networkx.cycles.simple_cycles) to a list of all
1394
1437
  possible assemblies.
1395
1438
 
1396
1439
  There may be multiple assemblies for a given node path, if there are several edges connecting two nodes,
@@ -1404,11 +1447,11 @@ class Assembly:
1404
1447
  combine.append([(u, v, key) for key in self.G[u][v]])
1405
1448
  return [
1406
1449
  tuple(map(self.format_assembly_edge, x))
1407
- for x in _itertools.product(*combine)
1450
+ for x in itertools.product(*combine)
1408
1451
  ]
1409
1452
 
1410
1453
  def get_unique_linear_paths(
1411
- self, G_with_begin_end: _nx.MultiDiGraph, max_paths=10000
1454
+ self, G_with_begin_end: nx.MultiDiGraph, max_paths=10000
1412
1455
  ) -> list[list[int]]:
1413
1456
  """Get unique linear paths from the graph, removing those that contain the same node twice."""
1414
1457
  # We remove the begin and end nodes, and get all paths without edges
@@ -1419,8 +1462,8 @@ class Assembly:
1419
1462
  node_paths = [
1420
1463
  x[1:-1]
1421
1464
  for x in limit_iterator(
1422
- _nx.all_simple_paths(
1423
- _nx.DiGraph(G_with_begin_end),
1465
+ nx.all_simple_paths(
1466
+ nx.DiGraph(G_with_begin_end),
1424
1467
  "begin",
1425
1468
  "end",
1426
1469
  cutoff=(len(self.fragments) + 1),
@@ -1469,7 +1512,7 @@ class Assembly:
1469
1512
  sorted_cycles = map(
1470
1513
  circular_permutation_min_abs,
1471
1514
  limit_iterator(
1472
- _nx.cycles.simple_cycles(self.G, length_bound=len(self.fragments)),
1515
+ nx.cycles.simple_cycles(self.G, length_bound=len(self.fragments)),
1473
1516
  10000,
1474
1517
  ),
1475
1518
  )
@@ -1534,8 +1577,8 @@ class Assembly:
1534
1577
  fragment = self.fragments[abs(v1) - 1]
1535
1578
  # Find the pair of edges that should be last and first, i.e. (3, 1, [8:10], [9:11]) and (1, 2, [4:6], [0:2]) in
1536
1579
  # the example above. Only one of the pairs of edges should satisfy this condition for the topology to make sense.
1537
- left_of_insertion = _location_boundaries(start_location)[0]
1538
- right_of_insertion = _location_boundaries(end_location)[0]
1580
+ left_of_insertion = location_boundaries(start_location)[0]
1581
+ right_of_insertion = location_boundaries(end_location)[0]
1539
1582
  if not fragment.circular and (
1540
1583
  right_of_insertion >= left_of_insertion
1541
1584
  # The below condition is for single-site integration.
@@ -1547,7 +1590,7 @@ class Assembly:
1547
1590
  #
1548
1591
  # The locations of homology on the genome are [0:10] and [2:12], so not identical
1549
1592
  # but they overlap.
1550
- or _locations_overlap(start_location, end_location, len(fragment))
1593
+ or locations_overlap(start_location, end_location, len(fragment))
1551
1594
  ):
1552
1595
  edge_pair_index.append(i)
1553
1596
 
@@ -1578,13 +1621,13 @@ class Assembly:
1578
1621
  fragment1 = self.fragments[abs(f1) - 1]
1579
1622
  fragment2 = self.fragments[abs(f2) - 1]
1580
1623
 
1581
- if not _locations_overlap(
1624
+ if not locations_overlap(
1582
1625
  loc_f1_1, loc_f1_2, len(fragment1)
1583
- ) or not _locations_overlap(loc_f2_2, loc_f2_1, len(fragment2)):
1626
+ ) or not locations_overlap(loc_f2_2, loc_f2_1, len(fragment2)):
1584
1627
  return same_assembly
1585
1628
 
1586
1629
  # Sort to make compatible with insertion assembly
1587
- if _location_boundaries(loc_f1_1)[0] > _location_boundaries(loc_f1_2)[0]:
1630
+ if location_boundaries(loc_f1_1)[0] > location_boundaries(loc_f1_2)[0]:
1588
1631
  new_assembly = same_assembly[::-1]
1589
1632
  else:
1590
1633
  new_assembly = same_assembly[:]
@@ -1597,17 +1640,18 @@ class Assembly:
1597
1640
  fragment2 = self.fragments[abs(f2) - 1]
1598
1641
 
1599
1642
  # Extract boundaries
1600
- f2_1_start, _ = _location_boundaries(loc_f2_1)
1601
- f2_2_start, f2_2_end = _location_boundaries(loc_f2_2)
1602
- f1_1_start, _ = _location_boundaries(loc_f1_1)
1603
- f1_2_start, f1_2_end = _location_boundaries(loc_f1_2)
1643
+ f2_1_start, _ = location_boundaries(loc_f2_1)
1644
+ f2_2_start, f2_2_end = location_boundaries(loc_f2_2)
1645
+ f1_1_start, _ = location_boundaries(loc_f1_1)
1646
+ f1_2_start, f1_2_end = location_boundaries(loc_f1_2)
1604
1647
 
1605
1648
  overlap_diff = len(fragment1[f1_1_start:f1_2_end]) - len(
1606
1649
  fragment2[f2_1_start:f2_2_end]
1607
1650
  )
1608
1651
 
1609
- if overlap_diff == 0:
1610
- assert False, "Overlap is 0"
1652
+ # Safeguard
1653
+ if overlap_diff == 0: # pragma: no cover
1654
+ raise AssertionError("Overlap is 0")
1611
1655
 
1612
1656
  if overlap_diff > 0:
1613
1657
  new_loc_f1_1 = create_location(
@@ -1640,7 +1684,7 @@ class Assembly:
1640
1684
  "only_adjacent_edges not implemented for insertion assemblies"
1641
1685
  )
1642
1686
 
1643
- cycles = limit_iterator(_nx.cycles.simple_cycles(self.G), 10000)
1687
+ cycles = limit_iterator(nx.cycles.simple_cycles(self.G), 10000)
1644
1688
 
1645
1689
  # We apply constraints already here because sometimes the combinatorial explosion is too large
1646
1690
  if self.use_all_fragments:
@@ -1659,7 +1703,7 @@ class Assembly:
1659
1703
  )
1660
1704
 
1661
1705
  # We find cycles first
1662
- iterator = limit_iterator(_nx.cycles.simple_cycles(self.G), 10000)
1706
+ iterator = limit_iterator(nx.cycles.simple_cycles(self.G), 10000)
1663
1707
  assemblies = sum(
1664
1708
  map(lambda x: self.node_path2assembly_list(x, True), iterator), []
1665
1709
  )
@@ -1683,21 +1727,19 @@ class Assembly:
1683
1727
 
1684
1728
  def assemble_linear(
1685
1729
  self, only_adjacent_edges: bool = False, max_assemblies: int = 50
1686
- ) -> list[_Dseqrecord]:
1730
+ ) -> list[Dseqrecord]:
1687
1731
  """Assemble linear constructs, from assemblies returned by self.get_linear_assemblies."""
1688
1732
  assemblies = self.get_linear_assemblies(only_adjacent_edges, max_assemblies)
1689
1733
  return [assemble(self.fragments, a) for a in assemblies]
1690
1734
 
1691
1735
  def assemble_circular(
1692
1736
  self, only_adjacent_edges: bool = False, max_assemblies: int = 50
1693
- ) -> list[_Dseqrecord]:
1737
+ ) -> list[Dseqrecord]:
1694
1738
  """Assemble circular constructs, from assemblies returned by self.get_circular_assemblies."""
1695
1739
  assemblies = self.get_circular_assemblies(only_adjacent_edges, max_assemblies)
1696
1740
  return [assemble(self.fragments, a) for a in assemblies]
1697
1741
 
1698
def assemble_insertion(self, only_adjacent_edges: bool = False) -> list[Dseqrecord]:
    """Build one insertion product for each assembly from ``self.get_insertion_assemblies``.

    Parameters
    ----------
    only_adjacent_edges : bool
        Forwarded unchanged to ``self.get_insertion_assemblies``.

    Returns
    -------
    list[Dseqrecord]
        The result of ``assemble(self.fragments, a, is_insertion=True)`` for every
        assembly ``a``, in the order the assemblies were returned.
    """
    products = []
    for insertion_assembly in self.get_insertion_assemblies(only_adjacent_edges):
        products.append(
            assemble(self.fragments, insertion_assembly, is_insertion=True)
        )
    return products
@@ -1739,10 +1781,10 @@ class Assembly:
1739
1781
  if edge_location not in this_dict[key]:
1740
1782
  this_dict[key].append(edge_location)
1741
1783
  this_dict["left"] = sorted(
1742
- this_dict["left"], key=lambda x: _location_boundaries(x)[0]
1784
+ this_dict["left"], key=lambda x: location_boundaries(x)[0]
1743
1785
  )
1744
1786
  this_dict["right"] = sorted(
1745
- this_dict["right"], key=lambda x: _location_boundaries(x)[0]
1787
+ this_dict["right"], key=lambda x: location_boundaries(x)[0]
1746
1788
  )
1747
1789
  locations_on_fragments[node] = this_dict
1748
1790
 
@@ -1789,7 +1831,7 @@ class Assembly:
1789
1831
 
1790
1832
  pairs = list()
1791
1833
  for pair in zip(left, right):
1792
- pairs += list(_itertools.product(*pair))
1834
+ pairs += list(itertools.product(*pair))
1793
1835
  allowed_location_pairs[node] = pairs
1794
1836
 
1795
1837
  fragment_assembly = edge_representation2subfragment_representation(
@@ -1802,7 +1844,7 @@ class Assembly:
1802
1844
 
1803
1845
  def __repr__(self):
1804
1846
  # https://pyformat.info
1805
- return _pretty_str(
1847
+ return ps(
1806
1848
  "Assembly\n"
1807
1849
  "fragments..: {sequences}\n"
1808
1850
  "limit(bp)..: {limit}\n"
@@ -1823,7 +1865,7 @@ class PCRAssembly(Assembly):
1823
1865
  the number of mismatches allowed in the overlap. Only supports substitution mismatches, not indels.
1824
1866
  """
1825
1867
 
1826
- def __init__(self, frags: list[_Dseqrecord | _Primer], limit=25, mismatches=0):
1868
+ def __init__(self, frags: list[Dseqrecord | Primer], limit=25, mismatches=0):
1827
1869
 
1828
1870
  value_error = ValueError(
1829
1871
  "PCRAssembly assembly must be initialised with a list/tuple of primer, template, primer"
@@ -1833,15 +1875,15 @@ class PCRAssembly(Assembly):
1833
1875
 
1834
1876
  # Validate the inputs: should be a series of primer, template, primer
1835
1877
  wrong_fragment_class = (
1836
- not isinstance(frags[0], _Primer),
1837
- isinstance(frags[1], _Primer),
1838
- not isinstance(frags[2], _Primer),
1878
+ not isinstance(frags[0], Primer),
1879
+ isinstance(frags[1], Primer),
1880
+ not isinstance(frags[2], Primer),
1839
1881
  )
1840
1882
  if any(wrong_fragment_class):
1841
1883
  raise value_error
1842
1884
 
1843
1885
  # TODO: allow for the same fragment to be included more than once?
1844
- self.G = _nx.MultiDiGraph()
1886
+ self.G = nx.MultiDiGraph()
1845
1887
  # Add positive and negative nodes for forward and reverse fragments
1846
1888
  self.G.add_nodes_from((i + 1, {"seq": f}) for (i, f) in enumerate(frags))
1847
1889
  self.G.add_nodes_from(
@@ -1854,8 +1896,8 @@ class PCRAssembly(Assembly):
1854
1896
  # primer, template, primer
1855
1897
  p1, t, p2 = (i + 1, i + 2, i + 3)
1856
1898
  primer_ids += [p1, p2]
1857
- pairs += list(_itertools.product([p1, p2], [t, -t]))
1858
- pairs += list(_itertools.product([t, -t], [-p1, -p2]))
1899
+ pairs += list(itertools.product([p1, p2], [t, -t]))
1900
+ pairs += list(itertools.product([t, -t], [-p1, -p2]))
1859
1901
 
1860
1902
  for u, v in pairs:
1861
1903
  u_seq = self.G.nodes[u]["seq"]
@@ -1894,20 +1936,33 @@ class PCRAssembly(Assembly):
1894
1936
  "get_insertion_assemblies not implemented for PCR assemblies"
1895
1937
  )
1896
1938
 
1939
def assemble_linear(
    self, only_adjacent_edges: bool = False, max_assemblies: int = 50
) -> list[Dseqrecord]:
    """Assemble linear PCR products, then rebuild their end from the reverse primer.

    The products come from the parent implementation. For each of them, the last
    ``len(reverse primer)`` positions of ``seq`` are replaced by a ``Dseq`` built
    from the reverse complement of the reverse primer, so that the 5' of the crick
    strand of the product matches the sequence of the reverse primer. This is
    important when using primers with dUTP (for USER cloning).

    Parameters
    ----------
    only_adjacent_edges : bool
        Forwarded unchanged to the parent ``assemble_linear``.
    max_assemblies : int
        Forwarded unchanged to the parent ``assemble_linear``.

    Returns
    -------
    list[Dseqrecord]
        The same list returned by the parent method, with each product's ``seq``
        reassigned in place.
    """
    products = super().assemble_linear(only_adjacent_edges, max_assemblies)
    for product in products:
        # NOTE(review): the reverse primer is taken to be the third fragment;
        # confirm this still holds for every product orientation / input layout.
        reverse_primer = self.fragments[2]
        kept_part = product.seq[: -len(reverse_primer)]
        primer_end = Dseq(str(reverse_primer.seq.rc()))
        product.seq = kept_part + primer_end
    return products
1951
+
1897
1952
 
1898
1953
  class SingleFragmentAssembly(Assembly):
1899
1954
  """
1900
1955
  An assembly that represents the circularisation or splicing of a single fragment.
1901
1956
  """
1902
1957
 
1903
- def __init__(self, frags: [_Dseqrecord], limit=25, algorithm=common_sub_strings):
1958
+ def __init__(self, frags: [Dseqrecord], limit=25, algorithm=common_sub_strings):
1904
1959
 
1905
1960
  if len(frags) != 1:
1906
1961
  raise ValueError(
1907
1962
  "SingleFragmentAssembly assembly must be initialised with a single fragment"
1908
1963
  )
1909
1964
  # TODO: allow for the same fragment to be included more than once?
1910
- self.G = _nx.MultiDiGraph()
1965
+ self.G = nx.MultiDiGraph()
1911
1966
  frag = frags[0]
1912
1967
  # Add positive and negative nodes for forward and reverse fragments
1913
1968
  self.G.add_node(1, seq=frag)
@@ -1958,8 +2013,8 @@ class SingleFragmentAssembly(Assembly):
1958
2013
  if x[0][2] == x[0][3]:
1959
2014
  return False
1960
2015
  # We don't want to get overlap only (e.g. GAATTCcatGAATTC giving GAATTC)
1961
- left_start, _ = _location_boundaries(x[0][2])
1962
- _, right_end = _location_boundaries(x[0][3])
2016
+ left_start, _ = location_boundaries(x[0][2])
2017
+ _, right_end = location_boundaries(x[0][3])
1963
2018
  if left_start == 0 and right_end == len(self.fragments[0]):
1964
2019
  return False
1965
2020
  return True
@@ -1982,18 +2037,19 @@ class SingleFragmentAssembly(Assembly):
1982
2037
 
1983
2038
 
1984
2039
  def common_function_assembly_products(
1985
- frags: list[_Dseqrecord],
2040
+ frags: list[Dseqrecord],
1986
2041
  limit: int | None,
1987
2042
  algorithm: Callable,
1988
2043
  circular_only: bool,
1989
2044
  filter_results_function: Callable | None = None,
1990
- ) -> list[_Dseqrecord]:
2045
+ only_adjacent_edges: bool = False,
2046
+ ) -> list[Dseqrecord]:
1991
2047
  """Common function to avoid code duplication. Could be simplified further
1992
2048
  once SingleFragmentAssembly and Assembly are merged.
1993
2049
 
1994
2050
  Parameters
1995
2051
  ----------
1996
- frags : list[_Dseqrecord]
2052
+ frags : list[Dseqrecord]
1997
2053
  List of DNA fragments to assemble
1998
2054
  limit : int or None
1999
2055
  Minimum overlap length required, or None if not applicable
@@ -2001,10 +2057,14 @@ def common_function_assembly_products(
2001
2057
  Function that determines valid overlaps between fragments
2002
2058
  circular_only : bool
2003
2059
  If True, only return circular assemblies
2060
+ filter_results_function : Callable or None
2061
+ Function that filters the results
2062
+ only_adjacent_edges : bool
2063
+ If True, only return assemblies that use only adjacent edges
2004
2064
 
2005
2065
  Returns
2006
2066
  -------
2007
- list[_Dseqrecord]
2067
+ list[Dseqrecord]
2008
2068
  List of assembled DNA molecules
2009
2069
  """
2010
2070
  if len(frags) == 1:
@@ -2013,10 +2073,10 @@ def common_function_assembly_products(
2013
2073
  asm = Assembly(
2014
2074
  frags, limit, algorithm, use_fragment_order=False, use_all_fragments=True
2015
2075
  )
2016
- output_assemblies = asm.get_circular_assemblies()
2076
+ output_assemblies = asm.get_circular_assemblies(only_adjacent_edges)
2017
2077
  if not circular_only and len(frags) > 1:
2018
2078
  output_assemblies += filter_linear_subassemblies(
2019
- asm.get_linear_assemblies(), output_assemblies, frags
2079
+ asm.get_linear_assemblies(only_adjacent_edges), output_assemblies, frags
2020
2080
  )
2021
2081
  if not circular_only and len(frags) == 1:
2022
2082
  output_assemblies += asm.get_insertion_assemblies()
@@ -2028,28 +2088,28 @@ def common_function_assembly_products(
2028
2088
 
2029
2089
 
2030
2090
  def _recast_sources(
2031
- products: list[_Dseqrecord], source_cls, **extra_fields
2032
- ) -> list[_Dseqrecord]:
2091
+ products: list[Dseqrecord], source_cls, **extra_fields
2092
+ ) -> list[Dseqrecord]:
2033
2093
  """Recast the `source` of each product to `source_cls` with optional extras.
2034
2094
 
2035
2095
  This avoids repeating the same for-loop across many assembly functions.
2036
2096
  """
2037
2097
  for prod in products:
2038
2098
  prod.source = source_cls(
2039
- **prod.source.model_dump(),
2099
+ **prod.source.to_unserialized_dict(),
2040
2100
  **extra_fields,
2041
2101
  )
2042
2102
  return products
2043
2103
 
2044
2104
 
2045
2105
  def gibson_assembly(
2046
- frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
2047
- ) -> list[_Dseqrecord]:
2106
+ frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
2107
+ ) -> list[Dseqrecord]:
2048
2108
  """Returns the products for Gibson assembly.
2049
2109
 
2050
2110
  Parameters
2051
2111
  ----------
2052
- frags : list[_Dseqrecord]
2112
+ frags : list[Dseqrecord]
2053
2113
  List of DNA fragments to assemble
2054
2114
  limit : int, optional
2055
2115
  Minimum overlap length required, by default 25
@@ -2058,7 +2118,7 @@ def gibson_assembly(
2058
2118
 
2059
2119
  Returns
2060
2120
  -------
2061
- list[_Dseqrecord]
2121
+ list[Dseqrecord]
2062
2122
  List of assembled DNA molecules
2063
2123
  """
2064
2124
 
@@ -2069,14 +2129,14 @@ def gibson_assembly(
2069
2129
 
2070
2130
 
2071
2131
  def in_fusion_assembly(
2072
- frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
2073
- ) -> list[_Dseqrecord]:
2132
+ frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
2133
+ ) -> list[Dseqrecord]:
2074
2134
  """Returns the products for in-fusion assembly. This is the same as Gibson
2075
2135
  assembly, but with a different name.
2076
2136
 
2077
2137
  Parameters
2078
2138
  ----------
2079
- frags : list[_Dseqrecord]
2139
+ frags : list[Dseqrecord]
2080
2140
  List of DNA fragments to assemble
2081
2141
  limit : int, optional
2082
2142
  Minimum overlap length required, by default 25
@@ -2085,23 +2145,25 @@ def in_fusion_assembly(
2085
2145
 
2086
2146
  Returns
2087
2147
  -------
2088
- list[_Dseqrecord]
2148
+ list[Dseqrecord]
2089
2149
  List of assembled DNA molecules
2090
2150
  """
2091
2151
 
2092
- products = gibson_assembly(frags, limit)
2152
+ products = common_function_assembly_products(
2153
+ frags, limit, in_fusion_overlap, circular_only
2154
+ )
2093
2155
  return _recast_sources(products, InFusionSource)
2094
2156
 
2095
2157
 
2096
2158
  def fusion_pcr_assembly(
2097
- frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
2098
- ) -> list[_Dseqrecord]:
2159
+ frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
2160
+ ) -> list[Dseqrecord]:
2099
2161
  """Returns the products for fusion PCR assembly. This is the same as Gibson
2100
2162
  assembly, but with a different name.
2101
2163
 
2102
2164
  Parameters
2103
2165
  ----------
2104
- frags : list[_Dseqrecord]
2166
+ frags : list[Dseqrecord]
2105
2167
  List of DNA fragments to assemble
2106
2168
  limit : int, optional
2107
2169
  Minimum overlap length required, by default 25
@@ -2110,21 +2172,23 @@ def fusion_pcr_assembly(
2110
2172
 
2111
2173
  Returns
2112
2174
  -------
2113
- list[_Dseqrecord]
2175
+ list[Dseqrecord]
2114
2176
  List of assembled DNA molecules
2115
2177
  """
2116
- products = gibson_assembly(frags, limit)
2178
+ products = common_function_assembly_products(
2179
+ frags, limit, pcr_fusion_overlap, circular_only
2180
+ )
2117
2181
  return _recast_sources(products, OverlapExtensionPCRLigationSource)
2118
2182
 
2119
2183
 
2120
2184
  def in_vivo_assembly(
2121
- frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
2122
- ) -> list[_Dseqrecord]:
2185
+ frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
2186
+ ) -> list[Dseqrecord]:
2123
2187
  """Returns the products for in vivo assembly (IVA), which relies on homologous recombination between the fragments.
2124
2188
 
2125
2189
  Parameters
2126
2190
  ----------
2127
- frags : list[_Dseqrecord]
2191
+ frags : list[Dseqrecord]
2128
2192
  List of DNA fragments to assemble
2129
2193
  limit : int, optional
2130
2194
  Minimum overlap length required, by default 25
@@ -2133,7 +2197,7 @@ def in_vivo_assembly(
2133
2197
 
2134
2198
  Returns
2135
2199
  -------
2136
- list[_Dseqrecord]
2200
+ list[Dseqrecord]
2137
2201
  List of assembled DNA molecules
2138
2202
  """
2139
2203
  products = common_function_assembly_products(
@@ -2143,11 +2207,11 @@ def in_vivo_assembly(
2143
2207
 
2144
2208
 
2145
2209
  def restriction_ligation_assembly(
2146
- frags: list[_Dseqrecord],
2147
- enzymes: list["_AbstractCut"],
2210
+ frags: list[Dseqrecord],
2211
+ enzymes: list["AbstractCut"],
2148
2212
  allow_blunt: bool = True,
2149
2213
  circular_only: bool = False,
2150
- ) -> list[_Dseqrecord]:
2214
+ ) -> list[Dseqrecord]:
2151
2215
  """Returns the products for restriction ligation assembly:
2152
2216
 
2153
2217
  - Finds cutsites in the fragments
@@ -2156,9 +2220,9 @@ def restriction_ligation_assembly(
2156
2220
 
2157
2221
  Parameters
2158
2222
  ----------
2159
- frags : list[_Dseqrecord]
2223
+ frags : list[Dseqrecord]
2160
2224
  List of DNA fragments to assemble
2161
- enzymes : list[_AbstractCut]
2225
+ enzymes : list[AbstractCut]
2162
2226
  List of restriction enzymes to use
2163
2227
  allow_blunt : bool, optional
2164
2228
  If True, allow blunt end ligations, by default True
@@ -2167,7 +2231,7 @@ def restriction_ligation_assembly(
2167
2231
 
2168
2232
  Returns
2169
2233
  -------
2170
- list[_Dseqrecord]
2234
+ list[Dseqrecord]
2171
2235
  List of assembled DNA molecules
2172
2236
 
2173
2237
  Examples
@@ -2214,7 +2278,7 @@ def restriction_ligation_assembly(
2214
2278
  return restriction_ligation_overlap(x, y, enzymes, False, allow_blunt)
2215
2279
 
2216
2280
  products = common_function_assembly_products(
2217
- frags, None, algorithm_fn, circular_only
2281
+ frags, None, algorithm_fn, circular_only, only_adjacent_edges=True
2218
2282
  )
2219
2283
  return _recast_sources(
2220
2284
  products, RestrictionAndLigationSource, restriction_enzymes=enzymes
@@ -2222,20 +2286,20 @@ def restriction_ligation_assembly(
2222
2286
 
2223
2287
 
2224
2288
  def golden_gate_assembly(
2225
- frags: list[_Dseqrecord],
2226
- enzymes: list["_AbstractCut"],
2289
+ frags: list[Dseqrecord],
2290
+ enzymes: list["AbstractCut"],
2227
2291
  allow_blunt: bool = True,
2228
2292
  circular_only: bool = False,
2229
- ) -> list[_Dseqrecord]:
2293
+ ) -> list[Dseqrecord]:
2230
2294
  """Returns the products for Golden Gate assembly. This is the same as
2231
2295
  restriction ligation assembly, but with a different name. Check the documentation
2232
2296
  for ``restriction_ligation_assembly`` for more details.
2233
2297
 
2234
2298
  Parameters
2235
2299
  ----------
2236
- frags : list[_Dseqrecord]
2300
+ frags : list[Dseqrecord]
2237
2301
  List of DNA fragments to assemble
2238
- enzymes : list[_AbstractCut]
2302
+ enzymes : list[AbstractCut]
2239
2303
  List of restriction enzymes to use
2240
2304
  allow_blunt : bool, optional
2241
2305
  If True, allow blunt end ligations, by default True
@@ -2244,7 +2308,7 @@ def golden_gate_assembly(
2244
2308
 
2245
2309
  Returns
2246
2310
  -------
2247
- list[_Dseqrecord]
2311
+ list[Dseqrecord]
2248
2312
  List of assembled DNA molecules
2249
2313
 
2250
2314
  Examples
@@ -2255,11 +2319,11 @@ def golden_gate_assembly(
2255
2319
 
2256
2320
 
2257
2321
  def ligation_assembly(
2258
- frags: list[_Dseqrecord],
2322
+ frags: list[Dseqrecord],
2259
2323
  allow_blunt: bool = False,
2260
2324
  allow_partial_overlap: bool = False,
2261
2325
  circular_only: bool = False,
2262
- ) -> list[_Dseqrecord]:
2326
+ ) -> list[Dseqrecord]:
2263
2327
  """Returns the products for ligation assembly, as inputs pass the fragments (digested if needed) that
2264
2328
  will be ligated.
2265
2329
 
@@ -2267,7 +2331,7 @@ def ligation_assembly(
2267
2331
 
2268
2332
  Parameters
2269
2333
  ----------
2270
- frags : list[_Dseqrecord]
2334
+ frags : list[Dseqrecord]
2271
2335
  List of DNA fragments to assemble
2272
2336
  allow_blunt : bool, optional
2273
2337
  If True, allow blunt end ligations, by default False
@@ -2278,7 +2342,7 @@ def ligation_assembly(
2278
2342
 
2279
2343
  Returns
2280
2344
  -------
2281
- list[_Dseqrecord]
2345
+ list[Dseqrecord]
2282
2346
  List of assembled DNA molecules
2283
2347
 
2284
2348
 
@@ -2333,17 +2397,17 @@ def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
2333
2397
 
2334
2398
 
2335
2399
  def gateway_assembly(
2336
- frags: list[_Dseqrecord],
2400
+ frags: list[Dseqrecord],
2337
2401
  reaction_type: Literal["BP", "LR"],
2338
2402
  greedy: bool = False,
2339
2403
  circular_only: bool = False,
2340
2404
  multi_site_only: bool = False,
2341
- ) -> list[_Dseqrecord]:
2405
+ ) -> list[Dseqrecord]:
2342
2406
  """Returns the products for Gateway assembly / Gateway cloning.
2343
2407
 
2344
2408
  Parameters
2345
2409
  ----------
2346
- frags : list[_Dseqrecord]
2410
+ frags : list[Dseqrecord]
2347
2411
  List of DNA fragments to assemble
2348
2412
  reaction_type : Literal['BP', 'LR']
2349
2413
  Type of Gateway reaction
@@ -2359,7 +2423,7 @@ def gateway_assembly(
2359
2423
 
2360
2424
  Returns
2361
2425
  -------
2362
- list[_Dseqrecord]
2426
+ list[Dseqrecord]
2363
2427
  List of assembled DNA molecules
2364
2428
 
2365
2429
 
@@ -2446,13 +2510,13 @@ def gateway_assembly(
2446
2510
 
2447
2511
 
2448
2512
  def common_function_integration_products(
2449
- frags: list[_Dseqrecord], limit: int | None, algorithm: Callable
2450
- ) -> list[_Dseqrecord]:
2513
+ frags: list[Dseqrecord], limit: int | None, algorithm: Callable
2514
+ ) -> list[Dseqrecord]:
2451
2515
  """Common function to avoid code duplication for integration products.
2452
2516
 
2453
2517
  Parameters
2454
2518
  ----------
2455
- frags : list[_Dseqrecord]
2519
+ frags : list[Dseqrecord]
2456
2520
  List of DNA fragments to integrate
2457
2521
  limit : int or None
2458
2522
  Minimum overlap length required, or None if not applicable
@@ -2461,7 +2525,7 @@ def common_function_integration_products(
2461
2525
 
2462
2526
  Returns
2463
2527
  -------
2464
- list[_Dseqrecord]
2528
+ list[Dseqrecord]
2465
2529
  List of integrated DNA molecules
2466
2530
  """
2467
2531
  if len(frags) == 1:
@@ -2482,27 +2546,27 @@ def common_function_integration_products(
2482
2546
 
2483
2547
 
2484
2548
  def common_handle_insertion_fragments(
2485
- genome: _Dseqrecord, inserts: list[_Dseqrecord]
2486
- ) -> list[_Dseqrecord]:
2549
+ genome: Dseqrecord, inserts: list[Dseqrecord]
2550
+ ) -> list[Dseqrecord]:
2487
2551
  """Common function to handle / validate insertion fragments.
2488
2552
 
2489
2553
  Parameters
2490
2554
  ----------
2491
- genome : _Dseqrecord
2555
+ genome : Dseqrecord
2492
2556
  Target genome sequence
2493
- inserts : list[_Dseqrecord] or _Dseqrecord
2557
+ inserts : list[Dseqrecord] or Dseqrecord
2494
2558
  DNA fragment(s) to insert
2495
2559
 
2496
2560
  Returns
2497
2561
  -------
2498
- list[_Dseqrecord]
2562
+ list[Dseqrecord]
2499
2563
  List containing genome and insert fragments
2500
2564
  """
2501
- if not isinstance(genome, _Dseqrecord):
2565
+ if not isinstance(genome, Dseqrecord):
2502
2566
  raise ValueError("Genome must be a Dseqrecord object")
2503
2567
 
2504
2568
  if not isinstance(inserts, list) or not all(
2505
- isinstance(f, _Dseqrecord) for f in inserts
2569
+ isinstance(f, Dseqrecord) for f in inserts
2506
2570
  ):
2507
2571
  raise ValueError("Inserts must be a list of Dseqrecord objects")
2508
2572
 
@@ -2513,13 +2577,13 @@ def common_handle_insertion_fragments(
2513
2577
 
2514
2578
 
2515
2579
  def common_function_excision_products(
2516
- genome: _Dseqrecord, limit: int | None, algorithm: Callable
2517
- ) -> list[_Dseqrecord]:
2580
+ genome: Dseqrecord, limit: int | None, algorithm: Callable
2581
+ ) -> list[Dseqrecord]:
2518
2582
  """Common function to avoid code duplication for excision products.
2519
2583
 
2520
2584
  Parameters
2521
2585
  ----------
2522
- genome : _Dseqrecord
2586
+ genome : Dseqrecord
2523
2587
  Target genome sequence
2524
2588
  limit : int or None
2525
2589
  Minimum overlap length required, or None if not applicable
@@ -2528,7 +2592,7 @@ def common_function_excision_products(
2528
2592
 
2529
2593
  Returns
2530
2594
  -------
2531
- list[_Dseqrecord]
2595
+ list[Dseqrecord]
2532
2596
  List of excised DNA molecules
2533
2597
  """
2534
2598
  asm = SingleFragmentAssembly([genome], limit, algorithm)
@@ -2536,25 +2600,25 @@ def common_function_excision_products(
2536
2600
 
2537
2601
 
2538
2602
  def homologous_recombination_integration(
2539
- genome: _Dseqrecord,
2540
- inserts: list[_Dseqrecord],
2603
+ genome: Dseqrecord,
2604
+ inserts: list[Dseqrecord],
2541
2605
  limit: int = 40,
2542
- ) -> list[_Dseqrecord]:
2606
+ ) -> list[Dseqrecord]:
2543
2607
  """Returns the products resulting from the integration of an insert (or inserts joined
2544
2608
  through in vivo recombination) into the genome through homologous recombination.
2545
2609
 
2546
2610
  Parameters
2547
2611
  ----------
2548
- genome : _Dseqrecord
2612
+ genome : Dseqrecord
2549
2613
  Target genome sequence
2550
- inserts : list[_Dseqrecord]
2614
+ inserts : list[Dseqrecord]
2551
2615
  DNA fragment(s) to insert
2552
2616
  limit : int, optional
2553
2617
  Minimum homology length required, by default 40
2554
2618
 
2555
2619
  Returns
2556
2620
  -------
2557
- list[_Dseqrecord]
2621
+ list[Dseqrecord]
2558
2622
  List of integrated DNA molecules
2559
2623
 
2560
2624
 
@@ -2590,21 +2654,21 @@ def homologous_recombination_integration(
2590
2654
 
2591
2655
 
2592
2656
  def homologous_recombination_excision(
2593
- genome: _Dseqrecord, limit: int = 40
2594
- ) -> list[_Dseqrecord]:
2657
+ genome: Dseqrecord, limit: int = 40
2658
+ ) -> list[Dseqrecord]:
2595
2659
  """Returns the products resulting from the excision of a fragment from the genome through
2596
2660
  homologous recombination.
2597
2661
 
2598
2662
  Parameters
2599
2663
  ----------
2600
- genome : _Dseqrecord
2664
+ genome : Dseqrecord
2601
2665
  Target genome sequence
2602
2666
  limit : int, optional
2603
2667
  Minimum homology length required, by default 40
2604
2668
 
2605
2669
  Returns
2606
2670
  -------
2607
- list[_Dseqrecord]
2671
+ list[Dseqrecord]
2608
2672
  List containing excised plasmid and remaining genome sequence
2609
2673
 
2610
2674
  Examples
@@ -2627,8 +2691,8 @@ def homologous_recombination_excision(
2627
2691
 
2628
2692
 
2629
2693
  def cre_lox_integration(
2630
- genome: _Dseqrecord, inserts: list[_Dseqrecord]
2631
- ) -> list[_Dseqrecord]:
2694
+ genome: Dseqrecord, inserts: list[Dseqrecord]
2695
+ ) -> list[Dseqrecord]:
2632
2696
  """Returns the products resulting from the integration of an insert (or inserts joined
2633
2697
  through cre-lox recombination among them) into the genome through cre-lox integration.
2634
2698
 
@@ -2636,14 +2700,14 @@ def cre_lox_integration(
2636
2700
 
2637
2701
  Parameters
2638
2702
  ----------
2639
- genome : _Dseqrecord
2703
+ genome : Dseqrecord
2640
2704
  Target genome sequence
2641
- inserts : list[_Dseqrecord] or _Dseqrecord
2705
+ inserts : list[Dseqrecord] or Dseqrecord
2642
2706
  DNA fragment(s) to insert
2643
2707
 
2644
2708
  Returns
2645
2709
  -------
2646
- list[_Dseqrecord]
2710
+ list[Dseqrecord]
2647
2711
  List of integrated DNA molecules
2648
2712
 
2649
2713
  Examples
@@ -2686,17 +2750,17 @@ def cre_lox_integration(
2686
2750
  return _recast_sources(products, CreLoxRecombinationSource)
2687
2751
 
2688
2752
 
2689
- def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
2753
+ def cre_lox_excision(genome: Dseqrecord) -> list[Dseqrecord]:
2690
2754
  """Returns the products for CRE-lox excision.
2691
2755
 
2692
2756
  Parameters
2693
2757
  ----------
2694
- genome : _Dseqrecord
2758
+ genome : Dseqrecord
2695
2759
  Target genome sequence
2696
2760
 
2697
2761
  Returns
2698
2762
  -------
2699
- list[_Dseqrecord]
2763
+ list[Dseqrecord]
2700
2764
  List containing excised plasmid and remaining genome sequence
2701
2765
 
2702
2766
  Examples
@@ -2738,28 +2802,28 @@ def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
2738
2802
 
2739
2803
 
2740
2804
  def crispr_integration(
2741
- genome: _Dseqrecord,
2742
- inserts: list[_Dseqrecord],
2743
- guides: list[_Primer],
2805
+ genome: Dseqrecord,
2806
+ inserts: list[Dseqrecord],
2807
+ guides: list[Primer],
2744
2808
  limit: int = 40,
2745
- ) -> list[_Dseqrecord]:
2809
+ ) -> list[Dseqrecord]:
2746
2810
  """
2747
2811
  Returns the products for CRISPR integration.
2748
2812
 
2749
2813
  Parameters
2750
2814
  ----------
2751
- genome : _Dseqrecord
2815
+ genome : Dseqrecord
2752
2816
  Target genome sequence
2753
- inserts : list[_Dseqrecord]
2817
+ inserts : list[Dseqrecord]
2754
2818
  DNA fragment(s) to insert
2755
- guides : list[_Primer]
2819
+ guides : list[Primer]
2756
2820
  List of guide RNAs as Primer objects. This may change in the future.
2757
2821
  limit : int, optional
2758
2822
  Minimum overlap length required, by default 40
2759
2823
 
2760
2824
  Returns
2761
2825
  -------
2762
- list[_Dseqrecord]
2826
+ list[Dseqrecord]
2763
2827
  List of integrated DNA molecules
2764
2828
 
2765
2829
  Examples
@@ -2804,8 +2868,9 @@ def crispr_integration(
2804
2868
  for i, product in enumerate(products):
2805
2869
  # The second element of product.source.input is conventionally the insert/repair fragment
2806
2870
  # The other two (first and third) are the two bits of the genome
2807
- repair_start = _location_boundaries(product.source.input[0].right_location)[0]
2808
- repair_end = _location_boundaries(product.source.input[2].left_location)[1]
2871
+ repair_start = location_boundaries(product.source.input[0].right_location)[0]
2872
+ # Add 1 because a cut position marks the boundary between two ranges (e.g. a cut at position 10 gives 0:10 and 10:20)
2873
+ repair_end = location_boundaries(product.source.input[2].left_location)[1] + 1
2809
2874
  repair_location = create_location(repair_start, repair_end, len(genome))
2810
2875
  some_cuts_inside_repair = []
2811
2876
  all_cuts_inside_repair = []
@@ -2836,22 +2901,22 @@ def crispr_integration(
2836
2901
 
2837
2902
 
2838
2903
  def pcr_assembly(
2839
- template: _Dseqrecord,
2840
- fwd_primer: _Primer,
2841
- rvs_primer: _Primer,
2904
+ template: Dseqrecord,
2905
+ fwd_primer: Primer,
2906
+ rvs_primer: Primer,
2842
2907
  add_primer_features: bool = False,
2843
2908
  limit: int = 14,
2844
2909
  mismatches: int = 0,
2845
- ) -> list[_Dseqrecord]:
2910
+ ) -> list[Dseqrecord]:
2846
2911
  """Returns the products for PCR assembly.
2847
2912
 
2848
2913
  Parameters
2849
2914
  ----------
2850
- template : _Dseqrecord
2915
+ template : Dseqrecord
2851
2916
  Template sequence
2852
- fwd_primer : _Primer
2917
+ fwd_primer : Primer
2853
2918
  Forward primer
2854
- rvs_primer : _Primer
2919
+ rvs_primer : Primer
2855
2920
  Reverse primer
2856
2921
  add_primer_features : bool, optional
2857
2922
  If True, add primer features to the product, by default False
@@ -2862,7 +2927,7 @@ def pcr_assembly(
2862
2927
 
2863
2928
  Returns
2864
2929
  -------
2865
- list[_Dseqrecord]
2930
+ list[Dseqrecord]
2866
2931
  List of assembled DNA molecules
2867
2932
  """
2868
2933