pydna 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +24 -193
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +283 -294
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/download.py +6 -15
- pydna/dseq.py +1794 -718
- pydna/dseqrecord.py +170 -169
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +187 -60
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +19 -19
- pydna/utils.py +97 -75
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/METADATA +8 -8
- pydna-5.5.5.dist-info/RECORD +43 -0
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.4.dist-info/RECORD +0 -46
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/licenses/LICENSE.txt +0 -0
pydna/assembly2.py
CHANGED
|
@@ -4,29 +4,29 @@ Improved implementation of the assembly module. To see a list of issues with the
|
|
|
4
4
|
see [issues tagged with fixed-with-new-assembly-model](https://github.com/pydna-group/pydna/issues?q=is%3Aissue%20state%3Aopen%20label%3Afixed-with-new-assembly-model)
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
import networkx as
|
|
8
|
-
import itertools
|
|
7
|
+
import networkx as nx
|
|
8
|
+
import itertools
|
|
9
9
|
from Bio.SeqFeature import SimpleLocation, Location
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
from Bio.Restriction.Restriction import RestrictionBatch
|
|
12
12
|
import regex
|
|
13
13
|
import copy
|
|
14
14
|
|
|
15
15
|
from pydna.utils import (
|
|
16
|
-
shift_location
|
|
16
|
+
shift_location,
|
|
17
17
|
flatten,
|
|
18
|
-
location_boundaries
|
|
19
|
-
locations_overlap
|
|
18
|
+
location_boundaries,
|
|
19
|
+
locations_overlap,
|
|
20
20
|
sum_is_sticky,
|
|
21
21
|
limit_iterator,
|
|
22
22
|
create_location,
|
|
23
23
|
)
|
|
24
|
-
from pydna._pretty import pretty_str as
|
|
24
|
+
from pydna._pretty import pretty_str as ps
|
|
25
25
|
from pydna.common_sub_strings import common_sub_strings as common_sub_strings_str
|
|
26
|
-
from pydna.dseqrecord import Dseqrecord
|
|
27
|
-
from pydna.dseq import Dseq
|
|
28
|
-
from pydna.primer import Primer
|
|
29
|
-
from pydna.seqrecord import SeqRecord
|
|
26
|
+
from pydna.dseqrecord import Dseqrecord
|
|
27
|
+
from pydna.dseq import Dseq
|
|
28
|
+
from pydna.primer import Primer
|
|
29
|
+
from pydna.seqrecord import SeqRecord
|
|
30
30
|
from pydna.types import (
|
|
31
31
|
CutSiteType,
|
|
32
32
|
# TODO: allow user to enforce multi-site
|
|
@@ -38,6 +38,7 @@ from pydna.types import (
|
|
|
38
38
|
)
|
|
39
39
|
from pydna.gateway import gateway_overlap, find_gateway_sites
|
|
40
40
|
from pydna.cre_lox import cre_loxP_overlap
|
|
41
|
+
from pydna.alphabet import anneal_strands
|
|
41
42
|
|
|
42
43
|
from typing import TYPE_CHECKING, Callable, Literal
|
|
43
44
|
from pydna.opencloning_models import (
|
|
@@ -59,7 +60,7 @@ from pydna.crispr import cas9
|
|
|
59
60
|
import warnings
|
|
60
61
|
|
|
61
62
|
if TYPE_CHECKING: # pragma: no cover
|
|
62
|
-
from Bio.Restriction import AbstractCut
|
|
63
|
+
from Bio.Restriction import AbstractCut
|
|
63
64
|
|
|
64
65
|
|
|
65
66
|
def gather_overlapping_locations(
|
|
@@ -71,29 +72,29 @@ def gather_overlapping_locations(
|
|
|
71
72
|
the output will be [(loc1, loc2), (loc3,)].
|
|
72
73
|
"""
|
|
73
74
|
# Make a graph with all the locations as nodes
|
|
74
|
-
G =
|
|
75
|
+
G = nx.Graph()
|
|
75
76
|
for i, loc in enumerate(locs):
|
|
76
77
|
G.add_node(i, location=loc)
|
|
77
78
|
|
|
78
79
|
# Add edges between nodes that overlap
|
|
79
80
|
for i in range(len(locs)):
|
|
80
81
|
for j in range(i + 1, len(locs)):
|
|
81
|
-
if
|
|
82
|
+
if locations_overlap(locs[i], locs[j], fragment_length):
|
|
82
83
|
G.add_edge(i, j)
|
|
83
84
|
|
|
84
85
|
# Get groups of overlapping locations
|
|
85
86
|
groups = list()
|
|
86
|
-
for loc_set in
|
|
87
|
+
for loc_set in nx.connected_components(G):
|
|
87
88
|
groups.append(tuple(locs[i] for i in loc_set))
|
|
88
89
|
|
|
89
90
|
# Sort by location of the first element in each group (does not matter which since they are overlapping)
|
|
90
|
-
groups.sort(key=lambda x:
|
|
91
|
+
groups.sort(key=lambda x: location_boundaries(x[0])[0])
|
|
91
92
|
|
|
92
93
|
return groups
|
|
93
94
|
|
|
94
95
|
|
|
95
96
|
def ends_from_cutsite(
|
|
96
|
-
cutsite: CutSiteType, seq:
|
|
97
|
+
cutsite: CutSiteType, seq: Dseq
|
|
97
98
|
) -> tuple[tuple[str, str], tuple[str, str]]:
|
|
98
99
|
"""Get the sticky or blunt ends created by a restriction enzyme cut.
|
|
99
100
|
|
|
@@ -116,7 +117,7 @@ def ends_from_cutsite(
|
|
|
116
117
|
and the sequence of the overhang. The first tuple is for the left end, second for the right end.
|
|
117
118
|
|
|
118
119
|
>>> from Bio.Restriction import NotI
|
|
119
|
-
>>> x =
|
|
120
|
+
>>> x = Dseq("ctcgGCGGCCGCcagcggccg")
|
|
120
121
|
>>> x.get_cutsites(NotI)
|
|
121
122
|
[((6, -4), NotI)]
|
|
122
123
|
>>> ends_from_cutsite(x.get_cutsites(NotI)[0], x)
|
|
@@ -143,8 +144,8 @@ def ends_from_cutsite(
|
|
|
143
144
|
|
|
144
145
|
|
|
145
146
|
def restriction_ligation_overlap(
|
|
146
|
-
seqx:
|
|
147
|
-
seqy:
|
|
147
|
+
seqx: Dseqrecord,
|
|
148
|
+
seqy: Dseqrecord,
|
|
148
149
|
enzymes=RestrictionBatch,
|
|
149
150
|
partial=False,
|
|
150
151
|
allow_blunt=False,
|
|
@@ -155,9 +156,9 @@ def restriction_ligation_overlap(
|
|
|
155
156
|
|
|
156
157
|
Parameters
|
|
157
158
|
----------
|
|
158
|
-
seqx :
|
|
159
|
+
seqx : Dseqrecord
|
|
159
160
|
The first sequence
|
|
160
|
-
seqy :
|
|
161
|
+
seqy : Dseqrecord
|
|
161
162
|
The second sequence
|
|
162
163
|
enzymes : RestrictionBatch
|
|
163
164
|
The enzymes to use
|
|
@@ -211,7 +212,7 @@ def restriction_ligation_overlap(
|
|
|
211
212
|
# if not seqy.circular:
|
|
212
213
|
# cuts_y.append(((0, 0), None))
|
|
213
214
|
matches = list()
|
|
214
|
-
for cut_x, cut_y in
|
|
215
|
+
for cut_x, cut_y in itertools.product(cuts_x, cuts_y):
|
|
215
216
|
# A blunt end
|
|
216
217
|
if allow_blunt and cut_x[0][1] == cut_y[0][1] == 0:
|
|
217
218
|
matches.append((cut_x[0][0], cut_y[0][0], 0))
|
|
@@ -255,7 +256,7 @@ def combine_algorithms(*algorithms: AssemblyAlgorithmType) -> AssemblyAlgorithmT
|
|
|
255
256
|
|
|
256
257
|
|
|
257
258
|
def blunt_overlap(
|
|
258
|
-
seqx:
|
|
259
|
+
seqx: Dseqrecord, seqy: Dseqrecord, limit=None
|
|
259
260
|
) -> list[SequenceOverlap]:
|
|
260
261
|
"""
|
|
261
262
|
Assembly algorithm to find blunt overlaps. Used for blunt ligation.
|
|
@@ -265,9 +266,9 @@ def blunt_overlap(
|
|
|
265
266
|
|
|
266
267
|
Parameters
|
|
267
268
|
----------
|
|
268
|
-
seqx :
|
|
269
|
+
seqx : Dseqrecord
|
|
269
270
|
The first sequence
|
|
270
|
-
seqy :
|
|
271
|
+
seqy : Dseqrecord
|
|
271
272
|
The second sequence
|
|
272
273
|
limit : int
|
|
273
274
|
Present only for compatibility with other algorithms; it is ignored
|
|
@@ -293,7 +294,7 @@ def blunt_overlap(
|
|
|
293
294
|
|
|
294
295
|
|
|
295
296
|
def common_sub_strings(
|
|
296
|
-
seqx:
|
|
297
|
+
seqx: Dseqrecord, seqy: Dseqrecord, limit=25
|
|
297
298
|
) -> list[SequenceOverlap]:
|
|
298
299
|
"""
|
|
299
300
|
Assembly algorithm to find common substrings of length == limit. See the docs of
|
|
@@ -356,7 +357,7 @@ def common_sub_strings(
|
|
|
356
357
|
return [r for r in results if r not in shifted_matches]
|
|
357
358
|
|
|
358
359
|
|
|
359
|
-
def gibson_overlap(seqx:
|
|
360
|
+
def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
360
361
|
"""
|
|
361
362
|
Assembly algorithm to find terminal overlaps (e.g. for Gibson assembly).
|
|
362
363
|
The order matters, we want alignments like:
|
|
@@ -375,9 +376,9 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
|
|
|
375
376
|
|
|
376
377
|
Parameters
|
|
377
378
|
----------
|
|
378
|
-
seqx :
|
|
379
|
+
seqx : Dseqrecord
|
|
379
380
|
The first sequence
|
|
380
|
-
seqy :
|
|
381
|
+
seqy : Dseqrecord
|
|
381
382
|
The second sequence
|
|
382
383
|
limit : int
|
|
383
384
|
Minimum length of the overlap
|
|
@@ -402,9 +403,9 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
|
|
|
402
403
|
# This is only relevant for linear fragments, so we don't need to worry about
|
|
403
404
|
# shifting locations for circular fragments.
|
|
404
405
|
trim_x_left = -seqx.seq.ovhg if seqx.seq.ovhg < 0 else 0
|
|
405
|
-
trim_x_right = seqx.seq.watson_ovhg
|
|
406
|
+
trim_x_right = seqx.seq.watson_ovhg if seqx.seq.watson_ovhg < 0 else None
|
|
406
407
|
trim_y_left = -seqy.seq.ovhg if seqy.seq.ovhg < 0 else 0
|
|
407
|
-
trim_y_right = seqy.seq.watson_ovhg
|
|
408
|
+
trim_y_right = seqy.seq.watson_ovhg if seqy.seq.watson_ovhg < 0 else None
|
|
408
409
|
|
|
409
410
|
stringx = str(seqx.seq[trim_x_left:trim_x_right]).upper()
|
|
410
411
|
stringy = str(seqy.seq[trim_y_left:trim_y_right]).upper()
|
|
@@ -422,7 +423,7 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
|
|
|
422
423
|
return [tuple(m) for m in matches]
|
|
423
424
|
|
|
424
425
|
|
|
425
|
-
def sticky_end_sub_strings(seqx:
|
|
426
|
+
def sticky_end_sub_strings(seqx: Dseqrecord, seqy: Dseqrecord, limit: bool = False):
|
|
426
427
|
"""
|
|
427
428
|
Assembly algorithm for ligation of sticky ends.
|
|
428
429
|
|
|
@@ -431,9 +432,9 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
|
|
|
431
432
|
|
|
432
433
|
Parameters
|
|
433
434
|
----------
|
|
434
|
-
seqx :
|
|
435
|
+
seqx : Dseqrecord
|
|
435
436
|
The first sequence
|
|
436
|
-
seqy :
|
|
437
|
+
seqy : Dseqrecord
|
|
437
438
|
The second sequence
|
|
438
439
|
limit : bool
|
|
439
440
|
Whether to allow partial overlaps
|
|
@@ -466,6 +467,7 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
|
|
|
466
467
|
[(4, 0, 2)]
|
|
467
468
|
|
|
468
469
|
"""
|
|
470
|
+
|
|
469
471
|
overlap = sum_is_sticky(
|
|
470
472
|
seqx.seq.three_prime_end(), seqy.seq.five_prime_end(), limit
|
|
471
473
|
)
|
|
@@ -475,7 +477,7 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
|
|
|
475
477
|
|
|
476
478
|
|
|
477
479
|
def zip_match_leftwards(
|
|
478
|
-
seqx:
|
|
480
|
+
seqx: SeqRecord, seqy: SeqRecord, match: SequenceOverlap
|
|
479
481
|
) -> SequenceOverlap:
|
|
480
482
|
"""
|
|
481
483
|
Starting from the rightmost edge of the match, return a new match encompassing the max
|
|
@@ -483,15 +485,15 @@ def zip_match_leftwards(
|
|
|
483
485
|
than the limit or a shorter match if there are mismatches. This is convenient to maintain
|
|
484
486
|
as many features as possible. It is used in PCR assembly.
|
|
485
487
|
|
|
486
|
-
>>> seq =
|
|
487
|
-
>>> primer =
|
|
488
|
+
>>> seq = Dseqrecord('AAAAACGTCCCGT')
|
|
489
|
+
>>> primer = Dseqrecord('ACGTCCCGT')
|
|
488
490
|
>>> match = (13, 9, 0) # an empty match at the end of each
|
|
489
491
|
>>> zip_match_leftwards(seq, primer, match)
|
|
490
492
|
(4, 0, 9)
|
|
491
493
|
|
|
492
494
|
Works in circular molecules if the match spans the origin:
|
|
493
|
-
>>> seq =
|
|
494
|
-
>>> primer =
|
|
495
|
+
>>> seq = Dseqrecord('TCCCGTAAAAACG', circular=True)
|
|
496
|
+
>>> primer = Dseqrecord('ACGTCCCGT')
|
|
495
497
|
>>> match = (6, 9, 0)
|
|
496
498
|
>>> zip_match_leftwards(seq, primer, match)
|
|
497
499
|
(10, 0, 9)
|
|
@@ -512,11 +514,11 @@ def zip_match_leftwards(
|
|
|
512
514
|
# For those cases we shift by length, then go back
|
|
513
515
|
|
|
514
516
|
end_on_x = match[0] + match[2]
|
|
515
|
-
if isinstance(seqx,
|
|
517
|
+
if isinstance(seqx, Dseqrecord) and seqx.circular and end_on_x <= len(seqx):
|
|
516
518
|
end_on_x += len(seqx)
|
|
517
519
|
|
|
518
520
|
end_on_y = match[1] + match[2]
|
|
519
|
-
if isinstance(seqy,
|
|
521
|
+
if isinstance(seqy, Dseqrecord) and seqy.circular and end_on_y <= len(seqy):
|
|
520
522
|
end_on_y += len(seqy)
|
|
521
523
|
|
|
522
524
|
count = 0
|
|
@@ -533,7 +535,7 @@ def zip_match_leftwards(
|
|
|
533
535
|
|
|
534
536
|
|
|
535
537
|
def zip_match_rightwards(
|
|
536
|
-
seqx:
|
|
538
|
+
seqx: Dseqrecord, seqy: Dseqrecord, match: SequenceOverlap
|
|
537
539
|
) -> SequenceOverlap:
|
|
538
540
|
"""Same as zip_match_leftwards, but towards the right."""
|
|
539
541
|
|
|
@@ -549,19 +551,19 @@ def zip_match_rightwards(
|
|
|
549
551
|
return (start_on_x, start_on_y, count)
|
|
550
552
|
|
|
551
553
|
|
|
552
|
-
def seqrecord2_uppercase_DNA_string(seqr:
|
|
554
|
+
def seqrecord2_uppercase_DNA_string(seqr: SeqRecord) -> str:
|
|
553
555
|
"""
|
|
554
556
|
Transform a Dseqrecord to a sequence string where U is replaced by T, everything is upper case and
|
|
555
557
|
circular sequences are repeated twice. This is used for PCR, to support primers with U's (e.g. for USER cloning).
|
|
556
558
|
"""
|
|
557
559
|
out = str(seqr.seq).upper().replace("U", "T")
|
|
558
|
-
if isinstance(seqr,
|
|
560
|
+
if isinstance(seqr, Dseqrecord) and seqr.circular:
|
|
559
561
|
return out * 2
|
|
560
562
|
return out
|
|
561
563
|
|
|
562
564
|
|
|
563
565
|
def primer_template_overlap(
|
|
564
|
-
seqx:
|
|
566
|
+
seqx: Dseqrecord | Primer, seqy: Dseqrecord | Primer, limit=25, mismatches=0
|
|
565
567
|
) -> list[SequenceOverlap]:
|
|
566
568
|
"""
|
|
567
569
|
Assembly algorithm to find overlaps between a primer and a template. It accepts mismatches.
|
|
@@ -573,9 +575,9 @@ def primer_template_overlap(
|
|
|
573
575
|
|
|
574
576
|
Parameters
|
|
575
577
|
----------
|
|
576
|
-
seqx :
|
|
578
|
+
seqx : Dseqrecord | Primer
|
|
577
579
|
The primer
|
|
578
|
-
seqy :
|
|
580
|
+
seqy : Dseqrecord | Primer
|
|
579
581
|
The template
|
|
580
582
|
limit : int
|
|
581
583
|
Minimum length of the overlap
|
|
@@ -604,11 +606,11 @@ def primer_template_overlap(
|
|
|
604
606
|
[]
|
|
605
607
|
"""
|
|
606
608
|
|
|
607
|
-
if isinstance(seqx,
|
|
609
|
+
if isinstance(seqx, Primer) and isinstance(seqy, Dseqrecord):
|
|
608
610
|
primer = seqx
|
|
609
611
|
template = seqy
|
|
610
612
|
reverse_primer = False
|
|
611
|
-
elif isinstance(seqx,
|
|
613
|
+
elif isinstance(seqx, Dseqrecord) and isinstance(seqy, Primer):
|
|
612
614
|
primer = seqy
|
|
613
615
|
template = seqx
|
|
614
616
|
reverse_primer = True
|
|
@@ -662,45 +664,8 @@ def primer_template_overlap(
|
|
|
662
664
|
return list(sorted(out))
|
|
663
665
|
|
|
664
666
|
|
|
665
|
-
def fill_left(seq: _Dseq) -> _Dseq:
|
|
666
|
-
"""Fill the left overhang of a sequence with the complementary sequence."""
|
|
667
|
-
new_watson = seq.watson
|
|
668
|
-
new_crick = seq.crick
|
|
669
|
-
|
|
670
|
-
# Watson 5' overhang
|
|
671
|
-
if seq.ovhg < 0:
|
|
672
|
-
new_crick = new_crick + reverse_complement(seq.watson[: -seq.ovhg])
|
|
673
|
-
# Crick 5' overhang
|
|
674
|
-
elif seq.ovhg > 0:
|
|
675
|
-
new_watson = reverse_complement(seq.crick[-seq.ovhg :]) + new_watson
|
|
676
|
-
|
|
677
|
-
return _Dseq(new_watson, new_crick, 0)
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
def fill_right(seq: _Dseq) -> _Dseq:
|
|
681
|
-
"""Fill the right overhang of a sequence with the complementary sequence."""
|
|
682
|
-
new_watson = seq.watson
|
|
683
|
-
new_crick = seq.crick
|
|
684
|
-
|
|
685
|
-
# Watson 3' overhang
|
|
686
|
-
watson_ovhg = seq.watson_ovhg()
|
|
687
|
-
if watson_ovhg < 0:
|
|
688
|
-
new_watson = new_watson + reverse_complement(seq.crick[:-watson_ovhg])
|
|
689
|
-
|
|
690
|
-
# Crick 3' overhang
|
|
691
|
-
elif watson_ovhg > 0:
|
|
692
|
-
new_crick = reverse_complement(seq.watson[-watson_ovhg:]) + new_crick
|
|
693
|
-
|
|
694
|
-
return _Dseq(new_watson, new_crick, seq.ovhg)
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
def fill_dseq(seq: _Dseq) -> _Dseq:
|
|
698
|
-
"""Fill the overhangs of a sequence with the complementary sequence."""
|
|
699
|
-
return fill_left(fill_right(seq))
|
|
700
|
-
|
|
701
|
-
|
|
702
667
|
def reverse_complement_assembly(
|
|
703
|
-
assembly: EdgeRepresentationAssembly, fragments: list[
|
|
668
|
+
assembly: EdgeRepresentationAssembly, fragments: list[Dseqrecord]
|
|
704
669
|
) -> EdgeRepresentationAssembly:
|
|
705
670
|
"""Complement an assembly, i.e. reverse the order of the fragments and the orientation of the overlaps."""
|
|
706
671
|
new_assembly = list()
|
|
@@ -714,7 +679,7 @@ def reverse_complement_assembly(
|
|
|
714
679
|
def filter_linear_subassemblies(
|
|
715
680
|
linear_assemblies: list[EdgeRepresentationAssembly],
|
|
716
681
|
circular_assemblies: list[EdgeRepresentationAssembly],
|
|
717
|
-
fragments: list[
|
|
682
|
+
fragments: list[Dseqrecord],
|
|
718
683
|
) -> list[EdgeRepresentationAssembly]:
|
|
719
684
|
"""Remove linear assemblies which are sub-assemblies of circular assemblies"""
|
|
720
685
|
all_circular_assemblies = circular_assemblies + [
|
|
@@ -773,7 +738,7 @@ def assembly2str_tuple(assembly: EdgeRepresentationAssembly) -> str:
|
|
|
773
738
|
|
|
774
739
|
|
|
775
740
|
def assembly_has_mismatches(
|
|
776
|
-
fragments: list[
|
|
741
|
+
fragments: list[Dseqrecord], assembly: EdgeRepresentationAssembly
|
|
777
742
|
) -> bool:
|
|
778
743
|
"""Check if an assembly has mismatches. This should never happen and if so it returns an error."""
|
|
779
744
|
for u, v, loc_u, loc_v in assembly:
|
|
@@ -789,7 +754,7 @@ def assembly_has_mismatches(
|
|
|
789
754
|
|
|
790
755
|
|
|
791
756
|
def assembly_is_circular(
|
|
792
|
-
assembly: EdgeRepresentationAssembly, fragments: list[
|
|
757
|
+
assembly: EdgeRepresentationAssembly, fragments: list[Dseqrecord]
|
|
793
758
|
) -> bool:
|
|
794
759
|
"""
|
|
795
760
|
Based on the topology of the locations of an assembly, determine if it is circular.
|
|
@@ -798,22 +763,22 @@ def assembly_is_circular(
|
|
|
798
763
|
if assembly[0][0] != assembly[-1][1]:
|
|
799
764
|
return False
|
|
800
765
|
elif (
|
|
801
|
-
isinstance(fragments[abs(assembly[0][0]) - 1],
|
|
766
|
+
isinstance(fragments[abs(assembly[0][0]) - 1], Dseqrecord)
|
|
802
767
|
and fragments[abs(assembly[0][0]) - 1].circular
|
|
803
768
|
):
|
|
804
769
|
return True
|
|
805
770
|
else:
|
|
806
771
|
return (
|
|
807
|
-
|
|
808
|
-
>
|
|
772
|
+
location_boundaries(assembly[0][2])[0]
|
|
773
|
+
> location_boundaries(assembly[-1][3])[0]
|
|
809
774
|
)
|
|
810
775
|
|
|
811
776
|
|
|
812
777
|
def assemble(
|
|
813
|
-
fragments: list[
|
|
778
|
+
fragments: list[Dseqrecord],
|
|
814
779
|
assembly: EdgeRepresentationAssembly,
|
|
815
780
|
is_insertion: bool = False,
|
|
816
|
-
) ->
|
|
781
|
+
) -> Dseqrecord:
|
|
817
782
|
"""Generate a Dseqrecord from an assembly and a list of fragments."""
|
|
818
783
|
|
|
819
784
|
if is_insertion:
|
|
@@ -830,14 +795,15 @@ def assemble(
|
|
|
830
795
|
u, v, loc_u, loc_v = asm_edge
|
|
831
796
|
f_u = fragments[u - 1] if u > 0 else fragments[-u - 1].reverse_complement()
|
|
832
797
|
f_v = fragments[v - 1] if v > 0 else fragments[-v - 1].reverse_complement()
|
|
833
|
-
seq_u = str(loc_u.extract(f_u).seq)
|
|
834
|
-
seq_v = str(loc_v.extract(f_v).seq
|
|
835
|
-
if seq_u
|
|
798
|
+
seq_u = str(loc_u.extract(f_u).seq)
|
|
799
|
+
seq_v = str(loc_v.extract(f_v).seq.rc())
|
|
800
|
+
# Test if seq_u and seq_v anneal
|
|
801
|
+
if not anneal_strands(seq_u, seq_v):
|
|
836
802
|
raise ValueError("Mismatch in assembly")
|
|
837
803
|
|
|
838
804
|
# We transform into Dseqrecords (for primers)
|
|
839
805
|
dseqr_fragments = [
|
|
840
|
-
f if isinstance(f,
|
|
806
|
+
f if isinstance(f, Dseqrecord) else Dseqrecord(f) for f in fragments
|
|
841
807
|
]
|
|
842
808
|
subfragments = get_assembly_subfragments(
|
|
843
809
|
dseqr_fragments, subfragment_representation
|
|
@@ -845,42 +811,23 @@ def assemble(
|
|
|
845
811
|
|
|
846
812
|
# Length of the overlaps between consecutive assembly fragments
|
|
847
813
|
fragment_overlaps = [len(e[-1]) for e in assembly]
|
|
814
|
+
out_dseqrecord = subfragments.pop(0)
|
|
848
815
|
|
|
849
|
-
|
|
816
|
+
for fragment, overlap in zip(subfragments, fragment_overlaps):
|
|
817
|
+
out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_right()
|
|
818
|
+
out_dseqrecord.seq = out_dseqrecord.seq.exo1_end(overlap)
|
|
819
|
+
fragment.seq = fragment.seq.cast_to_ds_left()
|
|
820
|
+
fragment.seq = fragment.seq.exo1_front(overlap)
|
|
821
|
+
out_dseqrecord += fragment
|
|
850
822
|
|
|
851
|
-
|
|
852
|
-
# Shift the features of the right fragment to the left by ``overlap``
|
|
853
|
-
new_features = [
|
|
854
|
-
f._shift(len(out_dseqrecord) - overlap) for f in fragment.features
|
|
855
|
-
]
|
|
856
|
-
# Join the left sequence including the overlap with the right sequence without the overlap
|
|
857
|
-
# we use fill_right / fill_left so that it works for ligation of sticky ends
|
|
858
|
-
out_dseqrecord = _Dseqrecord(
|
|
859
|
-
fill_right(out_dseqrecord.seq) + fill_left(fragment.seq)[overlap:],
|
|
860
|
-
features=out_dseqrecord.features + new_features,
|
|
861
|
-
)
|
|
862
|
-
|
|
863
|
-
# For circular assemblies, close the loop and wrap origin-spanning features
|
|
823
|
+
# For circular assemblies, process the fragment and loop
|
|
864
824
|
if is_circular:
|
|
825
|
+
out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_left()
|
|
826
|
+
out_dseqrecord.seq = out_dseqrecord.seq.cast_to_ds_right()
|
|
865
827
|
overlap = fragment_overlaps[-1]
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
out_dseqrecord = out_dseqrecord.looped()
|
|
870
|
-
else:
|
|
871
|
-
# Remove trailing overlap
|
|
872
|
-
out_dseqrecord = _Dseqrecord(
|
|
873
|
-
fill_dseq(out_dseqrecord.seq)[:-overlap],
|
|
874
|
-
features=out_dseqrecord.features,
|
|
875
|
-
circular=True,
|
|
876
|
-
)
|
|
877
|
-
for feature in out_dseqrecord.features:
|
|
878
|
-
start, end = _location_boundaries(feature.location)
|
|
879
|
-
if start >= len(out_dseqrecord) or end > len(out_dseqrecord):
|
|
880
|
-
# Wrap around the origin
|
|
881
|
-
feature.location = _shift_location(
|
|
882
|
-
feature.location, 0, len(out_dseqrecord)
|
|
883
|
-
)
|
|
828
|
+
out_dseqrecord.seq = out_dseqrecord.seq.exo1_front(overlap)
|
|
829
|
+
out_dseqrecord.seq = out_dseqrecord.seq.exo1_end(overlap)
|
|
830
|
+
out_dseqrecord = out_dseqrecord.looped()
|
|
884
831
|
|
|
885
832
|
out_dseqrecord.source = AssemblySource.from_subfragment_representation(
|
|
886
833
|
subfragment_representation, fragments, is_circular
|
|
@@ -889,8 +836,8 @@ def assemble(
|
|
|
889
836
|
|
|
890
837
|
|
|
891
838
|
def annotate_primer_binding_sites(
|
|
892
|
-
input_dseqr:
|
|
893
|
-
) ->
|
|
839
|
+
input_dseqr: Dseqrecord, fragments: list[Dseqrecord]
|
|
840
|
+
) -> Dseqrecord:
|
|
894
841
|
"""Annotate the primer binding sites in a Dseqrecord."""
|
|
895
842
|
fwd, _, rvs = fragments
|
|
896
843
|
start_rvs = len(input_dseqr) - len(rvs)
|
|
@@ -970,9 +917,9 @@ def subfragment_representation2edge_representation(
|
|
|
970
917
|
|
|
971
918
|
|
|
972
919
|
def get_assembly_subfragments(
|
|
973
|
-
fragments: list[
|
|
920
|
+
fragments: list[Dseqrecord],
|
|
974
921
|
subfragment_representation: SubFragmentRepresentationAssembly,
|
|
975
|
-
) -> list[
|
|
922
|
+
) -> list[Dseqrecord]:
|
|
976
923
|
"""From the fragment representation returned by edge_representation2subfragment_representation, get the subfragments that are joined together.
|
|
977
924
|
|
|
978
925
|
Subfragments are the slices of the fragments that are joined together
|
|
@@ -1013,19 +960,26 @@ def get_assembly_subfragments(
|
|
|
1013
960
|
|
|
1014
961
|
|
|
1015
962
|
def extract_subfragment(
|
|
1016
|
-
seq:
|
|
1017
|
-
) ->
|
|
963
|
+
seq: Dseqrecord, start_location: Location | None, end_location: Location | None
|
|
964
|
+
) -> Dseqrecord:
|
|
1018
965
|
"""Extract a subfragment from a sequence for an assembly, given the start and end locations of the subfragment."""
|
|
1019
|
-
|
|
1020
|
-
|
|
966
|
+
|
|
967
|
+
if seq.circular and (start_location is None or end_location is None):
|
|
968
|
+
raise ValueError(
|
|
969
|
+
"Start and end locations cannot be None for circular sequences"
|
|
970
|
+
)
|
|
971
|
+
# This could be used to have consistent behaviour for circular sequences, where the start is arbitrary. However,
|
|
972
|
+
# they should never get None, so this is not used.
|
|
973
|
+
# if start_location is None:
|
|
974
|
+
# start_location = end_location
|
|
975
|
+
# elif end_location is None:
|
|
976
|
+
# end_location = start_location
|
|
977
|
+
|
|
978
|
+
start = 0 if start_location is None else location_boundaries(start_location)[0]
|
|
979
|
+
end = None if end_location is None else location_boundaries(end_location)[1]
|
|
1021
980
|
|
|
1022
981
|
# Special case, some of it could be handled by better Dseqrecord slicing in the future
|
|
1023
|
-
if (
|
|
1024
|
-
seq.circular
|
|
1025
|
-
and start_location is not None
|
|
1026
|
-
and end_location is not None
|
|
1027
|
-
and _locations_overlap(start_location, end_location, len(seq))
|
|
1028
|
-
):
|
|
982
|
+
if seq.circular and locations_overlap(start_location, end_location, len(seq)):
|
|
1029
983
|
# The overhang is different for origin-spanning features, for instance
|
|
1030
984
|
# for a feature join{[12:13], [0:3]} in a sequence of length 13, the overhang
|
|
1031
985
|
# is -4, not 9
|
|
@@ -1035,7 +989,7 @@ def extract_subfragment(
|
|
|
1035
989
|
ovhg = 0
|
|
1036
990
|
dummy_cut = ((start, ovhg), None)
|
|
1037
991
|
open_seq = seq.apply_cut(dummy_cut, dummy_cut)
|
|
1038
|
-
return
|
|
992
|
+
return Dseqrecord(open_seq.seq.cast_to_ds(), features=open_seq.features)
|
|
1039
993
|
|
|
1040
994
|
return seq[start:end]
|
|
1041
995
|
|
|
@@ -1178,14 +1132,15 @@ class Assembly:
|
|
|
1178
1132
|
|
|
1179
1133
|
def __init__(
|
|
1180
1134
|
self,
|
|
1181
|
-
frags: list[
|
|
1135
|
+
frags: list[Dseqrecord],
|
|
1182
1136
|
limit: int = 25,
|
|
1183
1137
|
algorithm: AssemblyAlgorithmType = common_sub_strings,
|
|
1184
1138
|
use_fragment_order: bool = True,
|
|
1185
1139
|
use_all_fragments: bool = False,
|
|
1186
1140
|
):
|
|
1141
|
+
|
|
1187
1142
|
# TODO: allow for the same fragment to be included more than once?
|
|
1188
|
-
self.G =
|
|
1143
|
+
self.G = nx.MultiDiGraph()
|
|
1189
1144
|
# Add positive and negative nodes for forward and reverse fragments
|
|
1190
1145
|
self.G.add_nodes_from((i + 1, {"seq": f}) for (i, f) in enumerate(frags))
|
|
1191
1146
|
self.G.add_nodes_from(
|
|
@@ -1193,12 +1148,12 @@ class Assembly:
|
|
|
1193
1148
|
)
|
|
1194
1149
|
|
|
1195
1150
|
# Iterate over all possible combinations of fragments
|
|
1196
|
-
fragment_pairs =
|
|
1151
|
+
fragment_pairs = itertools.combinations(
|
|
1197
1152
|
filter(lambda x: x > 0, self.G.nodes), 2
|
|
1198
1153
|
)
|
|
1199
1154
|
for i, j in fragment_pairs:
|
|
1200
1155
|
# All the relative orientations of the fragments in the pair
|
|
1201
|
-
for u, v in
|
|
1156
|
+
for u, v in itertools.product([i, -i], [j, -j]):
|
|
1202
1157
|
u_seq = self.G.nodes[u]["seq"]
|
|
1203
1158
|
v_seq = self.G.nodes[v]["seq"]
|
|
1204
1159
|
matches = algorithm(u_seq, v_seq, limit)
|
|
@@ -1216,7 +1171,7 @@ class Assembly:
|
|
|
1216
1171
|
@classmethod
|
|
1217
1172
|
def assembly_is_valid(
|
|
1218
1173
|
cls,
|
|
1219
|
-
fragments: list[
|
|
1174
|
+
fragments: list[Dseqrecord | Primer],
|
|
1220
1175
|
assembly: EdgeRepresentationAssembly,
|
|
1221
1176
|
is_circular: bool,
|
|
1222
1177
|
use_all_fragments: bool,
|
|
@@ -1232,6 +1187,23 @@ class Assembly:
|
|
|
1232
1187
|
if len(assembly) == 0:
|
|
1233
1188
|
return False
|
|
1234
1189
|
|
|
1190
|
+
# Topology check -> Circular sequences cannot be first or last in a linear assembly.
|
|
1191
|
+
# For example, let's imagine aACGTc (linear) and gACGTc (circular).
|
|
1192
|
+
# It should not be possible to join them into a linear assembly. It's similar if we
|
|
1193
|
+
# think of a restriction-ligation assembly, example: aGAATTCc (linear) and gGAATTCc
|
|
1194
|
+
# (circular).
|
|
1195
|
+
# A linear product can be generated where the circular molecule is cut open, and at one end
|
|
1196
|
+
# it joins the linear molecule and on the other it's free, but for now it's not a
|
|
1197
|
+
# relevant product and it's excluded.
|
|
1198
|
+
first_fragment = fragments[abs(assembly[0][0]) - 1]
|
|
1199
|
+
last_fragment = fragments[abs(assembly[-1][1]) - 1]
|
|
1200
|
+
if not is_circular and (
|
|
1201
|
+
isinstance(first_fragment, Dseqrecord)
|
|
1202
|
+
and first_fragment.circular
|
|
1203
|
+
or (isinstance(last_fragment, Dseqrecord) and last_fragment.circular)
|
|
1204
|
+
):
|
|
1205
|
+
return False
|
|
1206
|
+
|
|
1235
1207
|
if use_all_fragments and len(fragments) != len(
|
|
1236
1208
|
set(flatten(map(abs, e[:2]) for e in assembly))
|
|
1237
1209
|
):
|
|
@@ -1269,8 +1241,8 @@ class Assembly:
|
|
|
1269
1241
|
# Incompatible as described in figure above
|
|
1270
1242
|
fragment = fragments[abs(v1) - 1]
|
|
1271
1243
|
if (
|
|
1272
|
-
isinstance(fragment,
|
|
1273
|
-
) and
|
|
1244
|
+
isinstance(fragment, Primer) or not fragment.circular
|
|
1245
|
+
) and location_boundaries(start_location)[1] >= location_boundaries(
|
|
1274
1246
|
end_location
|
|
1275
1247
|
)[
|
|
1276
1248
|
1
|
|
@@ -1294,8 +1266,8 @@ class Assembly:
|
|
|
1294
1266
|
match: SequenceOverlap,
|
|
1295
1267
|
u: int,
|
|
1296
1268
|
v: int,
|
|
1297
|
-
first:
|
|
1298
|
-
secnd:
|
|
1269
|
+
first: Dseqrecord,
|
|
1270
|
+
secnd: Dseqrecord,
|
|
1299
1271
|
):
|
|
1300
1272
|
"""Add edges to the graph from a match returned by the ``algorithm`` function (see pydna.common_substrings). For
|
|
1301
1273
|
format of edges (see documentation of the Assembly class).
|
|
@@ -1314,10 +1286,10 @@ class Assembly:
|
|
|
1314
1286
|
else:
|
|
1315
1287
|
# We use shift_location with 0 to wrap origin-spanning features
|
|
1316
1288
|
locs = [
|
|
1317
|
-
|
|
1289
|
+
shift_location(
|
|
1318
1290
|
SimpleLocation(x_start, x_start + length), 0, len(first)
|
|
1319
1291
|
),
|
|
1320
|
-
|
|
1292
|
+
shift_location(
|
|
1321
1293
|
SimpleLocation(y_start, y_start + length), 0, len(secnd)
|
|
1322
1294
|
),
|
|
1323
1295
|
]
|
|
@@ -1352,7 +1324,7 @@ class Assembly:
|
|
|
1352
1324
|
"""
|
|
1353
1325
|
|
|
1354
1326
|
# Copy the graph since we will add the begin and end mock nodes
|
|
1355
|
-
G =
|
|
1327
|
+
G = nx.MultiDiGraph(self.G)
|
|
1356
1328
|
G.add_nodes_from(["begin", "end"])
|
|
1357
1329
|
|
|
1358
1330
|
if self.use_fragment_order:
|
|
@@ -1390,7 +1362,7 @@ class Assembly:
|
|
|
1390
1362
|
def node_path2assembly_list(
|
|
1391
1363
|
self, cycle: list[int], circular: bool
|
|
1392
1364
|
) -> list[EdgeRepresentationAssembly]:
|
|
1393
|
-
"""Convert a node path in the format [1, 2, 3] (as returned by
|
|
1365
|
+
"""Convert a node path in the format [1, 2, 3] (as returned by networkx.cycles.simple_cycles) to a list of all
|
|
1394
1366
|
possible assemblies.
|
|
1395
1367
|
|
|
1396
1368
|
There may be multiple assemblies for a given node path, if there are several edges connecting two nodes,
|
|
@@ -1404,11 +1376,11 @@ class Assembly:
|
|
|
1404
1376
|
combine.append([(u, v, key) for key in self.G[u][v]])
|
|
1405
1377
|
return [
|
|
1406
1378
|
tuple(map(self.format_assembly_edge, x))
|
|
1407
|
-
for x in
|
|
1379
|
+
for x in itertools.product(*combine)
|
|
1408
1380
|
]
|
|
1409
1381
|
|
|
1410
1382
|
def get_unique_linear_paths(
|
|
1411
|
-
self, G_with_begin_end:
|
|
1383
|
+
self, G_with_begin_end: nx.MultiDiGraph, max_paths=10000
|
|
1412
1384
|
) -> list[list[int]]:
|
|
1413
1385
|
"""Get unique linear paths from the graph, removing those that contain the same node twice."""
|
|
1414
1386
|
# We remove the begin and end nodes, and get all paths without edges
|
|
@@ -1419,8 +1391,8 @@ class Assembly:
|
|
|
1419
1391
|
node_paths = [
|
|
1420
1392
|
x[1:-1]
|
|
1421
1393
|
for x in limit_iterator(
|
|
1422
|
-
|
|
1423
|
-
|
|
1394
|
+
nx.all_simple_paths(
|
|
1395
|
+
nx.DiGraph(G_with_begin_end),
|
|
1424
1396
|
"begin",
|
|
1425
1397
|
"end",
|
|
1426
1398
|
cutoff=(len(self.fragments) + 1),
|
|
@@ -1469,7 +1441,7 @@ class Assembly:
|
|
|
1469
1441
|
sorted_cycles = map(
|
|
1470
1442
|
circular_permutation_min_abs,
|
|
1471
1443
|
limit_iterator(
|
|
1472
|
-
|
|
1444
|
+
nx.cycles.simple_cycles(self.G, length_bound=len(self.fragments)),
|
|
1473
1445
|
10000,
|
|
1474
1446
|
),
|
|
1475
1447
|
)
|
|
@@ -1534,8 +1506,8 @@ class Assembly:
|
|
|
1534
1506
|
fragment = self.fragments[abs(v1) - 1]
|
|
1535
1507
|
# Find the pair of edges that should be last and first ((3, 1, [8:10], [9:11)]), (1, 2, [4:6], [0:2]) in
|
|
1536
1508
|
# the example above. Only one of the pairs of edges should satisfy this condition for the topology to make sense.
|
|
1537
|
-
left_of_insertion =
|
|
1538
|
-
right_of_insertion =
|
|
1509
|
+
left_of_insertion = location_boundaries(start_location)[0]
|
|
1510
|
+
right_of_insertion = location_boundaries(end_location)[0]
|
|
1539
1511
|
if not fragment.circular and (
|
|
1540
1512
|
right_of_insertion >= left_of_insertion
|
|
1541
1513
|
# The below condition is for single-site integration.
|
|
@@ -1547,7 +1519,7 @@ class Assembly:
|
|
|
1547
1519
|
#
|
|
1548
1520
|
# The locations of homology on the genome are [0:10] and [2:12], so not identical
|
|
1549
1521
|
# but they overlap.
|
|
1550
|
-
or
|
|
1522
|
+
or locations_overlap(start_location, end_location, len(fragment))
|
|
1551
1523
|
):
|
|
1552
1524
|
edge_pair_index.append(i)
|
|
1553
1525
|
|
|
@@ -1578,13 +1550,13 @@ class Assembly:
|
|
|
1578
1550
|
fragment1 = self.fragments[abs(f1) - 1]
|
|
1579
1551
|
fragment2 = self.fragments[abs(f2) - 1]
|
|
1580
1552
|
|
|
1581
|
-
if not
|
|
1553
|
+
if not locations_overlap(
|
|
1582
1554
|
loc_f1_1, loc_f1_2, len(fragment1)
|
|
1583
|
-
) or not
|
|
1555
|
+
) or not locations_overlap(loc_f2_2, loc_f2_1, len(fragment2)):
|
|
1584
1556
|
return same_assembly
|
|
1585
1557
|
|
|
1586
1558
|
# Sort to make compatible with insertion assembly
|
|
1587
|
-
if
|
|
1559
|
+
if location_boundaries(loc_f1_1)[0] > location_boundaries(loc_f1_2)[0]:
|
|
1588
1560
|
new_assembly = same_assembly[::-1]
|
|
1589
1561
|
else:
|
|
1590
1562
|
new_assembly = same_assembly[:]
|
|
@@ -1597,10 +1569,10 @@ class Assembly:
|
|
|
1597
1569
|
fragment2 = self.fragments[abs(f2) - 1]
|
|
1598
1570
|
|
|
1599
1571
|
# Extract boundaries
|
|
1600
|
-
f2_1_start, _ =
|
|
1601
|
-
f2_2_start, f2_2_end =
|
|
1602
|
-
f1_1_start, _ =
|
|
1603
|
-
f1_2_start, f1_2_end =
|
|
1572
|
+
f2_1_start, _ = location_boundaries(loc_f2_1)
|
|
1573
|
+
f2_2_start, f2_2_end = location_boundaries(loc_f2_2)
|
|
1574
|
+
f1_1_start, _ = location_boundaries(loc_f1_1)
|
|
1575
|
+
f1_2_start, f1_2_end = location_boundaries(loc_f1_2)
|
|
1604
1576
|
|
|
1605
1577
|
overlap_diff = len(fragment1[f1_1_start:f1_2_end]) - len(
|
|
1606
1578
|
fragment2[f2_1_start:f2_2_end]
|
|
@@ -1640,7 +1612,7 @@ class Assembly:
|
|
|
1640
1612
|
"only_adjacent_edges not implemented for insertion assemblies"
|
|
1641
1613
|
)
|
|
1642
1614
|
|
|
1643
|
-
cycles = limit_iterator(
|
|
1615
|
+
cycles = limit_iterator(nx.cycles.simple_cycles(self.G), 10000)
|
|
1644
1616
|
|
|
1645
1617
|
# We apply constrains already here because sometimes the combinatorial explosion is too large
|
|
1646
1618
|
if self.use_all_fragments:
|
|
@@ -1659,7 +1631,7 @@ class Assembly:
|
|
|
1659
1631
|
)
|
|
1660
1632
|
|
|
1661
1633
|
# We find cycles first
|
|
1662
|
-
iterator = limit_iterator(
|
|
1634
|
+
iterator = limit_iterator(nx.cycles.simple_cycles(self.G), 10000)
|
|
1663
1635
|
assemblies = sum(
|
|
1664
1636
|
map(lambda x: self.node_path2assembly_list(x, True), iterator), []
|
|
1665
1637
|
)
|
|
@@ -1683,21 +1655,19 @@ class Assembly:
|
|
|
1683
1655
|
|
|
1684
1656
|
def assemble_linear(
|
|
1685
1657
|
self, only_adjacent_edges: bool = False, max_assemblies: int = 50
|
|
1686
|
-
) -> list[
|
|
1658
|
+
) -> list[Dseqrecord]:
|
|
1687
1659
|
"""Assemble linear constructs, from assemblies returned by self.get_linear_assemblies."""
|
|
1688
1660
|
assemblies = self.get_linear_assemblies(only_adjacent_edges, max_assemblies)
|
|
1689
1661
|
return [assemble(self.fragments, a) for a in assemblies]
|
|
1690
1662
|
|
|
1691
1663
|
def assemble_circular(
|
|
1692
1664
|
self, only_adjacent_edges: bool = False, max_assemblies: int = 50
|
|
1693
|
-
) -> list[
|
|
1665
|
+
) -> list[Dseqrecord]:
|
|
1694
1666
|
"""Assemble circular constructs, from assemblies returned by self.get_circular_assemblies."""
|
|
1695
1667
|
assemblies = self.get_circular_assemblies(only_adjacent_edges, max_assemblies)
|
|
1696
1668
|
return [assemble(self.fragments, a) for a in assemblies]
|
|
1697
1669
|
|
|
1698
|
-
def assemble_insertion(
|
|
1699
|
-
self, only_adjacent_edges: bool = False
|
|
1700
|
-
) -> list[_Dseqrecord]:
|
|
1670
|
+
def assemble_insertion(self, only_adjacent_edges: bool = False) -> list[Dseqrecord]:
|
|
1701
1671
|
"""Assemble insertion constructs, from assemblies returned by self.get_insertion_assemblies."""
|
|
1702
1672
|
assemblies = self.get_insertion_assemblies(only_adjacent_edges)
|
|
1703
1673
|
return [assemble(self.fragments, a, is_insertion=True) for a in assemblies]
|
|
@@ -1739,10 +1709,10 @@ class Assembly:
|
|
|
1739
1709
|
if edge_location not in this_dict[key]:
|
|
1740
1710
|
this_dict[key].append(edge_location)
|
|
1741
1711
|
this_dict["left"] = sorted(
|
|
1742
|
-
this_dict["left"], key=lambda x:
|
|
1712
|
+
this_dict["left"], key=lambda x: location_boundaries(x)[0]
|
|
1743
1713
|
)
|
|
1744
1714
|
this_dict["right"] = sorted(
|
|
1745
|
-
this_dict["right"], key=lambda x:
|
|
1715
|
+
this_dict["right"], key=lambda x: location_boundaries(x)[0]
|
|
1746
1716
|
)
|
|
1747
1717
|
locations_on_fragments[node] = this_dict
|
|
1748
1718
|
|
|
@@ -1789,7 +1759,7 @@ class Assembly:
|
|
|
1789
1759
|
|
|
1790
1760
|
pairs = list()
|
|
1791
1761
|
for pair in zip(left, right):
|
|
1792
|
-
pairs += list(
|
|
1762
|
+
pairs += list(itertools.product(*pair))
|
|
1793
1763
|
allowed_location_pairs[node] = pairs
|
|
1794
1764
|
|
|
1795
1765
|
fragment_assembly = edge_representation2subfragment_representation(
|
|
@@ -1802,7 +1772,7 @@ class Assembly:
|
|
|
1802
1772
|
|
|
1803
1773
|
def __repr__(self):
|
|
1804
1774
|
# https://pyformat.info
|
|
1805
|
-
return
|
|
1775
|
+
return ps(
|
|
1806
1776
|
"Assembly\n"
|
|
1807
1777
|
"fragments..: {sequences}\n"
|
|
1808
1778
|
"limit(bp)..: {limit}\n"
|
|
@@ -1823,7 +1793,7 @@ class PCRAssembly(Assembly):
|
|
|
1823
1793
|
the number of mismatches allowed in the overlap. Only supports substitution mismatches, not indels.
|
|
1824
1794
|
"""
|
|
1825
1795
|
|
|
1826
|
-
def __init__(self, frags: list[
|
|
1796
|
+
def __init__(self, frags: list[Dseqrecord | Primer], limit=25, mismatches=0):
|
|
1827
1797
|
|
|
1828
1798
|
value_error = ValueError(
|
|
1829
1799
|
"PCRAssembly assembly must be initialised with a list/tuple of primer, template, primer"
|
|
@@ -1833,15 +1803,15 @@ class PCRAssembly(Assembly):
|
|
|
1833
1803
|
|
|
1834
1804
|
# Validate the inputs: should be a series of primer, template, primer
|
|
1835
1805
|
wrong_fragment_class = (
|
|
1836
|
-
not isinstance(frags[0],
|
|
1837
|
-
isinstance(frags[1],
|
|
1838
|
-
not isinstance(frags[2],
|
|
1806
|
+
not isinstance(frags[0], Primer),
|
|
1807
|
+
isinstance(frags[1], Primer),
|
|
1808
|
+
not isinstance(frags[2], Primer),
|
|
1839
1809
|
)
|
|
1840
1810
|
if any(wrong_fragment_class):
|
|
1841
1811
|
raise value_error
|
|
1842
1812
|
|
|
1843
1813
|
# TODO: allow for the same fragment to be included more than once?
|
|
1844
|
-
self.G =
|
|
1814
|
+
self.G = nx.MultiDiGraph()
|
|
1845
1815
|
# Add positive and negative nodes for forward and reverse fragments
|
|
1846
1816
|
self.G.add_nodes_from((i + 1, {"seq": f}) for (i, f) in enumerate(frags))
|
|
1847
1817
|
self.G.add_nodes_from(
|
|
@@ -1854,8 +1824,8 @@ class PCRAssembly(Assembly):
|
|
|
1854
1824
|
# primer, template, primer
|
|
1855
1825
|
p1, t, p2 = (i + 1, i + 2, i + 3)
|
|
1856
1826
|
primer_ids += [p1, p2]
|
|
1857
|
-
pairs += list(
|
|
1858
|
-
pairs += list(
|
|
1827
|
+
pairs += list(itertools.product([p1, p2], [t, -t]))
|
|
1828
|
+
pairs += list(itertools.product([t, -t], [-p1, -p2]))
|
|
1859
1829
|
|
|
1860
1830
|
for u, v in pairs:
|
|
1861
1831
|
u_seq = self.G.nodes[u]["seq"]
|
|
@@ -1894,20 +1864,33 @@ class PCRAssembly(Assembly):
|
|
|
1894
1864
|
"get_insertion_assemblies not implemented for PCR assemblies"
|
|
1895
1865
|
)
|
|
1896
1866
|
|
|
1867
|
+
def assemble_linear(
|
|
1868
|
+
self, only_adjacent_edges: bool = False, max_assemblies: int = 50
|
|
1869
|
+
) -> list[Dseqrecord]:
|
|
1870
|
+
"""
|
|
1871
|
+
Overrides the parent method to ensure that the 5' of the crick strand of the product matches the
|
|
1872
|
+
sequence of the reverse primer. This is important when using primers with dUTP (for USER cloning).
|
|
1873
|
+
"""
|
|
1874
|
+
results = super().assemble_linear(only_adjacent_edges, max_assemblies)
|
|
1875
|
+
for result in results:
|
|
1876
|
+
rp = self.fragments[2]
|
|
1877
|
+
result.seq = result.seq[: -len(rp)] + Dseq(str(rp.seq.rc()))
|
|
1878
|
+
return results
|
|
1879
|
+
|
|
1897
1880
|
|
|
1898
1881
|
class SingleFragmentAssembly(Assembly):
|
|
1899
1882
|
"""
|
|
1900
1883
|
An assembly that represents the circularisation or splicing of a single fragment.
|
|
1901
1884
|
"""
|
|
1902
1885
|
|
|
1903
|
-
def __init__(self, frags: [
|
|
1886
|
+
def __init__(self, frags: [Dseqrecord], limit=25, algorithm=common_sub_strings):
|
|
1904
1887
|
|
|
1905
1888
|
if len(frags) != 1:
|
|
1906
1889
|
raise ValueError(
|
|
1907
1890
|
"SingleFragmentAssembly assembly must be initialised with a single fragment"
|
|
1908
1891
|
)
|
|
1909
1892
|
# TODO: allow for the same fragment to be included more than once?
|
|
1910
|
-
self.G =
|
|
1893
|
+
self.G = nx.MultiDiGraph()
|
|
1911
1894
|
frag = frags[0]
|
|
1912
1895
|
# Add positive and negative nodes for forward and reverse fragments
|
|
1913
1896
|
self.G.add_node(1, seq=frag)
|
|
@@ -1958,8 +1941,8 @@ class SingleFragmentAssembly(Assembly):
|
|
|
1958
1941
|
if x[0][2] == x[0][3]:
|
|
1959
1942
|
return False
|
|
1960
1943
|
# We don't want to get overlap only (e.g. GAATTCcatGAATTC giving GAATTC)
|
|
1961
|
-
left_start, _ =
|
|
1962
|
-
_, right_end =
|
|
1944
|
+
left_start, _ = location_boundaries(x[0][2])
|
|
1945
|
+
_, right_end = location_boundaries(x[0][3])
|
|
1963
1946
|
if left_start == 0 and right_end == len(self.fragments[0]):
|
|
1964
1947
|
return False
|
|
1965
1948
|
return True
|
|
@@ -1982,18 +1965,19 @@ class SingleFragmentAssembly(Assembly):
|
|
|
1982
1965
|
|
|
1983
1966
|
|
|
1984
1967
|
def common_function_assembly_products(
|
|
1985
|
-
frags: list[
|
|
1968
|
+
frags: list[Dseqrecord],
|
|
1986
1969
|
limit: int | None,
|
|
1987
1970
|
algorithm: Callable,
|
|
1988
1971
|
circular_only: bool,
|
|
1989
1972
|
filter_results_function: Callable | None = None,
|
|
1990
|
-
|
|
1973
|
+
only_adjacent_edges: bool = False,
|
|
1974
|
+
) -> list[Dseqrecord]:
|
|
1991
1975
|
"""Common function to avoid code duplication. Could be simplified further
|
|
1992
1976
|
once SingleFragmentAssembly and Assembly are merged.
|
|
1993
1977
|
|
|
1994
1978
|
Parameters
|
|
1995
1979
|
----------
|
|
1996
|
-
frags : list[
|
|
1980
|
+
frags : list[Dseqrecord]
|
|
1997
1981
|
List of DNA fragments to assemble
|
|
1998
1982
|
limit : int or None
|
|
1999
1983
|
Minimum overlap length required, or None if not applicable
|
|
@@ -2001,10 +1985,14 @@ def common_function_assembly_products(
|
|
|
2001
1985
|
Function that determines valid overlaps between fragments
|
|
2002
1986
|
circular_only : bool
|
|
2003
1987
|
If True, only return circular assemblies
|
|
1988
|
+
filter_results_function : Callable or None
|
|
1989
|
+
Function that filters the results
|
|
1990
|
+
only_adjacent_edges : bool
|
|
1991
|
+
If True, only return assemblies that use only adjacent edges
|
|
2004
1992
|
|
|
2005
1993
|
Returns
|
|
2006
1994
|
-------
|
|
2007
|
-
list[
|
|
1995
|
+
list[Dseqrecord]
|
|
2008
1996
|
List of assembled DNA molecules
|
|
2009
1997
|
"""
|
|
2010
1998
|
if len(frags) == 1:
|
|
@@ -2013,10 +2001,10 @@ def common_function_assembly_products(
|
|
|
2013
2001
|
asm = Assembly(
|
|
2014
2002
|
frags, limit, algorithm, use_fragment_order=False, use_all_fragments=True
|
|
2015
2003
|
)
|
|
2016
|
-
output_assemblies = asm.get_circular_assemblies()
|
|
2004
|
+
output_assemblies = asm.get_circular_assemblies(only_adjacent_edges)
|
|
2017
2005
|
if not circular_only and len(frags) > 1:
|
|
2018
2006
|
output_assemblies += filter_linear_subassemblies(
|
|
2019
|
-
asm.get_linear_assemblies(), output_assemblies, frags
|
|
2007
|
+
asm.get_linear_assemblies(only_adjacent_edges), output_assemblies, frags
|
|
2020
2008
|
)
|
|
2021
2009
|
if not circular_only and len(frags) == 1:
|
|
2022
2010
|
output_assemblies += asm.get_insertion_assemblies()
|
|
@@ -2028,28 +2016,28 @@ def common_function_assembly_products(
|
|
|
2028
2016
|
|
|
2029
2017
|
|
|
2030
2018
|
def _recast_sources(
|
|
2031
|
-
products: list[
|
|
2032
|
-
) -> list[
|
|
2019
|
+
products: list[Dseqrecord], source_cls, **extra_fields
|
|
2020
|
+
) -> list[Dseqrecord]:
|
|
2033
2021
|
"""Recast the `source` of each product to `source_cls` with optional extras.
|
|
2034
2022
|
|
|
2035
2023
|
This avoids repeating the same for-loop across many assembly functions.
|
|
2036
2024
|
"""
|
|
2037
2025
|
for prod in products:
|
|
2038
2026
|
prod.source = source_cls(
|
|
2039
|
-
**prod.source.
|
|
2027
|
+
**prod.source.to_unserialized_dict(),
|
|
2040
2028
|
**extra_fields,
|
|
2041
2029
|
)
|
|
2042
2030
|
return products
|
|
2043
2031
|
|
|
2044
2032
|
|
|
2045
2033
|
def gibson_assembly(
|
|
2046
|
-
frags: list[
|
|
2047
|
-
) -> list[
|
|
2034
|
+
frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
|
|
2035
|
+
) -> list[Dseqrecord]:
|
|
2048
2036
|
"""Returns the products for Gibson assembly.
|
|
2049
2037
|
|
|
2050
2038
|
Parameters
|
|
2051
2039
|
----------
|
|
2052
|
-
frags : list[
|
|
2040
|
+
frags : list[Dseqrecord]
|
|
2053
2041
|
List of DNA fragments to assemble
|
|
2054
2042
|
limit : int, optional
|
|
2055
2043
|
Minimum overlap length required, by default 25
|
|
@@ -2058,7 +2046,7 @@ def gibson_assembly(
|
|
|
2058
2046
|
|
|
2059
2047
|
Returns
|
|
2060
2048
|
-------
|
|
2061
|
-
list[
|
|
2049
|
+
list[Dseqrecord]
|
|
2062
2050
|
List of assembled DNA molecules
|
|
2063
2051
|
"""
|
|
2064
2052
|
|
|
@@ -2069,14 +2057,14 @@ def gibson_assembly(
|
|
|
2069
2057
|
|
|
2070
2058
|
|
|
2071
2059
|
def in_fusion_assembly(
|
|
2072
|
-
frags: list[
|
|
2073
|
-
) -> list[
|
|
2060
|
+
frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
|
|
2061
|
+
) -> list[Dseqrecord]:
|
|
2074
2062
|
"""Returns the products for in-fusion assembly. This is the same as Gibson
|
|
2075
2063
|
assembly, but with a different name.
|
|
2076
2064
|
|
|
2077
2065
|
Parameters
|
|
2078
2066
|
----------
|
|
2079
|
-
frags : list[
|
|
2067
|
+
frags : list[Dseqrecord]
|
|
2080
2068
|
List of DNA fragments to assemble
|
|
2081
2069
|
limit : int, optional
|
|
2082
2070
|
Minimum overlap length required, by default 25
|
|
@@ -2085,7 +2073,7 @@ def in_fusion_assembly(
|
|
|
2085
2073
|
|
|
2086
2074
|
Returns
|
|
2087
2075
|
-------
|
|
2088
|
-
list[
|
|
2076
|
+
list[Dseqrecord]
|
|
2089
2077
|
List of assembled DNA molecules
|
|
2090
2078
|
"""
|
|
2091
2079
|
|
|
@@ -2094,14 +2082,14 @@ def in_fusion_assembly(
|
|
|
2094
2082
|
|
|
2095
2083
|
|
|
2096
2084
|
def fusion_pcr_assembly(
|
|
2097
|
-
frags: list[
|
|
2098
|
-
) -> list[
|
|
2085
|
+
frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
|
|
2086
|
+
) -> list[Dseqrecord]:
|
|
2099
2087
|
"""Returns the products for fusion PCR assembly. This is the same as Gibson
|
|
2100
2088
|
assembly, but with a different name.
|
|
2101
2089
|
|
|
2102
2090
|
Parameters
|
|
2103
2091
|
----------
|
|
2104
|
-
frags : list[
|
|
2092
|
+
frags : list[Dseqrecord]
|
|
2105
2093
|
List of DNA fragments to assemble
|
|
2106
2094
|
limit : int, optional
|
|
2107
2095
|
Minimum overlap length required, by default 25
|
|
@@ -2110,7 +2098,7 @@ def fusion_pcr_assembly(
|
|
|
2110
2098
|
|
|
2111
2099
|
Returns
|
|
2112
2100
|
-------
|
|
2113
|
-
list[
|
|
2101
|
+
list[Dseqrecord]
|
|
2114
2102
|
List of assembled DNA molecules
|
|
2115
2103
|
"""
|
|
2116
2104
|
products = gibson_assembly(frags, limit)
|
|
@@ -2118,13 +2106,13 @@ def fusion_pcr_assembly(
|
|
|
2118
2106
|
|
|
2119
2107
|
|
|
2120
2108
|
def in_vivo_assembly(
|
|
2121
|
-
frags: list[
|
|
2122
|
-
) -> list[
|
|
2109
|
+
frags: list[Dseqrecord], limit: int = 25, circular_only: bool = False
|
|
2110
|
+
) -> list[Dseqrecord]:
|
|
2123
2111
|
"""Returns the products for in vivo assembly (IVA), which relies on homologous recombination between the fragments.
|
|
2124
2112
|
|
|
2125
2113
|
Parameters
|
|
2126
2114
|
----------
|
|
2127
|
-
frags : list[
|
|
2115
|
+
frags : list[Dseqrecord]
|
|
2128
2116
|
List of DNA fragments to assemble
|
|
2129
2117
|
limit : int, optional
|
|
2130
2118
|
Minimum overlap length required, by default 25
|
|
@@ -2133,7 +2121,7 @@ def in_vivo_assembly(
|
|
|
2133
2121
|
|
|
2134
2122
|
Returns
|
|
2135
2123
|
-------
|
|
2136
|
-
list[
|
|
2124
|
+
list[Dseqrecord]
|
|
2137
2125
|
List of assembled DNA molecules
|
|
2138
2126
|
"""
|
|
2139
2127
|
products = common_function_assembly_products(
|
|
@@ -2143,11 +2131,11 @@ def in_vivo_assembly(
|
|
|
2143
2131
|
|
|
2144
2132
|
|
|
2145
2133
|
def restriction_ligation_assembly(
|
|
2146
|
-
frags: list[
|
|
2147
|
-
enzymes: list["
|
|
2134
|
+
frags: list[Dseqrecord],
|
|
2135
|
+
enzymes: list["AbstractCut"],
|
|
2148
2136
|
allow_blunt: bool = True,
|
|
2149
2137
|
circular_only: bool = False,
|
|
2150
|
-
) -> list[
|
|
2138
|
+
) -> list[Dseqrecord]:
|
|
2151
2139
|
"""Returns the products for restriction ligation assembly:
|
|
2152
2140
|
|
|
2153
2141
|
- Finds cutsites in the fragments
|
|
@@ -2156,9 +2144,9 @@ def restriction_ligation_assembly(
|
|
|
2156
2144
|
|
|
2157
2145
|
Parameters
|
|
2158
2146
|
----------
|
|
2159
|
-
frags : list[
|
|
2147
|
+
frags : list[Dseqrecord]
|
|
2160
2148
|
List of DNA fragments to assemble
|
|
2161
|
-
enzymes : list[
|
|
2149
|
+
enzymes : list[AbstractCut]
|
|
2162
2150
|
List of restriction enzymes to use
|
|
2163
2151
|
allow_blunt : bool, optional
|
|
2164
2152
|
If True, allow blunt end ligations, by default True
|
|
@@ -2167,7 +2155,7 @@ def restriction_ligation_assembly(
|
|
|
2167
2155
|
|
|
2168
2156
|
Returns
|
|
2169
2157
|
-------
|
|
2170
|
-
list[
|
|
2158
|
+
list[Dseqrecord]
|
|
2171
2159
|
List of assembled DNA molecules
|
|
2172
2160
|
|
|
2173
2161
|
Examples
|
|
@@ -2214,7 +2202,7 @@ def restriction_ligation_assembly(
|
|
|
2214
2202
|
return restriction_ligation_overlap(x, y, enzymes, False, allow_blunt)
|
|
2215
2203
|
|
|
2216
2204
|
products = common_function_assembly_products(
|
|
2217
|
-
frags, None, algorithm_fn, circular_only
|
|
2205
|
+
frags, None, algorithm_fn, circular_only, only_adjacent_edges=True
|
|
2218
2206
|
)
|
|
2219
2207
|
return _recast_sources(
|
|
2220
2208
|
products, RestrictionAndLigationSource, restriction_enzymes=enzymes
|
|
@@ -2222,20 +2210,20 @@ def restriction_ligation_assembly(
|
|
|
2222
2210
|
|
|
2223
2211
|
|
|
2224
2212
|
def golden_gate_assembly(
|
|
2225
|
-
frags: list[
|
|
2226
|
-
enzymes: list["
|
|
2213
|
+
frags: list[Dseqrecord],
|
|
2214
|
+
enzymes: list["AbstractCut"],
|
|
2227
2215
|
allow_blunt: bool = True,
|
|
2228
2216
|
circular_only: bool = False,
|
|
2229
|
-
) -> list[
|
|
2217
|
+
) -> list[Dseqrecord]:
|
|
2230
2218
|
"""Returns the products for Golden Gate assembly. This is the same as
|
|
2231
2219
|
restriction ligation assembly, but with a different name. Check the documentation
|
|
2232
2220
|
for ``restriction_ligation_assembly`` for more details.
|
|
2233
2221
|
|
|
2234
2222
|
Parameters
|
|
2235
2223
|
----------
|
|
2236
|
-
frags : list[
|
|
2224
|
+
frags : list[Dseqrecord]
|
|
2237
2225
|
List of DNA fragments to assemble
|
|
2238
|
-
enzymes : list[
|
|
2226
|
+
enzymes : list[AbstractCut]
|
|
2239
2227
|
List of restriction enzymes to use
|
|
2240
2228
|
allow_blunt : bool, optional
|
|
2241
2229
|
If True, allow blunt end ligations, by default True
|
|
@@ -2244,7 +2232,7 @@ def golden_gate_assembly(
|
|
|
2244
2232
|
|
|
2245
2233
|
Returns
|
|
2246
2234
|
-------
|
|
2247
|
-
list[
|
|
2235
|
+
list[Dseqrecord]
|
|
2248
2236
|
List of assembled DNA molecules
|
|
2249
2237
|
|
|
2250
2238
|
Examples
|
|
@@ -2255,11 +2243,11 @@ def golden_gate_assembly(
|
|
|
2255
2243
|
|
|
2256
2244
|
|
|
2257
2245
|
def ligation_assembly(
|
|
2258
|
-
frags: list[
|
|
2246
|
+
frags: list[Dseqrecord],
|
|
2259
2247
|
allow_blunt: bool = False,
|
|
2260
2248
|
allow_partial_overlap: bool = False,
|
|
2261
2249
|
circular_only: bool = False,
|
|
2262
|
-
) -> list[
|
|
2250
|
+
) -> list[Dseqrecord]:
|
|
2263
2251
|
"""Returns the products for ligation assembly, as inputs pass the fragments (digested if needed) that
|
|
2264
2252
|
will be ligated.
|
|
2265
2253
|
|
|
@@ -2267,7 +2255,7 @@ def ligation_assembly(
|
|
|
2267
2255
|
|
|
2268
2256
|
Parameters
|
|
2269
2257
|
----------
|
|
2270
|
-
frags : list[
|
|
2258
|
+
frags : list[Dseqrecord]
|
|
2271
2259
|
List of DNA fragments to assemble
|
|
2272
2260
|
allow_blunt : bool, optional
|
|
2273
2261
|
If True, allow blunt end ligations, by default False
|
|
@@ -2278,7 +2266,7 @@ def ligation_assembly(
|
|
|
2278
2266
|
|
|
2279
2267
|
Returns
|
|
2280
2268
|
-------
|
|
2281
|
-
list[
|
|
2269
|
+
list[Dseqrecord]
|
|
2282
2270
|
List of assembled DNA molecules
|
|
2283
2271
|
|
|
2284
2272
|
|
|
@@ -2333,17 +2321,17 @@ def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
|
|
|
2333
2321
|
|
|
2334
2322
|
|
|
2335
2323
|
def gateway_assembly(
|
|
2336
|
-
frags: list[
|
|
2324
|
+
frags: list[Dseqrecord],
|
|
2337
2325
|
reaction_type: Literal["BP", "LR"],
|
|
2338
2326
|
greedy: bool = False,
|
|
2339
2327
|
circular_only: bool = False,
|
|
2340
2328
|
multi_site_only: bool = False,
|
|
2341
|
-
) -> list[
|
|
2329
|
+
) -> list[Dseqrecord]:
|
|
2342
2330
|
"""Returns the products for Gateway assembly / Gateway cloning.
|
|
2343
2331
|
|
|
2344
2332
|
Parameters
|
|
2345
2333
|
----------
|
|
2346
|
-
frags : list[
|
|
2334
|
+
frags : list[Dseqrecord]
|
|
2347
2335
|
List of DNA fragments to assemble
|
|
2348
2336
|
reaction_type : Literal['BP', 'LR']
|
|
2349
2337
|
Type of Gateway reaction
|
|
@@ -2359,7 +2347,7 @@ def gateway_assembly(
|
|
|
2359
2347
|
|
|
2360
2348
|
Returns
|
|
2361
2349
|
-------
|
|
2362
|
-
list[
|
|
2350
|
+
list[Dseqrecord]
|
|
2363
2351
|
List of assembled DNA molecules
|
|
2364
2352
|
|
|
2365
2353
|
|
|
@@ -2446,13 +2434,13 @@ def gateway_assembly(
|
|
|
2446
2434
|
|
|
2447
2435
|
|
|
2448
2436
|
def common_function_integration_products(
|
|
2449
|
-
frags: list[
|
|
2450
|
-
) -> list[
|
|
2437
|
+
frags: list[Dseqrecord], limit: int | None, algorithm: Callable
|
|
2438
|
+
) -> list[Dseqrecord]:
|
|
2451
2439
|
"""Common function to avoid code duplication for integration products.
|
|
2452
2440
|
|
|
2453
2441
|
Parameters
|
|
2454
2442
|
----------
|
|
2455
|
-
frags : list[
|
|
2443
|
+
frags : list[Dseqrecord]
|
|
2456
2444
|
List of DNA fragments to integrate
|
|
2457
2445
|
limit : int or None
|
|
2458
2446
|
Minimum overlap length required, or None if not applicable
|
|
@@ -2461,7 +2449,7 @@ def common_function_integration_products(
|
|
|
2461
2449
|
|
|
2462
2450
|
Returns
|
|
2463
2451
|
-------
|
|
2464
|
-
list[
|
|
2452
|
+
list[Dseqrecord]
|
|
2465
2453
|
List of integrated DNA molecules
|
|
2466
2454
|
"""
|
|
2467
2455
|
if len(frags) == 1:
|
|
@@ -2482,27 +2470,27 @@ def common_function_integration_products(
|
|
|
2482
2470
|
|
|
2483
2471
|
|
|
2484
2472
|
def common_handle_insertion_fragments(
|
|
2485
|
-
genome:
|
|
2486
|
-
) -> list[
|
|
2473
|
+
genome: Dseqrecord, inserts: list[Dseqrecord]
|
|
2474
|
+
) -> list[Dseqrecord]:
|
|
2487
2475
|
"""Common function to handle / validate insertion fragments.
|
|
2488
2476
|
|
|
2489
2477
|
Parameters
|
|
2490
2478
|
----------
|
|
2491
|
-
genome :
|
|
2479
|
+
genome : Dseqrecord
|
|
2492
2480
|
Target genome sequence
|
|
2493
|
-
inserts : list[
|
|
2481
|
+
inserts : list[Dseqrecord] or Dseqrecord
|
|
2494
2482
|
DNA fragment(s) to insert
|
|
2495
2483
|
|
|
2496
2484
|
Returns
|
|
2497
2485
|
-------
|
|
2498
|
-
list[
|
|
2486
|
+
list[Dseqrecord]
|
|
2499
2487
|
List containing genome and insert fragments
|
|
2500
2488
|
"""
|
|
2501
|
-
if not isinstance(genome,
|
|
2489
|
+
if not isinstance(genome, Dseqrecord):
|
|
2502
2490
|
raise ValueError("Genome must be a Dseqrecord object")
|
|
2503
2491
|
|
|
2504
2492
|
if not isinstance(inserts, list) or not all(
|
|
2505
|
-
isinstance(f,
|
|
2493
|
+
isinstance(f, Dseqrecord) for f in inserts
|
|
2506
2494
|
):
|
|
2507
2495
|
raise ValueError("Inserts must be a list of Dseqrecord objects")
|
|
2508
2496
|
|
|
@@ -2513,13 +2501,13 @@ def common_handle_insertion_fragments(
|
|
|
2513
2501
|
|
|
2514
2502
|
|
|
2515
2503
|
def common_function_excision_products(
|
|
2516
|
-
genome:
|
|
2517
|
-
) -> list[
|
|
2504
|
+
genome: Dseqrecord, limit: int | None, algorithm: Callable
|
|
2505
|
+
) -> list[Dseqrecord]:
|
|
2518
2506
|
"""Common function to avoid code duplication for excision products.
|
|
2519
2507
|
|
|
2520
2508
|
Parameters
|
|
2521
2509
|
----------
|
|
2522
|
-
genome :
|
|
2510
|
+
genome : Dseqrecord
|
|
2523
2511
|
Target genome sequence
|
|
2524
2512
|
limit : int or None
|
|
2525
2513
|
Minimum overlap length required, or None if not applicable
|
|
@@ -2528,7 +2516,7 @@ def common_function_excision_products(
|
|
|
2528
2516
|
|
|
2529
2517
|
Returns
|
|
2530
2518
|
-------
|
|
2531
|
-
list[
|
|
2519
|
+
list[Dseqrecord]
|
|
2532
2520
|
List of excised DNA molecules
|
|
2533
2521
|
"""
|
|
2534
2522
|
asm = SingleFragmentAssembly([genome], limit, algorithm)
|
|
@@ -2536,25 +2524,25 @@ def common_function_excision_products(
|
|
|
2536
2524
|
|
|
2537
2525
|
|
|
2538
2526
|
def homologous_recombination_integration(
|
|
2539
|
-
genome:
|
|
2540
|
-
inserts: list[
|
|
2527
|
+
genome: Dseqrecord,
|
|
2528
|
+
inserts: list[Dseqrecord],
|
|
2541
2529
|
limit: int = 40,
|
|
2542
|
-
) -> list[
|
|
2530
|
+
) -> list[Dseqrecord]:
|
|
2543
2531
|
"""Returns the products resulting from the integration of an insert (or inserts joined
|
|
2544
2532
|
through in vivo recombination) into the genome through homologous recombination.
|
|
2545
2533
|
|
|
2546
2534
|
Parameters
|
|
2547
2535
|
----------
|
|
2548
|
-
genome :
|
|
2536
|
+
genome : Dseqrecord
|
|
2549
2537
|
Target genome sequence
|
|
2550
|
-
inserts : list[
|
|
2538
|
+
inserts : list[Dseqrecord]
|
|
2551
2539
|
DNA fragment(s) to insert
|
|
2552
2540
|
limit : int, optional
|
|
2553
2541
|
Minimum homology length required, by default 40
|
|
2554
2542
|
|
|
2555
2543
|
Returns
|
|
2556
2544
|
-------
|
|
2557
|
-
list[
|
|
2545
|
+
list[Dseqrecord]
|
|
2558
2546
|
List of integrated DNA molecules
|
|
2559
2547
|
|
|
2560
2548
|
|
|
@@ -2590,21 +2578,21 @@ def homologous_recombination_integration(
|
|
|
2590
2578
|
|
|
2591
2579
|
|
|
2592
2580
|
def homologous_recombination_excision(
|
|
2593
|
-
genome:
|
|
2594
|
-
) -> list[
|
|
2581
|
+
genome: Dseqrecord, limit: int = 40
|
|
2582
|
+
) -> list[Dseqrecord]:
|
|
2595
2583
|
"""Returns the products resulting from the excision of a fragment from the genome through
|
|
2596
2584
|
homologous recombination.
|
|
2597
2585
|
|
|
2598
2586
|
Parameters
|
|
2599
2587
|
----------
|
|
2600
|
-
genome :
|
|
2588
|
+
genome : Dseqrecord
|
|
2601
2589
|
Target genome sequence
|
|
2602
2590
|
limit : int, optional
|
|
2603
2591
|
Minimum homology length required, by default 40
|
|
2604
2592
|
|
|
2605
2593
|
Returns
|
|
2606
2594
|
-------
|
|
2607
|
-
list[
|
|
2595
|
+
list[Dseqrecord]
|
|
2608
2596
|
List containing excised plasmid and remaining genome sequence
|
|
2609
2597
|
|
|
2610
2598
|
Examples
|
|
@@ -2627,8 +2615,8 @@ def homologous_recombination_excision(
|
|
|
2627
2615
|
|
|
2628
2616
|
|
|
2629
2617
|
def cre_lox_integration(
|
|
2630
|
-
genome:
|
|
2631
|
-
) -> list[
|
|
2618
|
+
genome: Dseqrecord, inserts: list[Dseqrecord]
|
|
2619
|
+
) -> list[Dseqrecord]:
|
|
2632
2620
|
"""Returns the products resulting from the integration of an insert (or inserts joined
|
|
2633
2621
|
through cre-lox recombination among them) into the genome through cre-lox integration.
|
|
2634
2622
|
|
|
@@ -2636,14 +2624,14 @@ def cre_lox_integration(
|
|
|
2636
2624
|
|
|
2637
2625
|
Parameters
|
|
2638
2626
|
----------
|
|
2639
|
-
genome :
|
|
2627
|
+
genome : Dseqrecord
|
|
2640
2628
|
Target genome sequence
|
|
2641
|
-
inserts : list[
|
|
2629
|
+
inserts : list[Dseqrecord] or Dseqrecord
|
|
2642
2630
|
DNA fragment(s) to insert
|
|
2643
2631
|
|
|
2644
2632
|
Returns
|
|
2645
2633
|
-------
|
|
2646
|
-
list[
|
|
2634
|
+
list[Dseqrecord]
|
|
2647
2635
|
List of integrated DNA molecules
|
|
2648
2636
|
|
|
2649
2637
|
Examples
|
|
@@ -2686,17 +2674,17 @@ def cre_lox_integration(
|
|
|
2686
2674
|
return _recast_sources(products, CreLoxRecombinationSource)
|
|
2687
2675
|
|
|
2688
2676
|
|
|
2689
|
-
def cre_lox_excision(genome:
|
|
2677
|
+
def cre_lox_excision(genome: Dseqrecord) -> list[Dseqrecord]:
|
|
2690
2678
|
"""Returns the products for CRE-lox excision.
|
|
2691
2679
|
|
|
2692
2680
|
Parameters
|
|
2693
2681
|
----------
|
|
2694
|
-
genome :
|
|
2682
|
+
genome : Dseqrecord
|
|
2695
2683
|
Target genome sequence
|
|
2696
2684
|
|
|
2697
2685
|
Returns
|
|
2698
2686
|
-------
|
|
2699
|
-
list[
|
|
2687
|
+
list[Dseqrecord]
|
|
2700
2688
|
List containing excised plasmid and remaining genome sequence
|
|
2701
2689
|
|
|
2702
2690
|
Examples
|
|
@@ -2738,28 +2726,28 @@ def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
|
|
|
2738
2726
|
|
|
2739
2727
|
|
|
2740
2728
|
def crispr_integration(
|
|
2741
|
-
genome:
|
|
2742
|
-
inserts: list[
|
|
2743
|
-
guides: list[
|
|
2729
|
+
genome: Dseqrecord,
|
|
2730
|
+
inserts: list[Dseqrecord],
|
|
2731
|
+
guides: list[Primer],
|
|
2744
2732
|
limit: int = 40,
|
|
2745
|
-
) -> list[
|
|
2733
|
+
) -> list[Dseqrecord]:
|
|
2746
2734
|
"""
|
|
2747
2735
|
Returns the products for CRISPR integration.
|
|
2748
2736
|
|
|
2749
2737
|
Parameters
|
|
2750
2738
|
----------
|
|
2751
|
-
genome :
|
|
2739
|
+
genome : Dseqrecord
|
|
2752
2740
|
Target genome sequence
|
|
2753
|
-
inserts : list[
|
|
2741
|
+
inserts : list[Dseqrecord]
|
|
2754
2742
|
DNA fragment(s) to insert
|
|
2755
|
-
guides : list[
|
|
2743
|
+
guides : list[Primer]
|
|
2756
2744
|
List of guide RNAs as Primer objects. This may change in the future.
|
|
2757
2745
|
limit : int, optional
|
|
2758
2746
|
Minimum overlap length required, by default 40
|
|
2759
2747
|
|
|
2760
2748
|
Returns
|
|
2761
2749
|
-------
|
|
2762
|
-
list[
|
|
2750
|
+
list[Dseqrecord]
|
|
2763
2751
|
List of integrated DNA molecules
|
|
2764
2752
|
|
|
2765
2753
|
Examples
|
|
@@ -2804,8 +2792,9 @@ def crispr_integration(
|
|
|
2804
2792
|
for i, product in enumerate(products):
|
|
2805
2793
|
# The second element of product.source.input is conventionally the insert/repair fragment
|
|
2806
2794
|
# The other two (first and third) are the two bits of the genome
|
|
2807
|
-
repair_start =
|
|
2808
|
-
|
|
2795
|
+
repair_start = location_boundaries(product.source.input[0].right_location)[0]
|
|
2796
|
+
# Here we do +1 because the position of the cut marks the boundary (e.g. 0:10, 10:20 if a cut is at pos 10)
|
|
2797
|
+
repair_end = location_boundaries(product.source.input[2].left_location)[1] + 1
|
|
2809
2798
|
repair_location = create_location(repair_start, repair_end, len(genome))
|
|
2810
2799
|
some_cuts_inside_repair = []
|
|
2811
2800
|
all_cuts_inside_repair = []
|
|
@@ -2836,22 +2825,22 @@ def crispr_integration(
|
|
|
2836
2825
|
|
|
2837
2826
|
|
|
2838
2827
|
def pcr_assembly(
|
|
2839
|
-
template:
|
|
2840
|
-
fwd_primer:
|
|
2841
|
-
rvs_primer:
|
|
2828
|
+
template: Dseqrecord,
|
|
2829
|
+
fwd_primer: Primer,
|
|
2830
|
+
rvs_primer: Primer,
|
|
2842
2831
|
add_primer_features: bool = False,
|
|
2843
2832
|
limit: int = 14,
|
|
2844
2833
|
mismatches: int = 0,
|
|
2845
|
-
) -> list[
|
|
2834
|
+
) -> list[Dseqrecord]:
|
|
2846
2835
|
"""Returns the products for PCR assembly.
|
|
2847
2836
|
|
|
2848
2837
|
Parameters
|
|
2849
2838
|
----------
|
|
2850
|
-
template :
|
|
2839
|
+
template : Dseqrecord
|
|
2851
2840
|
Template sequence
|
|
2852
|
-
fwd_primer :
|
|
2841
|
+
fwd_primer : Primer
|
|
2853
2842
|
Forward primer
|
|
2854
|
-
rvs_primer :
|
|
2843
|
+
rvs_primer : Primer
|
|
2855
2844
|
Reverse primer
|
|
2856
2845
|
add_primer_features : bool, optional
|
|
2857
2846
|
If True, add primer features to the product, by default False
|
|
@@ -2862,7 +2851,7 @@ def pcr_assembly(
|
|
|
2862
2851
|
|
|
2863
2852
|
Returns
|
|
2864
2853
|
-------
|
|
2865
|
-
list[
|
|
2854
|
+
list[Dseqrecord]
|
|
2866
2855
|
List of assembled DNA molecules
|
|
2867
2856
|
"""
|
|
2868
2857
|
|