pydna 5.5.3__py3-none-any.whl → 5.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +1 -1
- pydna/assembly2.py +415 -159
- pydna/dseqrecord.py +50 -2
- pydna/opencloning_models.py +553 -0
- pydna/types.py +5 -2
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/METADATA +8 -40
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/RECORD +9 -8
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info}/WHEEL +1 -1
- {pydna-5.5.3.dist-info → pydna-5.5.4.dist-info/licenses}/LICENSE.txt +0 -0
pydna/assembly2.py
CHANGED
|
@@ -39,9 +39,26 @@ from pydna.types import (
|
|
|
39
39
|
from pydna.gateway import gateway_overlap, find_gateway_sites
|
|
40
40
|
from pydna.cre_lox import cre_loxP_overlap
|
|
41
41
|
|
|
42
|
-
from typing import TYPE_CHECKING, Callable
|
|
42
|
+
from typing import TYPE_CHECKING, Callable, Literal
|
|
43
|
+
from pydna.opencloning_models import (
|
|
44
|
+
AssemblySource,
|
|
45
|
+
RestrictionAndLigationSource,
|
|
46
|
+
GibsonAssemblySource,
|
|
47
|
+
InFusionSource,
|
|
48
|
+
OverlapExtensionPCRLigationSource,
|
|
49
|
+
InVivoAssemblySource,
|
|
50
|
+
LigationSource,
|
|
51
|
+
GatewaySource,
|
|
52
|
+
HomologousRecombinationSource,
|
|
53
|
+
CreLoxRecombinationSource,
|
|
54
|
+
PCRSource,
|
|
55
|
+
SourceInput,
|
|
56
|
+
CRISPRSource,
|
|
57
|
+
)
|
|
58
|
+
from pydna.crispr import cas9
|
|
59
|
+
import warnings
|
|
43
60
|
|
|
44
|
-
if TYPE_CHECKING:
|
|
61
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
45
62
|
from Bio.Restriction import AbstractCut as _AbstractCut
|
|
46
63
|
|
|
47
64
|
|
|
@@ -80,15 +97,22 @@ def ends_from_cutsite(
|
|
|
80
97
|
) -> tuple[tuple[str, str], tuple[str, str]]:
|
|
81
98
|
"""Get the sticky or blunt ends created by a restriction enzyme cut.
|
|
82
99
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
100
|
+
Parameters
|
|
101
|
+
----------
|
|
102
|
+
cutsite : CutSiteType
|
|
103
|
+
A tuple ((cut_watson, ovhg), enzyme) describing where the cut occurs
|
|
104
|
+
seq : _Dseq
|
|
105
|
+
The DNA sequence being cut
|
|
86
106
|
|
|
87
|
-
Raises
|
|
88
|
-
|
|
107
|
+
Raises
|
|
108
|
+
------
|
|
109
|
+
ValueError
|
|
110
|
+
If cutsite is None
|
|
89
111
|
|
|
90
|
-
Returns
|
|
91
|
-
|
|
112
|
+
Returns
|
|
113
|
+
-------
|
|
114
|
+
tuple[tuple[str, str], tuple[str, str]]
|
|
115
|
+
A tuple of two tuples, each containing the type of end ('5\'', '3\'', or 'blunt')
|
|
92
116
|
and the sequence of the overhang. The first tuple is for the left end, second for the right end.
|
|
93
117
|
|
|
94
118
|
>>> from Bio.Restriction import NotI
|
|
@@ -129,14 +153,23 @@ def restriction_ligation_overlap(
|
|
|
129
153
|
|
|
130
154
|
Like in sticky and gibson, the order matters (see example below of partial overlap)
|
|
131
155
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
156
|
+
Parameters
|
|
157
|
+
----------
|
|
158
|
+
seqx : _Dseqrecord
|
|
159
|
+
The first sequence
|
|
160
|
+
seqy : _Dseqrecord
|
|
161
|
+
The second sequence
|
|
162
|
+
enzymes : RestrictionBatch
|
|
163
|
+
The enzymes to use
|
|
164
|
+
partial : bool
|
|
165
|
+
Whether to allow partial overlaps
|
|
166
|
+
allow_blunt : bool
|
|
167
|
+
Whether to allow blunt ends
|
|
168
|
+
|
|
169
|
+
Returns
|
|
170
|
+
-------
|
|
171
|
+
list[SequenceOverlap]
|
|
172
|
+
A list of overlaps between the two sequences
|
|
140
173
|
|
|
141
174
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
142
175
|
>>> from pydna.assembly2 import restriction_ligation_overlap
|
|
@@ -230,13 +263,19 @@ def blunt_overlap(
|
|
|
230
263
|
It basically returns [(len(seqx), 0, 0)] if the right end of seqx is blunt and the
|
|
231
264
|
left end of seqy is blunt (compatible with blunt ligation). Otherwise, it returns an empty list.
|
|
232
265
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
266
|
+
Parameters
|
|
267
|
+
----------
|
|
268
|
+
seqx : _Dseqrecord
|
|
269
|
+
The first sequence
|
|
270
|
+
seqy : _Dseqrecord
|
|
271
|
+
The second sequence
|
|
272
|
+
limit : int
|
|
273
|
+
There for compatibility, but it is ignored
|
|
237
274
|
|
|
238
|
-
Returns
|
|
239
|
-
|
|
275
|
+
Returns
|
|
276
|
+
-------
|
|
277
|
+
list[SequenceOverlap]
|
|
278
|
+
A list of overlaps between the two sequences
|
|
240
279
|
|
|
241
280
|
>>> from pydna.assembly2 import blunt_overlap
|
|
242
281
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
@@ -322,25 +361,31 @@ def gibson_overlap(seqx: _Dseqrecord, seqy: _Dseqrecord, limit=25):
|
|
|
322
361
|
Assembly algorithm to find terminal overlaps (e.g. for Gibson assembly).
|
|
323
362
|
The order matters, we want alignments like:
|
|
324
363
|
|
|
325
|
-
|
|
326
|
-
seqx: oooo------xxxx
|
|
327
|
-
seqy: xxxx------oooo
|
|
328
|
-
Product: oooo------xxxx------oooo
|
|
364
|
+
::
|
|
329
365
|
|
|
330
|
-
|
|
366
|
+
seqx: oooo------xxxx
|
|
367
|
+
seqy: xxxx------oooo
|
|
368
|
+
Product: oooo------xxxx------oooo
|
|
331
369
|
|
|
332
|
-
|
|
333
|
-
seqy: xxxx------oooo
|
|
334
|
-
Product (unwanted): oooo
|
|
335
|
-
```
|
|
370
|
+
Not like:
|
|
336
371
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
limit (int): Minimum length of the overlap
|
|
372
|
+
seqx: oooo------xxxx
|
|
373
|
+
seqy: xxxx------oooo
|
|
374
|
+
Product (unwanted): oooo
|
|
341
375
|
|
|
342
|
-
|
|
343
|
-
|
|
376
|
+
Parameters
|
|
377
|
+
----------
|
|
378
|
+
seqx : _Dseqrecord
|
|
379
|
+
The first sequence
|
|
380
|
+
seqy : _Dseqrecord
|
|
381
|
+
The second sequence
|
|
382
|
+
limit : int
|
|
383
|
+
Minimum length of the overlap
|
|
384
|
+
|
|
385
|
+
Returns
|
|
386
|
+
-------
|
|
387
|
+
list[SequenceOverlap]
|
|
388
|
+
A list of overlaps between the two sequences
|
|
344
389
|
|
|
345
390
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
346
391
|
>>> from pydna.assembly2 import gibson_overlap
|
|
@@ -384,13 +429,19 @@ def sticky_end_sub_strings(seqx: _Dseqrecord, seqy: _Dseqrecord, limit: bool = F
|
|
|
384
429
|
For now, if limit 0 / False (default) only full overlaps are considered.
|
|
385
430
|
Otherwise, partial overlaps are also returned.
|
|
386
431
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
432
|
+
Parameters
|
|
433
|
+
----------
|
|
434
|
+
seqx : _Dseqrecord
|
|
435
|
+
The first sequence
|
|
436
|
+
seqy : _Dseqrecord
|
|
437
|
+
The second sequence
|
|
438
|
+
limit : bool
|
|
439
|
+
Whether to allow partial overlaps
|
|
391
440
|
|
|
392
|
-
Returns
|
|
393
|
-
|
|
441
|
+
Returns
|
|
442
|
+
-------
|
|
443
|
+
list[SequenceOverlap]
|
|
444
|
+
A list of overlaps between the two sequences
|
|
394
445
|
|
|
395
446
|
|
|
396
447
|
Ligation of fully overlapping sticky ends, note how the order matters
|
|
@@ -520,14 +571,21 @@ def primer_template_overlap(
|
|
|
520
571
|
If seqx is a template and seqy is a primer, it represents the binding of a reverse primer,
|
|
521
572
|
where the primer has been passed as its reverse complement (see examples).
|
|
522
573
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
574
|
+
Parameters
|
|
575
|
+
----------
|
|
576
|
+
seqx : _Dseqrecord | _Primer
|
|
577
|
+
The primer
|
|
578
|
+
seqy : _Dseqrecord | _Primer
|
|
579
|
+
The template
|
|
580
|
+
limit : int
|
|
581
|
+
Minimum length of the overlap
|
|
582
|
+
mismatches : int
|
|
583
|
+
Maximum number of mismatches (only substitutions, no deletion or insertion)
|
|
528
584
|
|
|
529
|
-
Returns
|
|
530
|
-
|
|
585
|
+
Returns
|
|
586
|
+
-------
|
|
587
|
+
list[SequenceOverlap]
|
|
588
|
+
A list of overlaps between the primer and the template
|
|
531
589
|
|
|
532
590
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
533
591
|
>>> from pydna.primer import Primer
|
|
@@ -537,7 +595,7 @@ def primer_template_overlap(
|
|
|
537
595
|
>>> primer_template_overlap(primer, template, limit=8, mismatches=0)
|
|
538
596
|
[(0, 2, 8)]
|
|
539
597
|
|
|
540
|
-
This actually represents the binding of the primer
|
|
598
|
+
This actually represents the binding of the primer ``GCTGCTAA`` (reverse complement)
|
|
541
599
|
>>> primer_template_overlap(template, primer, limit=8, mismatches=0)
|
|
542
600
|
[(2, 0, 8)]
|
|
543
601
|
>>> primer_template_overlap(primer, template.reverse_complement(), limit=8, mismatches=0)
|
|
@@ -702,7 +760,7 @@ def assembly2str(assembly: EdgeRepresentationAssembly) -> str:
|
|
|
702
760
|
('1[8:14]:2[1:7]', '2[10:17]:3[1:8]')
|
|
703
761
|
|
|
704
762
|
The reason for this is that by default, a feature '[8:14]' when present in a tuple
|
|
705
|
-
is printed to the console as
|
|
763
|
+
is printed to the console as ``SimpleLocation(ExactPosition(8), ExactPosition(14), strand=1)`` (very long).
|
|
706
764
|
"""
|
|
707
765
|
return str(tuple(f"{u}{lu}:{v}{lv}" for u, v, lu, lv in assembly))
|
|
708
766
|
|
|
@@ -791,7 +849,7 @@ def assemble(
|
|
|
791
849
|
out_dseqrecord = _Dseqrecord(subfragments[0])
|
|
792
850
|
|
|
793
851
|
for fragment, overlap in zip(subfragments[1:], fragment_overlaps):
|
|
794
|
-
# Shift the features of the right fragment to the left by
|
|
852
|
+
# Shift the features of the right fragment to the left by ``overlap``
|
|
795
853
|
new_features = [
|
|
796
854
|
f._shift(len(out_dseqrecord) - overlap) for f in fragment.features
|
|
797
855
|
]
|
|
@@ -808,22 +866,25 @@ def assemble(
|
|
|
808
866
|
|
|
809
867
|
# Special case for blunt circularisation
|
|
810
868
|
if overlap == 0:
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
869
|
+
out_dseqrecord = out_dseqrecord.looped()
|
|
870
|
+
else:
|
|
871
|
+
# Remove trailing overlap
|
|
872
|
+
out_dseqrecord = _Dseqrecord(
|
|
873
|
+
fill_dseq(out_dseqrecord.seq)[:-overlap],
|
|
874
|
+
features=out_dseqrecord.features,
|
|
875
|
+
circular=True,
|
|
876
|
+
)
|
|
877
|
+
for feature in out_dseqrecord.features:
|
|
878
|
+
start, end = _location_boundaries(feature.location)
|
|
879
|
+
if start >= len(out_dseqrecord) or end > len(out_dseqrecord):
|
|
880
|
+
# Wrap around the origin
|
|
881
|
+
feature.location = _shift_location(
|
|
882
|
+
feature.location, 0, len(out_dseqrecord)
|
|
883
|
+
)
|
|
884
|
+
|
|
885
|
+
out_dseqrecord.source = AssemblySource.from_subfragment_representation(
|
|
886
|
+
subfragment_representation, fragments, is_circular
|
|
887
|
+
)
|
|
827
888
|
return out_dseqrecord
|
|
828
889
|
|
|
829
890
|
|
|
@@ -916,30 +977,29 @@ def get_assembly_subfragments(
|
|
|
916
977
|
|
|
917
978
|
Subfragments are the slices of the fragments that are joined together
|
|
918
979
|
|
|
919
|
-
For example
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
To reproduce
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
Subfragments: `cccccgtatcgtgt`, `atcgtgtactgtcatattc`
|
|
980
|
+
For example::
|
|
981
|
+
|
|
982
|
+
--A--
|
|
983
|
+
TACGTAAT
|
|
984
|
+
--B--
|
|
985
|
+
TCGTAACGA
|
|
986
|
+
|
|
987
|
+
Gives: TACGTAA / CGTAACGA
|
|
988
|
+
|
|
989
|
+
To reproduce::
|
|
990
|
+
|
|
991
|
+
a = Dseqrecord('TACGTAAT')
|
|
992
|
+
b = Dseqrecord('TCGTAACGA')
|
|
993
|
+
f = Assembly([a, b], limit=5)
|
|
994
|
+
a0 = f.get_linear_assemblies()[0]
|
|
995
|
+
print(assembly2str(a0))
|
|
996
|
+
a0_subfragment_rep =edge_representation2subfragment_representation(a0, False)
|
|
997
|
+
for f in get_assembly_subfragments([a, b], a0_subfragment_rep):
|
|
998
|
+
print(f.seq)
|
|
999
|
+
|
|
1000
|
+
# prints TACGTAA and CGTAACGA
|
|
1001
|
+
|
|
1002
|
+
Subfragments: ``cccccgtatcgtgt``, ``atcgtgtactgtcatattc``
|
|
943
1003
|
"""
|
|
944
1004
|
subfragments = list()
|
|
945
1005
|
for node, start_location, end_location in subfragment_representation:
|
|
@@ -1028,33 +1088,38 @@ class Assembly:
|
|
|
1028
1088
|
|
|
1029
1089
|
The assembly contains a directed graph, where nodes represent fragments and
|
|
1030
1090
|
edges represent overlaps between fragments. :
|
|
1091
|
+
|
|
1031
1092
|
- The node keys are integers, representing the index of the fragment in the
|
|
1032
|
-
|
|
1033
|
-
|
|
1093
|
+
input list of fragments. The sign of the node key represents the orientation
|
|
1094
|
+
of the fragment, positive for forward orientation, negative for reverse orientation.
|
|
1034
1095
|
- The edges contain the locations of the overlaps in the fragments. For an edge (u, v, key):
|
|
1035
1096
|
- u and v are the nodes connected by the edge.
|
|
1036
1097
|
- key is a string that represents the location of the overlap. In the format:
|
|
1037
|
-
|
|
1098
|
+
'u[start:end](strand):v[start:end](strand)'.
|
|
1038
1099
|
- Edges have a 'locations' attribute, which is a list of two FeatureLocation objects,
|
|
1039
|
-
|
|
1100
|
+
representing the location of the overlap in the u and v fragment, respectively.
|
|
1040
1101
|
- You can think of an edge as a representation of the join of two fragments.
|
|
1041
1102
|
|
|
1042
1103
|
If fragment 1 and 2 share a subsequence of 6bp, [8:14] in fragment 1 and [1:7] in fragment 2,
|
|
1043
1104
|
there will be 4 edges representing that overlap in the graph, for all possible
|
|
1044
1105
|
orientations of the fragments (see add_edges_from_match for details):
|
|
1045
|
-
|
|
1046
|
-
-
|
|
1047
|
-
-
|
|
1048
|
-
-
|
|
1106
|
+
|
|
1107
|
+
- ``(1, 2, '1[8:14]:2[1:7]')``
|
|
1108
|
+
- ``(2, 1, '2[1:7]:1[8:14]')``
|
|
1109
|
+
- ``(-1, -2, '-1[0:6]:-2[10:16]')``
|
|
1110
|
+
- ``(-2, -1, '-2[10:16]:-1[0:6]')``
|
|
1049
1111
|
|
|
1050
1112
|
An assembly can be thought of as a tuple of graph edges, but instead of representing them with node indexes and keys, we represent them
|
|
1051
1113
|
as u, v, locu, locv, where u and v are the nodes connected by the edge, and locu and locv are the locations of the overlap in the first
|
|
1052
1114
|
and second fragment. Assemblies are then represented as:
|
|
1115
|
+
|
|
1053
1116
|
- Linear: ((1, 2, [8:14], [1:7]), (2, 3, [10:17], [1:8]))
|
|
1054
1117
|
- Circular: ((1, 2, [8:14], [1:7]), (2, 3, [10:17], [1:8]), (3, 1, [12:17], [1:6]))
|
|
1118
|
+
|
|
1055
1119
|
Note that the first and last fragment are the same in a circular assembly.
|
|
1056
1120
|
|
|
1057
1121
|
The following constrains are applied to remove duplicate assemblies:
|
|
1122
|
+
|
|
1058
1123
|
- Circular assemblies: the first subfragment is not reversed, and has the smallest index in the input fragment list.
|
|
1059
1124
|
use_fragment_order is ignored.
|
|
1060
1125
|
- Linear assemblies:
|
|
@@ -1065,7 +1130,7 @@ class Assembly:
|
|
|
1065
1130
|
frags : list
|
|
1066
1131
|
A list of Dseqrecord objects.
|
|
1067
1132
|
limit : int, optional
|
|
1068
|
-
The shortest shared homology to be considered, this is passed as the third argument to the
|
|
1133
|
+
The shortest shared homology to be considered, this is passed as the third argument to the ``algorithm`` function.
|
|
1069
1134
|
For certain algorithms, this might be ignored.
|
|
1070
1135
|
algorithm : function, optional
|
|
1071
1136
|
The algorithm used to determine the shared sequences. It's a function that takes two Dseqrecord objects as inputs,
|
|
@@ -1232,11 +1297,12 @@ class Assembly:
|
|
|
1232
1297
|
first: _Dseqrecord,
|
|
1233
1298
|
secnd: _Dseqrecord,
|
|
1234
1299
|
):
|
|
1235
|
-
"""Add edges to the graph from a match returned by the
|
|
1300
|
+
"""Add edges to the graph from a match returned by the ``algorithm`` function (see pydna.common_substrings). For
|
|
1236
1301
|
format of edges (see documentation of the Assembly class).
|
|
1237
1302
|
|
|
1238
|
-
Matches are directional, because not all
|
|
1303
|
+
Matches are directional, because not all ``algorithm`` functions return the same match for (u,v) and (v,u). For example,
|
|
1239
1304
|
homologous recombination does but sticky end ligation does not. The function returns two edges:
|
|
1305
|
+
|
|
1240
1306
|
- Fragments in the orientation they were passed, with locations of the match (u, v, loc_u, loc_v)
|
|
1241
1307
|
- Reverse complement of the fragments with inverted order, with flipped locations (-v, -u, flip(loc_v), flip(loc_u))/
|
|
1242
1308
|
|
|
@@ -1446,17 +1512,18 @@ class Assembly:
|
|
|
1446
1512
|
Here we check if one of the joins between fragments represents the edges of an insertion assembly
|
|
1447
1513
|
The fragment must be linear, and the join must be as indicated below
|
|
1448
1514
|
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1515
|
+
::
|
|
1516
|
+
|
|
1517
|
+
-------- ------- Fragment 1
|
|
1518
|
+
|| ||
|
|
1519
|
+
xxxxxxxx || Fragment 2
|
|
1520
|
+
|| ||
|
|
1521
|
+
oooooooooo Fragment 3
|
|
1522
|
+
|
|
1456
1523
|
The above example will be [(1, 2, [4:6], [0:2]), (2, 3, [6:8], [0:2]), (3, 1, [8:10], [9:11)])]
|
|
1457
1524
|
|
|
1458
1525
|
These could be returned in any order by simple_cycles, so we sort the edges so that the first
|
|
1459
|
-
and last
|
|
1526
|
+
and last ``u`` and ``v`` match the fragment that gets the insertion (1 in the example above).
|
|
1460
1527
|
"""
|
|
1461
1528
|
edge_pair_index = list()
|
|
1462
1529
|
|
|
@@ -1637,8 +1704,8 @@ class Assembly:
|
|
|
1637
1704
|
|
|
1638
1705
|
def get_locations_on_fragments(self) -> dict[int, dict[str, list[Location]]]:
|
|
1639
1706
|
"""Get a dictionary where the keys are the nodes in the graph, and the values are dictionaries with keys
|
|
1640
|
-
|
|
1641
|
-
and right side. The values in
|
|
1707
|
+
``left``, ``right``, containing (for each fragment) the locations where the fragment is joined to another fragment on its left
|
|
1708
|
+
and right side. The values in ``left`` and ``right`` are often the same, except in restriction-ligation with partial overlap enabled,
|
|
1642
1709
|
where we can end up with a situation like this:
|
|
1643
1710
|
|
|
1644
1711
|
GGTCTCCCCAATT and aGGTCTCCAACCAA as fragments
|
|
@@ -1651,13 +1718,14 @@ class Assembly:
|
|
|
1651
1718
|
aGGTCTCCxxCCAATT
|
|
1652
1719
|
tCCAGAGGTTGGxxAA
|
|
1653
1720
|
|
|
1654
|
-
Would return
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1721
|
+
Would return::
|
|
1722
|
+
|
|
1723
|
+
{
|
|
1724
|
+
1: {'left': [7:9], 'right': [9:11]},
|
|
1725
|
+
2: {'left': [8:10], 'right': [10:12]},
|
|
1726
|
+
-1: {'left': [2:4], 'right': [4:6]},
|
|
1727
|
+
-2: {'left': [2:4], 'right': [4:6]}
|
|
1728
|
+
}
|
|
1661
1729
|
|
|
1662
1730
|
"""
|
|
1663
1731
|
|
|
@@ -1686,10 +1754,10 @@ class Assembly:
|
|
|
1686
1754
|
and prevent including partially digested fragments. For example, imagine the following fragment being an input for a digestion
|
|
1687
1755
|
and ligation assembly, where the enzyme cuts at the sites indicated by the vertical lines:
|
|
1688
1756
|
|
|
1689
|
-
|
|
1690
|
-
|
|
1691
|
-
|
|
1692
|
-
|
|
1757
|
+
::
|
|
1758
|
+
|
|
1759
|
+
x y z
|
|
1760
|
+
-------|-------|-------|---------
|
|
1693
1761
|
|
|
1694
1762
|
We would only want assemblies that contain subfragments start-x, x-y, y-z, z-end, and not start-x, y-end, for instance.
|
|
1695
1763
|
The latter would indicate that the fragment was partially digested.
|
|
@@ -1750,8 +1818,8 @@ class Assembly:
|
|
|
1750
1818
|
|
|
1751
1819
|
class PCRAssembly(Assembly):
|
|
1752
1820
|
"""
|
|
1753
|
-
An assembly that represents a PCR, where
|
|
1754
|
-
It always uses the
|
|
1821
|
+
An assembly that represents a PCR, where ``fragments`` is a list of primer, template, primer (in that order).
|
|
1822
|
+
It always uses the ``primer_template_overlap`` algorithm and accepts the ``mismatches`` argument to indicate
|
|
1755
1823
|
the number of mismatches allowed in the overlap. Only supports substitution mismatches, not indels.
|
|
1756
1824
|
"""
|
|
1757
1825
|
|
|
@@ -1959,6 +2027,21 @@ def common_function_assembly_products(
|
|
|
1959
2027
|
return [assemble(frags, a) for a in output_assemblies]
|
|
1960
2028
|
|
|
1961
2029
|
|
|
2030
|
+
def _recast_sources(
|
|
2031
|
+
products: list[_Dseqrecord], source_cls, **extra_fields
|
|
2032
|
+
) -> list[_Dseqrecord]:
|
|
2033
|
+
"""Recast the `source` of each product to `source_cls` with optional extras.
|
|
2034
|
+
|
|
2035
|
+
This avoids repeating the same for-loop across many assembly functions.
|
|
2036
|
+
"""
|
|
2037
|
+
for prod in products:
|
|
2038
|
+
prod.source = source_cls(
|
|
2039
|
+
**prod.source.model_dump(),
|
|
2040
|
+
**extra_fields,
|
|
2041
|
+
)
|
|
2042
|
+
return products
|
|
2043
|
+
|
|
2044
|
+
|
|
1962
2045
|
def gibson_assembly(
|
|
1963
2046
|
frags: list[_Dseqrecord], limit: int = 25, circular_only: bool = False
|
|
1964
2047
|
) -> list[_Dseqrecord]:
|
|
@@ -1978,9 +2061,11 @@ def gibson_assembly(
|
|
|
1978
2061
|
list[_Dseqrecord]
|
|
1979
2062
|
List of assembled DNA molecules
|
|
1980
2063
|
"""
|
|
1981
|
-
|
|
2064
|
+
|
|
2065
|
+
products = common_function_assembly_products(
|
|
1982
2066
|
frags, limit, gibson_overlap, circular_only
|
|
1983
2067
|
)
|
|
2068
|
+
return _recast_sources(products, GibsonAssemblySource)
|
|
1984
2069
|
|
|
1985
2070
|
|
|
1986
2071
|
def in_fusion_assembly(
|
|
@@ -2003,7 +2088,9 @@ def in_fusion_assembly(
|
|
|
2003
2088
|
list[_Dseqrecord]
|
|
2004
2089
|
List of assembled DNA molecules
|
|
2005
2090
|
"""
|
|
2006
|
-
|
|
2091
|
+
|
|
2092
|
+
products = gibson_assembly(frags, limit)
|
|
2093
|
+
return _recast_sources(products, InFusionSource)
|
|
2007
2094
|
|
|
2008
2095
|
|
|
2009
2096
|
def fusion_pcr_assembly(
|
|
@@ -2026,7 +2113,8 @@ def fusion_pcr_assembly(
|
|
|
2026
2113
|
list[_Dseqrecord]
|
|
2027
2114
|
List of assembled DNA molecules
|
|
2028
2115
|
"""
|
|
2029
|
-
|
|
2116
|
+
products = gibson_assembly(frags, limit)
|
|
2117
|
+
return _recast_sources(products, OverlapExtensionPCRLigationSource)
|
|
2030
2118
|
|
|
2031
2119
|
|
|
2032
2120
|
def in_vivo_assembly(
|
|
@@ -2048,9 +2136,10 @@ def in_vivo_assembly(
|
|
|
2048
2136
|
list[_Dseqrecord]
|
|
2049
2137
|
List of assembled DNA molecules
|
|
2050
2138
|
"""
|
|
2051
|
-
|
|
2139
|
+
products = common_function_assembly_products(
|
|
2052
2140
|
frags, limit, common_sub_strings, circular_only
|
|
2053
2141
|
)
|
|
2142
|
+
return _recast_sources(products, InVivoAssemblySource)
|
|
2054
2143
|
|
|
2055
2144
|
|
|
2056
2145
|
def restriction_ligation_assembly(
|
|
@@ -2060,9 +2149,10 @@ def restriction_ligation_assembly(
|
|
|
2060
2149
|
circular_only: bool = False,
|
|
2061
2150
|
) -> list[_Dseqrecord]:
|
|
2062
2151
|
"""Returns the products for restriction ligation assembly:
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2152
|
+
|
|
2153
|
+
- Finds cutsites in the fragments
|
|
2154
|
+
- Finds all products that could be assembled by ligating the fragments based on those cutsites
|
|
2155
|
+
- Will NOT return products that combine an existing end with an end generated by the same enzyme (see example below)
|
|
2066
2156
|
|
|
2067
2157
|
Parameters
|
|
2068
2158
|
----------
|
|
@@ -2083,9 +2173,9 @@ def restriction_ligation_assembly(
|
|
|
2083
2173
|
Examples
|
|
2084
2174
|
--------
|
|
2085
2175
|
In the example below, we plan to assemble a plasmid from a backbone and an insert, using the EcoRI and SalI enzymes.
|
|
2086
|
-
Note how 2 circular products are returned, one contains the insert (
|
|
2087
|
-
and the desired part of the backbone (
|
|
2088
|
-
reversed insert (
|
|
2176
|
+
Note how 2 circular products are returned, one contains the insert (``acgt``)
|
|
2177
|
+
and the desired part of the backbone (``cccccc``), the other contains the
|
|
2178
|
+
reversed insert (``tgga``) and the cut-out part of the backbone (``aaa``).
|
|
2089
2179
|
|
|
2090
2180
|
>>> from pydna.assembly2 import restriction_ligation_assembly
|
|
2091
2181
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
@@ -2119,11 +2209,16 @@ def restriction_ligation_assembly(
|
|
|
2119
2209
|
TTAAGtttC
|
|
2120
2210
|
"""
|
|
2121
2211
|
|
|
2122
|
-
def
|
|
2212
|
+
def algorithm_fn(x, y, _l):
|
|
2123
2213
|
# By default, we allow blunt ends
|
|
2124
2214
|
return restriction_ligation_overlap(x, y, enzymes, False, allow_blunt)
|
|
2125
2215
|
|
|
2126
|
-
|
|
2216
|
+
products = common_function_assembly_products(
|
|
2217
|
+
frags, None, algorithm_fn, circular_only
|
|
2218
|
+
)
|
|
2219
|
+
return _recast_sources(
|
|
2220
|
+
products, RestrictionAndLigationSource, restriction_enzymes=enzymes
|
|
2221
|
+
)
|
|
2127
2222
|
|
|
2128
2223
|
|
|
2129
2224
|
def golden_gate_assembly(
|
|
@@ -2134,7 +2229,7 @@ def golden_gate_assembly(
|
|
|
2134
2229
|
) -> list[_Dseqrecord]:
|
|
2135
2230
|
"""Returns the products for Golden Gate assembly. This is the same as
|
|
2136
2231
|
restriction ligation assembly, but with a different name. Check the documentation
|
|
2137
|
-
for
|
|
2232
|
+
for ``restriction_ligation_assembly`` for more details.
|
|
2138
2233
|
|
|
2139
2234
|
Parameters
|
|
2140
2235
|
----------
|
|
@@ -2154,7 +2249,7 @@ def golden_gate_assembly(
|
|
|
2154
2249
|
|
|
2155
2250
|
Examples
|
|
2156
2251
|
--------
|
|
2157
|
-
See the example for
|
|
2252
|
+
See the example for ``restriction_ligation_assembly``.
|
|
2158
2253
|
"""
|
|
2159
2254
|
return restriction_ligation_assembly(frags, enzymes, allow_blunt, circular_only)
|
|
2160
2255
|
|
|
@@ -2168,7 +2263,7 @@ def ligation_assembly(
|
|
|
2168
2263
|
"""Returns the products for ligation assembly, as inputs pass the fragments (digested if needed) that
|
|
2169
2264
|
will be ligated.
|
|
2170
2265
|
|
|
2171
|
-
For most cases, you probably should use
|
|
2266
|
+
For most cases, you probably should use ``restriction_ligation_assembly`` instead.
|
|
2172
2267
|
|
|
2173
2268
|
Parameters
|
|
2174
2269
|
----------
|
|
@@ -2215,11 +2310,14 @@ def ligation_assembly(
|
|
|
2215
2310
|
return sticky_end_sub_strings(x, y, allow_partial_overlap)
|
|
2216
2311
|
|
|
2217
2312
|
if allow_blunt:
|
|
2218
|
-
|
|
2313
|
+
algorithm_fn = combine_algorithms(sticky_end_algorithm, blunt_overlap)
|
|
2219
2314
|
else:
|
|
2220
|
-
|
|
2315
|
+
algorithm_fn = sticky_end_algorithm
|
|
2221
2316
|
|
|
2222
|
-
|
|
2317
|
+
products = common_function_assembly_products(
|
|
2318
|
+
frags, None, algorithm_fn, circular_only
|
|
2319
|
+
)
|
|
2320
|
+
return _recast_sources(products, LigationSource)
|
|
2223
2321
|
|
|
2224
2322
|
|
|
2225
2323
|
def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
|
|
@@ -2236,7 +2334,7 @@ def assembly_is_multi_site(asm: list[EdgeRepresentationAssembly]) -> bool:
|
|
|
2236
2334
|
|
|
2237
2335
|
def gateway_assembly(
|
|
2238
2336
|
frags: list[_Dseqrecord],
|
|
2239
|
-
reaction_type:
|
|
2337
|
+
reaction_type: Literal["BP", "LR"],
|
|
2240
2338
|
greedy: bool = False,
|
|
2241
2339
|
circular_only: bool = False,
|
|
2242
2340
|
multi_site_only: bool = False,
|
|
@@ -2247,8 +2345,8 @@ def gateway_assembly(
|
|
|
2247
2345
|
----------
|
|
2248
2346
|
frags : list[_Dseqrecord]
|
|
2249
2347
|
List of DNA fragments to assemble
|
|
2250
|
-
reaction_type :
|
|
2251
|
-
Type of Gateway reaction
|
|
2348
|
+
reaction_type : Literal['BP', 'LR']
|
|
2349
|
+
Type of Gateway reaction
|
|
2252
2350
|
greedy : bool, optional
|
|
2253
2351
|
If True, use greedy gateway consensus sites, by default False
|
|
2254
2352
|
circular_only : bool, optional
|
|
@@ -2288,9 +2386,9 @@ def gateway_assembly(
|
|
|
2288
2386
|
>>> len(products_LR)
|
|
2289
2387
|
2
|
|
2290
2388
|
|
|
2291
|
-
Now let's understand the
|
|
2389
|
+
Now let's understand the ``multi_site_only`` parameter. Let's consider a case where we are swapping fragments
|
|
2292
2390
|
between two plasmids using an LR reaction. Experimentally, we expect to obtain two plasmids, resulting from the
|
|
2293
|
-
swapping between the two att sites. That's what we get if we set
|
|
2391
|
+
swapping between the two att sites. That's what we get if we set ``multi_site_only`` to True.
|
|
2294
2392
|
|
|
2295
2393
|
>>> attL2 = 'aaataatgattttattttgactgatagtgacctgttcgttgcaacaaattgataagcaatgctttcttataatgccaactttgtacaagaaagctg'
|
|
2296
2394
|
>>> attR2 = 'accactttgtacaagaaagctgaacgagaaacgtaaaatgatataaatatcaatatattaaattagattttgcataaaaaacagactacataatactgtaaaacacaacatatccagtcactatg'
|
|
@@ -2300,7 +2398,7 @@ def gateway_assembly(
|
|
|
2300
2398
|
>>> len(products)
|
|
2301
2399
|
2
|
|
2302
2400
|
|
|
2303
|
-
However, if we set
|
|
2401
|
+
However, if we set ``multi_site_only`` to False, we get 4 products, which also include the intermediate products
|
|
2304
2402
|
where the two plasmids are combined into a single one through recombination of a single att site. This is an
|
|
2305
2403
|
intermediate of the reaction, and typically we don't want it:
|
|
2306
2404
|
|
|
@@ -2316,13 +2414,19 @@ def gateway_assembly(
|
|
|
2316
2414
|
f"Invalid reaction type: {reaction_type}, can only be BP or LR"
|
|
2317
2415
|
)
|
|
2318
2416
|
|
|
2319
|
-
def
|
|
2417
|
+
def algorithm_fn(x, y, _l):
|
|
2320
2418
|
return gateway_overlap(x, y, reaction_type, greedy)
|
|
2321
2419
|
|
|
2322
2420
|
filter_results_function = None if not multi_site_only else assembly_is_multi_site
|
|
2323
2421
|
|
|
2324
2422
|
products = common_function_assembly_products(
|
|
2325
|
-
frags, None,
|
|
2423
|
+
frags, None, algorithm_fn, circular_only, filter_results_function
|
|
2424
|
+
)
|
|
2425
|
+
products = _recast_sources(
|
|
2426
|
+
products,
|
|
2427
|
+
GatewaySource,
|
|
2428
|
+
reaction_type=reaction_type,
|
|
2429
|
+
greedy=greedy,
|
|
2326
2430
|
)
|
|
2327
2431
|
|
|
2328
2432
|
if len(products) == 0:
|
|
@@ -2479,7 +2583,10 @@ def homologous_recombination_integration(
|
|
|
2479
2583
|
"""
|
|
2480
2584
|
fragments = common_handle_insertion_fragments(genome, inserts)
|
|
2481
2585
|
|
|
2482
|
-
|
|
2586
|
+
products = common_function_integration_products(
|
|
2587
|
+
fragments, limit, common_sub_strings
|
|
2588
|
+
)
|
|
2589
|
+
return _recast_sources(products, HomologousRecombinationSource)
|
|
2483
2590
|
|
|
2484
2591
|
|
|
2485
2592
|
def homologous_recombination_excision(
|
|
@@ -2515,7 +2622,8 @@ def homologous_recombination_excision(
|
|
|
2515
2622
|
>>> products
|
|
2516
2623
|
[Dseqrecord(o25), Dseqrecord(-32)]
|
|
2517
2624
|
"""
|
|
2518
|
-
|
|
2625
|
+
products = common_function_excision_products(genome, limit, common_sub_strings)
|
|
2626
|
+
return _recast_sources(products, HomologousRecombinationSource)
|
|
2519
2627
|
|
|
2520
2628
|
|
|
2521
2629
|
def cre_lox_integration(
|
|
@@ -2524,7 +2632,7 @@ def cre_lox_integration(
|
|
|
2524
2632
|
"""Returns the products resulting from the integration of an insert (or inserts joined
|
|
2525
2633
|
through cre-lox recombination among them) into the genome through cre-lox integration.
|
|
2526
2634
|
|
|
2527
|
-
Also works with lox66 and lox71 (see
|
|
2635
|
+
Also works with lox66 and lox71 (see ``pydna.cre_lox`` for more details).
|
|
2528
2636
|
|
|
2529
2637
|
Parameters
|
|
2530
2638
|
----------
|
|
@@ -2574,7 +2682,8 @@ def cre_lox_integration(
|
|
|
2574
2682
|
|
|
2575
2683
|
"""
|
|
2576
2684
|
fragments = common_handle_insertion_fragments(genome, inserts)
|
|
2577
|
-
|
|
2685
|
+
products = common_function_integration_products(fragments, None, cre_loxP_overlap)
|
|
2686
|
+
return _recast_sources(products, CreLoxRecombinationSource)
|
|
2578
2687
|
|
|
2579
2688
|
|
|
2580
2689
|
def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
|
|
@@ -2624,4 +2733,151 @@ def cre_lox_excision(genome: _Dseqrecord) -> list[_Dseqrecord]:
|
|
|
2624
2733
|
>>> res2
|
|
2625
2734
|
[Dseqrecord(o39), Dseqrecord(-45)]
|
|
2626
2735
|
"""
|
|
2627
|
-
|
|
2736
|
+
products = common_function_excision_products(genome, None, cre_loxP_overlap)
|
|
2737
|
+
return _recast_sources(products, CreLoxRecombinationSource)
|
|
2738
|
+
|
|
2739
|
+
|
|
2740
|
+
def crispr_integration(
    genome: _Dseqrecord,
    inserts: list[_Dseqrecord],
    guides: list[_Primer],
    limit: int = 40,
) -> list[_Dseqrecord]:
    """
    Returns the products for CRISPR integration.

    Candidate products are first computed with
    ``homologous_recombination_integration``. A candidate is kept only if every
    guide has at least one Cas9 cut inside the genome region that is replaced
    by the repair fragment. The guides are then appended to the
    ``source.input`` of each kept product and the product sources are recast
    to ``CRISPRSource``.

    Parameters
    ----------
    genome : _Dseqrecord
        Target genome sequence
    inserts : list[_Dseqrecord]
        DNA fragment(s) to insert
    guides : list[_Primer]
        List of guide RNAs as Primer objects. This may change in the future.
    limit : int, optional
        Minimum overlap length required, by default 40

    Returns
    -------
    list[_Dseqrecord]
        List of integrated DNA molecules

    Raises
    ------
    ValueError
        If ``guides`` is empty, if a guide has no Cas9 cutsite in ``genome``,
        or if, for a retained product, a guide also cuts outside the repair
        region.

    Warns
    -----
    UserWarning
        If at least one recombination product was discarded because not every
        guide cuts inside its repair region.

    Examples
    --------

    >>> from pydna.dseqrecord import Dseqrecord
    >>> from pydna.assembly2 import crispr_integration
    >>> from pydna.primer import Primer
    >>> genome = Dseqrecord("aaccggttcaatgcaaacagtaatgatggatgacattcaaagcac", name="genome")
    >>> insert = Dseqrecord("aaccggttAAAAAAAAAttcaaagcac", name="insert")
    >>> guide = Primer("ttcaatgcaaacagtaatga", name="guide")
    >>> product, *_ = crispr_integration(genome, [insert], [guide], 8)
    >>> product
    Dseqrecord(-27)

    """
    if not guides:
        raise ValueError("At least one guide RNA is required for CRISPR integration")

    # Get all the possible products from the homologous recombination integration
    products = homologous_recombination_integration(genome, inserts, limit)

    # Verify that the guides cut in the region that will be repaired

    # First we collect, for each guide, the positions where it cuts the genome
    guide_cuts = []
    for guide in guides:
        enzyme = cas9(str(guide.seq))
        cutsites = genome.seq.get_cutsites(enzyme)
        if len(cutsites) == 0:
            raise ValueError(
                f"Could not find Cas9 cutsite in the target sequence using the guide: {guide.name}"
            )
        # Keep only the position of the cut
        guide_cuts.append([cut[0] for (cut, _) in cutsites])

    # Then, we check if the possible homologous recombination products contain the cuts
    # from the guides inside the repair region.
    # We also add the used guides to each product. This is very important!
    valid_products = []
    for product in products:
        # The second element of product.source.input is conventionally the insert/repair fragment
        # The other two (first and third) are the two bits of the genome
        repair_start = _location_boundaries(product.source.input[0].right_location)[0]
        repair_end = _location_boundaries(product.source.input[2].left_location)[1]
        repair_location = create_location(repair_start, repair_end, len(genome))

        # For every guide, the subset of its cuts that falls inside the repair region
        cuts_in_repair = [
            [cut for cut in cut_group if cut in repair_location]
            for cut_group in guide_cuts
        ]

        # Discard the product unless every guide cuts at least once inside the repair region
        if any(len(inside) == 0 for inside in cuts_in_repair):
            continue

        # The off-target check is evaluated per retained product, so it never depends
        # on state left over from the last loop iteration and is well-defined even
        # when `products` is empty.
        if any(
            len(inside) != len(cut_group)
            for inside, cut_group in zip(cuts_in_repair, guide_cuts)
        ):
            raise ValueError(
                "Some guides cut outside the repair region, please check the guides"
            )

        # Add the used guides to the product <----- VERY IMPORTANT!
        # Every guide passed the two checks above, so all of them are recorded.
        product.source.input.extend([SourceInput(sequence=g) for g in guides])
        valid_products.append(product)

    if len(valid_products) != len(products):
        warnings.warn(
            "Some recombination products were discarded because they had off-target cuts",
            category=UserWarning,
            stacklevel=2,
        )

    return _recast_sources(valid_products, CRISPRSource)
|
|
2836
|
+
|
|
2837
|
+
|
|
2838
|
+
def pcr_assembly(
    template: _Dseqrecord,
    fwd_primer: _Primer,
    rvs_primer: _Primer,
    add_primer_features: bool = False,
    limit: int = 14,
    mismatches: int = 0,
) -> list[_Dseqrecord]:
    """Returns the products for PCR assembly.

    The two primers and the template are handed to ``PCRAssembly`` in the
    order ``[fwd_primer, template, rvs_primer]`` and its linear assemblies are
    returned with their sources recast to ``PCRSource``.

    Parameters
    ----------
    template : _Dseqrecord
        Template sequence
    fwd_primer : _Primer
        Forward primer
    rvs_primer : _Primer
        Reverse primer
    add_primer_features : bool, optional
        If True, add primer features to the product, by default False
    limit : int, optional
        Minimum overlap length required, by default 14. The value passed on
        to ``PCRAssembly`` is ``limit + mismatches``.
    mismatches : int, optional
        Maximum number of mismatches, by default 0

    Returns
    -------
    list[_Dseqrecord]
        List of assembled DNA molecules
    """
    pcr_inputs = [fwd_primer, template, rvs_primer]
    amplicons = PCRAssembly(
        pcr_inputs,
        limit=limit + mismatches,
        mismatches=mismatches,
    ).assemble_linear()

    # Identical primers (compared case-insensitively) yield duplicated products:
    # keep only those whose second input is not flagged as reverse-complemented.
    # NOTE(review): the second input is presumably the template, given the
    # order of `pcr_inputs` -- confirm against PCRAssembly.
    primers_identical = str(fwd_primer.seq).upper() == str(rvs_primer.seq).upper()
    if primers_identical:
        deduplicated = []
        for amplicon in amplicons:
            if amplicon.source.input[1].reverse_complemented:
                continue
            deduplicated.append(amplicon)
        amplicons = deduplicated

    if add_primer_features:
        annotated = []
        for amplicon in amplicons:
            annotated.append(annotate_primer_binding_sites(amplicon, pcr_inputs))
        amplicons = annotated

    return _recast_sources(
        amplicons,
        PCRSource,
        add_primer_features=add_primer_features,
    )
|