pydna 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +24 -193
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +283 -294
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/download.py +6 -15
- pydna/dseq.py +1794 -718
- pydna/dseqrecord.py +170 -169
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +187 -60
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +19 -19
- pydna/utils.py +97 -75
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/METADATA +8 -8
- pydna-5.5.5.dist-info/RECORD +43 -0
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.4.dist-info/RECORD +0 -46
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/licenses/LICENSE.txt +0 -0
pydna/codon.py
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
# https://www.genome.jp/kegg/catalog/org_list.html
|
|
8
8
|
|
|
9
|
-
from typing import Dict
|
|
9
|
+
from typing import Dict
|
|
10
10
|
|
|
11
11
|
_sce_weights = {
|
|
12
12
|
"TTT": 1.0,
|
|
@@ -81,7 +81,7 @@ weights = {"sce": _sce_weights}
|
|
|
81
81
|
# PMID: 11589713
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
start:
|
|
84
|
+
start: Dict[str, Dict[str, float]] = {
|
|
85
85
|
"sce": {"ATG": 1.000, "TTG": 0.069, "ATA": 0.005},
|
|
86
86
|
"eco": {},
|
|
87
87
|
}
|
|
@@ -99,13 +99,13 @@ rare_codons = {
|
|
|
99
99
|
"eco": ["AGG", "AGA", "ATA", "CTA", "CGA", "CGG", "CCC", "TCG"],
|
|
100
100
|
}
|
|
101
101
|
|
|
102
|
-
stop:
|
|
102
|
+
stop: Dict[str, Dict[str, float]] = {
|
|
103
103
|
"sce": {"TAA": 0.470, "TAG": 0.230, "TGA": 0.300},
|
|
104
104
|
"eco": {},
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
|
|
108
|
-
n_end:
|
|
108
|
+
n_end: Dict[str, Dict[str, str]] = {
|
|
109
109
|
"sce": {
|
|
110
110
|
"Val": ">30 h",
|
|
111
111
|
"Met": ">30 h",
|
pydna/common_sub_strings.py
CHANGED
|
@@ -13,12 +13,10 @@ https://github.com/gip0/py-rstr-max
|
|
|
13
13
|
the original code was covered by an MIT licence."""
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
from operator import itemgetter as _itemgetter
|
|
19
|
-
from typing import List as _List, Tuple as _Tuple
|
|
16
|
+
from operator import itemgetter
|
|
17
|
+
from typing import List, Tuple
|
|
20
18
|
|
|
21
|
-
Match =
|
|
19
|
+
Match = Tuple[int, int, int] # (x_start, y_start, length)
|
|
22
20
|
|
|
23
21
|
# def _kark_sort(s, SA, n, K):
|
|
24
22
|
# def radixpass(a, b, r, s, n, k):
|
|
@@ -314,7 +312,7 @@ Match = _Tuple[int, int, int] # (x_start, y_start, length)
|
|
|
314
312
|
# return match
|
|
315
313
|
|
|
316
314
|
|
|
317
|
-
def common_sub_strings(stringx: str, stringy: str, limit: int = 25) ->
|
|
315
|
+
def common_sub_strings(stringx: str, stringy: str, limit: int = 25) -> List[Match]:
|
|
318
316
|
"""
|
|
319
317
|
Finds all common substrings between stringx and stringy, and returns
|
|
320
318
|
them sorted by length.
|
|
@@ -340,11 +338,11 @@ def common_sub_strings(stringx: str, stringy: str, limit: int = 25) -> _List[Mat
|
|
|
340
338
|
|
|
341
339
|
matches = common_substrings(stringx, stringy, limit=limit)
|
|
342
340
|
matches.sort()
|
|
343
|
-
matches.sort(key=
|
|
341
|
+
matches.sort(key=itemgetter(2), reverse=True)
|
|
344
342
|
return matches
|
|
345
343
|
|
|
346
344
|
|
|
347
|
-
def terminal_overlap(stringx: str, stringy: str, limit: int = 15) ->
|
|
345
|
+
def terminal_overlap(stringx: str, stringy: str, limit: int = 15) -> List[Match]:
|
|
348
346
|
"""Finds the flanking common substrings between stringx and stringy
|
|
349
347
|
longer than limit. This means that the results only contain substrings
|
|
350
348
|
that start or end at the ends of stringx and stringy.
|
pydna/contig.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
import textwrap
|
|
3
|
-
import networkx as
|
|
4
|
-
from pydna._pretty import pretty_str as
|
|
5
|
-
from pydna.dseqrecord import Dseqrecord
|
|
6
|
-
from pydna.utils import rc
|
|
2
|
+
import textwrap
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from pydna._pretty import pretty_str as ps
|
|
5
|
+
from pydna.dseqrecord import Dseqrecord
|
|
6
|
+
from pydna.utils import rc
|
|
7
|
+
import numpy as np
|
|
7
8
|
|
|
8
9
|
|
|
9
|
-
class Contig(
|
|
10
|
+
class Contig(Dseqrecord):
|
|
10
11
|
"""This class holds information about a DNA assembly. This class is instantiated by
|
|
11
12
|
the :class:`Assembly` class and is not meant to be used directly.
|
|
12
13
|
|
|
@@ -47,7 +48,7 @@ class Contig(_Dseqrecord):
|
|
|
47
48
|
|
|
48
49
|
def reverse_complement(self):
|
|
49
50
|
answer = type(self)(super().reverse_complement())
|
|
50
|
-
g =
|
|
51
|
+
g = nx.DiGraph()
|
|
51
52
|
nm = self.nodemap
|
|
52
53
|
g.add_edges_from(
|
|
53
54
|
[(nm[v], nm[u], d) for u, v, d in list(self.graph.edges(data=True))[::-1]]
|
|
@@ -59,7 +60,7 @@ class Contig(_Dseqrecord):
|
|
|
59
60
|
if ed["name"].endswith("_rc")
|
|
60
61
|
else "{}_rc".format(ed["name"])[:13]
|
|
61
62
|
)
|
|
62
|
-
ed["seq"] =
|
|
63
|
+
ed["seq"] = rc(ed["seq"])
|
|
63
64
|
ln = len(ed["seq"])
|
|
64
65
|
start, stop = ed["piece"].start, ed["piece"].stop
|
|
65
66
|
ed["piece"] = slice(
|
|
@@ -124,7 +125,7 @@ class Contig(_Dseqrecord):
|
|
|
124
125
|
for p, s in mylist:
|
|
125
126
|
fig += "{}{}\n".format(" " * (p + firstpos), s)
|
|
126
127
|
|
|
127
|
-
return
|
|
128
|
+
return ps(fig)
|
|
128
129
|
|
|
129
130
|
def figure(self):
|
|
130
131
|
r"""Compact ascii representation of the assembled fragments.
|
|
@@ -262,4 +263,196 @@ class Contig(_Dseqrecord):
|
|
|
262
263
|
)
|
|
263
264
|
fig += "|{space} |\n".format(space=" " * (space))
|
|
264
265
|
fig += " {space}".format(space="-" * (space + 3))
|
|
265
|
-
return
|
|
266
|
+
return ps(textwrap.dedent(fig))
|
|
267
|
+
|
|
268
|
+
def figure_mpl(self):
    """
    Draw the assembly as a matplotlib figure.

    One shape is drawn for every edge of ``self.graph``; each shape gets
    its own colour from the "tab20" colormap and is labelled with the
    edge's ``"name"`` entry. When ``self.circular`` is true the edges are
    drawn as arcs on alternating radii around a circle representing
    ``len(self)``; otherwise they are drawn as rounded boxes on two
    alternating lanes along an axis of length ``len(self)``.

    Returns
    -------
    matplotlib.figure.Figure
        A representation of a linear or circular assembly.

    """
    # Imported here so that matplotlib is only required when this method is used.
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches

    # Interactive mode is disabled, otherwise two plots are shown in Spyder.
    plt.ioff()

    fig, ax = plt.subplots()
    edges = list(self.graph.edges(data=True))
    n_edges = len(edges)
    palette = plt.get_cmap("tab20")
    colors = [palette(k / n_edges) for k in range(n_edges)]
    transparent = (1, 1, 1, 0)

    if self.circular:
        arc_width = 0.1  # arc thickness
        circumference = len(self)  # the circle has the length of the assembly

        # Radii alternate between the outer (1.5) and the middle (1.3) lane;
        # with an odd number of edges the last one gets a smaller radius (1.1).
        radii = [1.5, 1.3] * (n_edges // 2)
        if n_edges % 2:
            radii.append(1.1)

        # One color and one radius for each edge.
        assert len(colors) == len(radii) == n_edges

        # Start before the origin by the length of the first node, so that
        # the recombination between the last and first fragments ends at
        # twelve o'clock.
        position = -len(edges[0][0])

        for (_, node2, meta), radius, color in zip(edges, radii, colors):
            piece = meta["piece"]
            # The slice holds the first node but not the second one.
            overlap = len(node2)
            length = piece.stop - piece.start + overlap

            theta1 = 90.0 - 360.0 / circumference * position
            theta2 = 90.0 - 360.0 / circumference * (position + length)

            ax.add_patch(
                mpatches.Wedge(
                    center=(0, 0),
                    r=radius,
                    theta1=theta2,
                    theta2=theta1,
                    width=arc_width,
                    edgecolor=color,
                    facecolor=transparent,
                    linewidth=1,
                )
            )

            # The label goes slightly outside the middle of the arc and is
            # aligned away from the circle.
            mid = np.deg2rad((theta1 + theta2) / 2)
            cos_mid = np.cos(mid)
            label_radius = radius + arc_width + 0.1
            ax.text(
                label_radius * cos_mid,
                label_radius * np.sin(mid),
                meta["name"],
                ha="left" if cos_mid >= 0 else "right",
                va="center",
                fontsize=10,
            )

            position += length - len(node2)

        # This should be enough, but has not been extensively tested.
        xlim = (-1.6, 1.6)
        ylim = (-1.6, 1.6)

    else:  # Linear assembly
        import itertools

        unit = len(self) / 50
        upper = 4 * unit
        lower = 1 * unit
        height = 1 * unit
        left = 0

        lanes = itertools.cycle((lower, upper))
        for (_, node2, meta), lane, color in zip(edges, lanes, colors):
            piece = meta["piece"]
            # The slice holds the first node but not the second one; nothing
            # is added when the second node is "begin" or "end".
            overlap = 0 if node2 in ("begin", "end") else len(node2)
            length = piece.stop - piece.start + overlap

            ax.add_patch(
                mpatches.FancyBboxPatch(
                    (left, lane),
                    length,
                    height,
                    linewidth=1,
                    boxstyle="round",
                    edgecolor=color,
                    facecolor=transparent,
                )
            )

            # Labels go above boxes on the upper lane and below the others.
            if lane == upper:
                label_y = lane + height * 2
            else:
                label_y = lane - height * 2
            ax.text(
                left + length / 2,
                label_y,
                meta["name"],
                ha="center",
                va="center",
                fontsize=10,
            )

            left += length - len(node2)

        xlim = (-1, len(self) + 1)
        ylim = (-height, height * 2 + upper)

    ax.axis("off")
    ax.set_aspect("equal")
    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    return fig
|
|
399
|
+
|
|
400
|
+
# FIXME: This code uses plotly, but I see no reason for it at this point.
|
|
401
|
+
# def figure_plotly(self):
|
|
402
|
+
# import plotly.graph_objects as go
|
|
403
|
+
# import numpy as np
|
|
404
|
+
|
|
405
|
+
# circ = len(self)
|
|
406
|
+
# arcs = list(self.graph.edges(data=True))
|
|
407
|
+
|
|
408
|
+
# # Radii setup
|
|
409
|
+
# small_radius = 1.1
|
|
410
|
+
# middle_radius = 1.3
|
|
411
|
+
# outer_radius = 1.5
|
|
412
|
+
# arc_width = 0.1
|
|
413
|
+
|
|
414
|
+
# radii = [outer_radius, middle_radius] * (len(arcs) // 2)
|
|
415
|
+
# if len(arcs) % 2 != 0:
|
|
416
|
+
# radii.append(small_radius)
|
|
417
|
+
|
|
418
|
+
# fig = go.Figure()
|
|
419
|
+
# start = 0 - len(arcs[0][0])
|
|
420
|
+
|
|
421
|
+
# for (node1, node2, meta), radius in zip(arcs, radii):
|
|
422
|
+
# slc = meta["piece"]
|
|
423
|
+
# length = slc.stop - slc.start + len(node1)
|
|
424
|
+
|
|
425
|
+
# theta1 = 90.0 - 360.0 / circ * start
|
|
426
|
+
# theta2 = 90.0 - 360.0 / circ * (start + length)
|
|
427
|
+
|
|
428
|
+
# # Generate arc points
|
|
429
|
+
# theta = np.linspace(theta1, theta2, 50)
|
|
430
|
+
# theta_rev = theta[::-1]
|
|
431
|
+
|
|
432
|
+
# r_outer = np.full_like(theta, radius)
|
|
433
|
+
# r_inner = np.full_like(theta_rev, radius - arc_width)
|
|
434
|
+
|
|
435
|
+
# r = np.concatenate([r_outer, r_inner])
|
|
436
|
+
# t = np.concatenate([theta, theta_rev])
|
|
437
|
+
|
|
438
|
+
# fig.add_trace(
|
|
439
|
+
# go.Scatterpolar(
|
|
440
|
+
# r=r,
|
|
441
|
+
# theta=t,
|
|
442
|
+
# fill="toself",
|
|
443
|
+
# mode="lines",
|
|
444
|
+
# line_color="rgba(0,100,200,0.6)",
|
|
445
|
+
# hoverinfo="text",
|
|
446
|
+
# text=meta["name"],
|
|
447
|
+
# name=meta["name"],
|
|
448
|
+
# )
|
|
449
|
+
# )
|
|
450
|
+
|
|
451
|
+
# start += length - len(node2)
|
|
452
|
+
|
|
453
|
+
# fig.update_layout(
|
|
454
|
+
# polar=dict(radialaxis=dict(visible=False), angularaxis=dict(visible=False)),
|
|
455
|
+
# showlegend=False,
|
|
456
|
+
# )
|
|
457
|
+
|
|
458
|
+
# fig.show("browser")
|
pydna/design.py
CHANGED
|
@@ -14,27 +14,23 @@
|
|
|
14
14
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
from pydna.tm import tm_default
|
|
18
|
-
import math
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
from pydna.
|
|
23
|
-
from pydna.
|
|
24
|
-
from pydna.
|
|
25
|
-
|
|
26
|
-
from pydna.primer import Primer as _Primer
|
|
27
|
-
|
|
28
|
-
# import logging as _logging
|
|
29
|
-
import operator as _operator
|
|
17
|
+
from pydna.tm import tm_default
|
|
18
|
+
import math
|
|
19
|
+
import copy
|
|
20
|
+
from pydna.amplicon import Amplicon
|
|
21
|
+
from pydna.amplify import Anneal
|
|
22
|
+
from pydna.amplify import pcr
|
|
23
|
+
from pydna.dseqrecord import Dseqrecord
|
|
24
|
+
from pydna.primer import Primer
|
|
25
|
+
import operator
|
|
30
26
|
from typing import Tuple
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
from itertools import pairwise, product
|
|
28
|
+
import re
|
|
33
29
|
|
|
34
30
|
|
|
35
31
|
def _design_primer(
|
|
36
32
|
target_tm: float,
|
|
37
|
-
template:
|
|
33
|
+
template: Dseqrecord,
|
|
38
34
|
limit: int,
|
|
39
35
|
tm_func,
|
|
40
36
|
starting_length: int = 0,
|
|
@@ -52,10 +48,10 @@ def _design_primer(
|
|
|
52
48
|
p = str(template.seq[:length])
|
|
53
49
|
|
|
54
50
|
if tmp < target_tm:
|
|
55
|
-
condition =
|
|
51
|
+
condition = operator.le
|
|
56
52
|
increment = 1
|
|
57
53
|
else:
|
|
58
|
-
condition =
|
|
54
|
+
condition = operator.ge
|
|
59
55
|
increment = -1
|
|
60
56
|
while condition(tmp, target_tm):
|
|
61
57
|
prev_temp = tmp
|
|
@@ -69,7 +65,6 @@ def _design_primer(
|
|
|
69
65
|
if length < limit:
|
|
70
66
|
return template.seq[:limit]
|
|
71
67
|
|
|
72
|
-
# _module_logger.debug(((p, tmp), (prev_primer, prev_temp)))
|
|
73
68
|
if abs(target_tm - tmp) < abs(target_tm - prev_temp):
|
|
74
69
|
return p
|
|
75
70
|
else:
|
|
@@ -82,7 +77,7 @@ def primer_design(
|
|
|
82
77
|
rp=None,
|
|
83
78
|
limit=13,
|
|
84
79
|
target_tm=55.0,
|
|
85
|
-
tm_func=
|
|
80
|
+
tm_func=tm_default,
|
|
86
81
|
estimate_function=None,
|
|
87
82
|
**kwargs,
|
|
88
83
|
):
|
|
@@ -191,33 +186,33 @@ def primer_design(
|
|
|
191
186
|
return _design_primer(target_tm, template, limit, tm_func)
|
|
192
187
|
|
|
193
188
|
if not fp and not rp:
|
|
194
|
-
|
|
195
|
-
fp =
|
|
189
|
+
|
|
190
|
+
fp = Primer((design(target_tm, template)))
|
|
196
191
|
target_tm = tm_func(str(fp.seq))
|
|
197
|
-
|
|
198
|
-
rp =
|
|
192
|
+
|
|
193
|
+
rp = Primer(design(target_tm, template.reverse_complement()))
|
|
199
194
|
elif fp and not rp:
|
|
200
195
|
try:
|
|
201
|
-
fp =
|
|
196
|
+
fp = Anneal((fp,), template).forward_primers.pop()
|
|
202
197
|
except IndexError:
|
|
203
198
|
raise ValueError("Forward primer does not anneal")
|
|
204
199
|
except Exception: # pragma: no cover
|
|
205
200
|
print("Unexpected error")
|
|
206
201
|
raise
|
|
207
202
|
target_tm = tm_func(fp.footprint)
|
|
208
|
-
|
|
209
|
-
rp =
|
|
203
|
+
|
|
204
|
+
rp = Primer(design(target_tm, template.reverse_complement()))
|
|
210
205
|
elif not fp and rp:
|
|
211
206
|
try:
|
|
212
|
-
rp =
|
|
207
|
+
rp = Anneal((rp,), template).reverse_primers.pop()
|
|
213
208
|
except IndexError:
|
|
214
209
|
raise ValueError("Reverse primer does not anneal")
|
|
215
210
|
except Exception: # pragma: no cover
|
|
216
211
|
print("Unexpected error")
|
|
217
212
|
raise
|
|
218
213
|
target_tm = tm_func(rp.footprint)
|
|
219
|
-
|
|
220
|
-
fp =
|
|
214
|
+
|
|
215
|
+
fp = Primer(design(target_tm, template))
|
|
221
216
|
else:
|
|
222
217
|
raise ValueError("Specify maximum one of the two primers.")
|
|
223
218
|
|
|
@@ -236,15 +231,15 @@ def primer_design(
|
|
|
236
231
|
fp.description = fp.id + " " + template.accession
|
|
237
232
|
rp.description = rp.id + " " + template.accession
|
|
238
233
|
|
|
239
|
-
ampl =
|
|
234
|
+
ampl = Anneal((fp, rp), template, limit=limit)
|
|
240
235
|
|
|
241
|
-
prod = ampl.products[0] if ampl.products else
|
|
236
|
+
prod = ampl.products[0] if ampl.products else Amplicon("")
|
|
242
237
|
|
|
243
238
|
if len(ampl.products) > 1:
|
|
244
|
-
import warnings
|
|
239
|
+
import warnings
|
|
245
240
|
from pydna import _PydnaWarning
|
|
246
241
|
|
|
247
|
-
|
|
242
|
+
warnings.warn(
|
|
248
243
|
"designed primers do not yield a unique PCR product", _PydnaWarning
|
|
249
244
|
)
|
|
250
245
|
|
|
@@ -646,7 +641,7 @@ def assembly_fragments(f, overlap=35, maxlink=40, circular=False):
|
|
|
646
641
|
)
|
|
647
642
|
|
|
648
643
|
if hasattr(fragments[0], "template"):
|
|
649
|
-
fragments[0] =
|
|
644
|
+
fragments[0] = pcr(
|
|
650
645
|
(fragments[-1].forward_primer, fragments[0].reverse_primer),
|
|
651
646
|
fragments[0].template,
|
|
652
647
|
)
|
|
@@ -661,11 +656,7 @@ def assembly_fragments(f, overlap=35, maxlink=40, circular=False):
|
|
|
661
656
|
"Every second fragment larger than maxlink has to be an Amplicon object"
|
|
662
657
|
)
|
|
663
658
|
|
|
664
|
-
|
|
665
|
-
# _module_logger.debug("overlap = %s", overlap)
|
|
666
|
-
# _module_logger.debug("max_link = %s", maxlink)
|
|
667
|
-
|
|
668
|
-
f = [_copy.copy(f) for f in f]
|
|
659
|
+
f = [copy.copy(f) for f in f]
|
|
669
660
|
|
|
670
661
|
first_fragment_length = len(f[0])
|
|
671
662
|
last_fragment_length = len(f[-1])
|
|
@@ -673,26 +664,21 @@ def assembly_fragments(f, overlap=35, maxlink=40, circular=False):
|
|
|
673
664
|
if first_fragment_length <= maxlink:
|
|
674
665
|
# first fragment should be removed and added to second fragment (new first fragment) forward primer
|
|
675
666
|
f[1].forward_primer = f[0].seq._data.decode("ASCII") + f[1].forward_primer
|
|
676
|
-
|
|
667
|
+
|
|
677
668
|
f = f[1:]
|
|
678
669
|
# else:
|
|
679
|
-
# _module_logger.debug("first fragment stays since len(f[0]) = %s", first_fragment_length)
|
|
680
670
|
|
|
681
671
|
if last_fragment_length <= maxlink:
|
|
682
672
|
f[-2].reverse_primer = (
|
|
683
673
|
f[-1].seq.reverse_complement()._data.decode("ASCII") + f[-2].reverse_primer
|
|
684
674
|
)
|
|
685
675
|
f = f[:-1]
|
|
686
|
-
# _module_logger.debug("last fragment removed since len(f[%s]) = %s", len(f), last_fragment_length)
|
|
687
|
-
# else:
|
|
688
|
-
# _module_logger.debug("last fragment stays since len(f[%s]) = %s", len(f), last_fragment_length)
|
|
689
676
|
|
|
690
|
-
|
|
677
|
+
# else:
|
|
691
678
|
|
|
692
|
-
|
|
693
|
-
# _module_logger.debug("loop through fragments in groups of three:")
|
|
679
|
+
empty = Dseqrecord("")
|
|
694
680
|
|
|
695
|
-
tail_length =
|
|
681
|
+
tail_length = math.ceil(overlap / 2)
|
|
696
682
|
|
|
697
683
|
for i in range(len(f) - 1):
|
|
698
684
|
first = f[i]
|
|
@@ -700,15 +686,12 @@ def assembly_fragments(f, overlap=35, maxlink=40, circular=False):
|
|
|
700
686
|
|
|
701
687
|
secnd_len = len(secnd)
|
|
702
688
|
|
|
703
|
-
# _module_logger.debug("first = %s", str(first.seq))
|
|
704
|
-
# _module_logger.debug("secnd = %s", str(secnd.seq))
|
|
705
|
-
|
|
706
689
|
if secnd_len <= maxlink:
|
|
707
|
-
|
|
690
|
+
|
|
708
691
|
third = f[i + 2]
|
|
709
|
-
|
|
692
|
+
|
|
710
693
|
if hasattr(f[i], "template") and hasattr(third, "template"):
|
|
711
|
-
|
|
694
|
+
|
|
712
695
|
# "secnd is flanked by amplicons, so half of secnd should be added to each flanking primer"
|
|
713
696
|
# )
|
|
714
697
|
|
|
@@ -729,14 +712,14 @@ def assembly_fragments(f, overlap=35, maxlink=40, circular=False):
|
|
|
729
712
|
: secnd_len // 2
|
|
730
713
|
]
|
|
731
714
|
)[-tail_length:]
|
|
732
|
-
|
|
715
|
+
|
|
733
716
|
first.reverse_primer = lnk + first.reverse_primer
|
|
734
717
|
|
|
735
718
|
lnk = (
|
|
736
719
|
first.seq._data.decode("ASCII")
|
|
737
720
|
+ secnd.seq._data.decode("ASCII")[: secnd_len // 2]
|
|
738
721
|
)[-tail_length:]
|
|
739
|
-
|
|
722
|
+
|
|
740
723
|
third.forward_primer = lnk + third.forward_primer
|
|
741
724
|
|
|
742
725
|
elif hasattr(first, "template"):
|
|
@@ -773,13 +756,11 @@ def assembly_fragments(f, overlap=35, maxlink=40, circular=False):
|
|
|
773
756
|
f[i] = first
|
|
774
757
|
f[i + 1] = secnd
|
|
775
758
|
|
|
776
|
-
# _module_logger.debug("loop ended")
|
|
777
|
-
|
|
778
759
|
f = [item for item in f if len(item)]
|
|
779
760
|
|
|
780
761
|
return [
|
|
781
762
|
(
|
|
782
|
-
|
|
763
|
+
pcr(
|
|
783
764
|
p.forward_primer,
|
|
784
765
|
p.reverse_primer,
|
|
785
766
|
p.template,
|
|
@@ -806,3 +787,138 @@ def circular_assembly_fragments(f, overlap=35, maxlink=40):
|
|
|
806
787
|
stacklevel=2,
|
|
807
788
|
)
|
|
808
789
|
return assembly_fragments(f, overlap=overlap, maxlink=maxlink, circular=True)
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
def user_assembly_design(
|
|
793
|
+
f: list[Amplicon], max_overlap: int = 15, min_overlap: int = 4, max_tail=50
|
|
794
|
+
) -> list[Amplicon]:
|
|
795
|
+
|
|
796
|
+
import warnings
|
|
797
|
+
|
|
798
|
+
warnings.warn(
|
|
799
|
+
"The user_assembly_design function is experimental and "
|
|
800
|
+
"may change in future versions.",
|
|
801
|
+
category=FutureWarning,
|
|
802
|
+
stacklevel=2,
|
|
803
|
+
)
|
|
804
|
+
|
|
805
|
+
assert max_overlap > min_overlap, (
|
|
806
|
+
f"max_overlap ({max_overlap}) "
|
|
807
|
+
"has to be larger than min_overlap "
|
|
808
|
+
f"({min_overlap})"
|
|
809
|
+
)
|
|
810
|
+
amplicons = []
|
|
811
|
+
|
|
812
|
+
for fragment in f:
|
|
813
|
+
amplicons.append(primer_design(fragment))
|
|
814
|
+
|
|
815
|
+
flag = True
|
|
816
|
+
|
|
817
|
+
for ths, nxt in pairwise(amplicons):
|
|
818
|
+
|
|
819
|
+
A_positions_in_ths = [m.start() for m in re.finditer("A|a", str(ths.seq))]
|
|
820
|
+
T_positions_in_nxt = [m.start() for m in re.finditer("T|t", str(nxt.seq))]
|
|
821
|
+
|
|
822
|
+
for ths_a, ths_t in zip(A_positions_in_ths[::-1], T_positions_in_nxt):
|
|
823
|
+
|
|
824
|
+
sticky_length = ths_t + len(ths) - ths_a
|
|
825
|
+
|
|
826
|
+
if sticky_length < min_overlap:
|
|
827
|
+
continue
|
|
828
|
+
|
|
829
|
+
if sticky_length > max_overlap:
|
|
830
|
+
flag = False
|
|
831
|
+
break
|
|
832
|
+
|
|
833
|
+
rp = bytearray(
|
|
834
|
+
nxt.seq[: ths_t + 1].rc()._data + ths.reverse_primer.seq._data
|
|
835
|
+
)
|
|
836
|
+
fp = bytearray(ths.seq[ths_a:]._data + nxt.forward_primer.seq._data)
|
|
837
|
+
|
|
838
|
+
fp[sticky_length] = ord(b"U")
|
|
839
|
+
rp[sticky_length] = ord(b"U")
|
|
840
|
+
|
|
841
|
+
ths.reverse_primer = Primer(rp)
|
|
842
|
+
nxt.forward_primer = Primer(fp)
|
|
843
|
+
|
|
844
|
+
break # Primers were designed.
|
|
845
|
+
else:
|
|
846
|
+
flag = False
|
|
847
|
+
|
|
848
|
+
if flag:
|
|
849
|
+
continue
|
|
850
|
+
|
|
851
|
+
# No suitable T-A pair was found on opposite sides of both fragments
|
|
852
|
+
# Look for T-A pairs contained in either sequence
|
|
853
|
+
# Distance between the T-A and proximity to the junction are important
|
|
854
|
+
# factors
|
|
855
|
+
T_positions_in_ths = [m.start() for m in re.finditer("T|t", str(ths.seq))]
|
|
856
|
+
pairs_ths = product(A_positions_in_ths[::-1], T_positions_in_ths[::-1])
|
|
857
|
+
|
|
858
|
+
for ths_a, ths_t in pairs_ths:
|
|
859
|
+
if ths_a > ths_t:
|
|
860
|
+
continue
|
|
861
|
+
sticky_length = ths_t - ths_a
|
|
862
|
+
if sticky_length < min_overlap:
|
|
863
|
+
continue
|
|
864
|
+
if sticky_length > max_overlap:
|
|
865
|
+
continue
|
|
866
|
+
pair_ths = ths_a, ths_t
|
|
867
|
+
break
|
|
868
|
+
else:
|
|
869
|
+
pair_ths = tuple()
|
|
870
|
+
ths_a, ths_t = 0, 0
|
|
871
|
+
|
|
872
|
+
A_positions_in_nxt = [m.start() for m in re.finditer("A|a", str(nxt.seq))]
|
|
873
|
+
pairs_nxt = product(A_positions_in_nxt, T_positions_in_nxt)
|
|
874
|
+
|
|
875
|
+
for nxt_a, nxt_t in pairs_nxt:
|
|
876
|
+
if nxt_a > nxt_t:
|
|
877
|
+
continue
|
|
878
|
+
sticky_length = nxt_t - nxt_a
|
|
879
|
+
if sticky_length < min_overlap:
|
|
880
|
+
continue
|
|
881
|
+
if sticky_length > max_overlap:
|
|
882
|
+
continue
|
|
883
|
+
pair_nxt = nxt_a, nxt_t
|
|
884
|
+
break
|
|
885
|
+
else:
|
|
886
|
+
pair_nxt = tuple()
|
|
887
|
+
nxt_a, nxt_t = 0, 0
|
|
888
|
+
|
|
889
|
+
if (pair_ths and not pair_nxt) or len(ths) - ths_a <= nxt_t:
|
|
890
|
+
# T-A pair in ths;
|
|
891
|
+
# Move ths reverse primer downstream
|
|
892
|
+
# Extend nxt foward primer tail
|
|
893
|
+
|
|
894
|
+
fp = bytearray(ths.seq[ths_a:]._data + nxt.forward_primer.seq._data)
|
|
895
|
+
fp[ths_t - ths_a] = ord(b"U")
|
|
896
|
+
nxt.forward_primer = Primer(fp)
|
|
897
|
+
shorter_ths = ths[: ths_t + 1]
|
|
898
|
+
rp = bytearray(
|
|
899
|
+
primer_design(
|
|
900
|
+
shorter_ths, limit=ths_t - ths_a + 1
|
|
901
|
+
).reverse_primer.seq._data
|
|
902
|
+
)
|
|
903
|
+
rp[ths_t - ths_a] = ord(b"U")
|
|
904
|
+
ths.reverse_primer = Primer(rp)
|
|
905
|
+
|
|
906
|
+
elif (not pair_ths and pair_nxt) or len(ths) - ths_a >= nxt_t:
|
|
907
|
+
# T-A pair in nxt; modify ths reverse primer
|
|
908
|
+
# Move nxt forward primer upstream
|
|
909
|
+
# Extend ths reverse primer tail
|
|
910
|
+
rp = bytearray(
|
|
911
|
+
nxt.seq[: nxt_t + 1].rc()._data + ths.reverse_primer.seq._data
|
|
912
|
+
)
|
|
913
|
+
rp[nxt_t - nxt_a] = ord(b"U")
|
|
914
|
+
ths.reverse_primer = Primer(rp)
|
|
915
|
+
shorter_nxt = nxt[nxt_a:]
|
|
916
|
+
fp = bytearray(
|
|
917
|
+
primer_design(
|
|
918
|
+
shorter_nxt, limit=nxt_t - nxt_a + 1
|
|
919
|
+
).forward_primer.seq._data
|
|
920
|
+
)
|
|
921
|
+
fp[nxt_t - nxt_a] = ord(b"U")
|
|
922
|
+
nxt.forward_primer = Primer(fp)
|
|
923
|
+
|
|
924
|
+
return amplicons
|