pydna 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +24 -193
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +283 -294
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/download.py +6 -15
- pydna/dseq.py +1794 -718
- pydna/dseqrecord.py +170 -169
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +187 -60
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +19 -19
- pydna/utils.py +97 -75
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/METADATA +8 -8
- pydna-5.5.5.dist-info/RECORD +43 -0
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.4.dist-info/RECORD +0 -46
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/licenses/LICENSE.txt +0 -0
pydna/assembly.py
CHANGED
|
@@ -42,41 +42,28 @@ sequences separating the overlapping regions form edges.
|
|
|
42
42
|
The NetworkX package is used to trace linear and circular paths through the
|
|
43
43
|
graph.
|
|
44
44
|
"""
|
|
45
|
-
import os
|
|
46
|
-
from Bio.SeqFeature import SeqFeature
|
|
47
|
-
from Bio.SeqFeature import ExactPosition
|
|
48
|
-
from Bio.SeqFeature import SimpleLocation
|
|
49
|
-
from Bio.SeqFeature import CompoundLocation
|
|
50
|
-
from pydna.utils import rc
|
|
51
|
-
|
|
52
|
-
# from pydna.utils import memorize as _memorize
|
|
53
|
-
from pydna._pretty import pretty_str as _pretty_str
|
|
54
|
-
from pydna.contig import Contig as _Contig
|
|
55
|
-
from pydna.common_sub_strings import common_sub_strings, Match as _Match
|
|
56
|
-
|
|
57
|
-
from pydna.dseqrecord import Dseqrecord as _Dseqrecord
|
|
58
|
-
import networkx as _nx
|
|
59
|
-
|
|
60
|
-
from copy import deepcopy as _deepcopy
|
|
61
|
-
from typing import (
|
|
62
|
-
Callable as _Callable,
|
|
63
|
-
Dict as _Dict,
|
|
64
|
-
List as _List,
|
|
65
|
-
NamedTuple as _NamedTuple,
|
|
66
|
-
TypedDict as _TypedDict,
|
|
67
|
-
)
|
|
68
|
-
import itertools as _itertools
|
|
45
|
+
import os
|
|
46
|
+
from Bio.SeqFeature import SeqFeature
|
|
47
|
+
from Bio.SeqFeature import ExactPosition
|
|
48
|
+
from Bio.SeqFeature import SimpleLocation
|
|
49
|
+
from Bio.SeqFeature import CompoundLocation
|
|
50
|
+
from pydna.utils import rc
|
|
69
51
|
|
|
70
|
-
|
|
52
|
+
from pydna._pretty import pretty_str as ps
|
|
53
|
+
from pydna.contig import Contig
|
|
54
|
+
from pydna.common_sub_strings import common_sub_strings, Match
|
|
71
55
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
56
|
+
from pydna.dseqrecord import Dseqrecord
|
|
57
|
+
import networkx as nx
|
|
58
|
+
|
|
59
|
+
from copy import deepcopy
|
|
60
|
+
from typing import Callable, Dict, List, NamedTuple, TypedDict
|
|
61
|
+
import itertools
|
|
75
62
|
|
|
76
|
-
|
|
63
|
+
from pydna.threading_timer_decorator_exit import exit_after
|
|
77
64
|
|
|
78
65
|
|
|
79
|
-
class Assembly(object):
|
|
66
|
+
class Assembly(object):
|
|
80
67
|
"""Assembly of a list of linear DNA fragments into linear or circular
|
|
81
68
|
constructs. The Assembly is meant to replace the Assembly method as it
|
|
82
69
|
is easier to use. Accepts a list of Dseqrecords (source fragments) to
|
|
@@ -123,13 +110,13 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
123
110
|
|
|
124
111
|
def __init__(
|
|
125
112
|
self,
|
|
126
|
-
frags:
|
|
113
|
+
frags: List[Dseqrecord],
|
|
127
114
|
limit: int = 25,
|
|
128
|
-
algorithm:
|
|
115
|
+
algorithm: Callable[[str, str, int], List[Match]] = common_sub_strings,
|
|
129
116
|
) -> None:
|
|
130
117
|
# Fragments is a string subclass with some extra properties
|
|
131
118
|
# The order of the fragments has significance
|
|
132
|
-
fragments:
|
|
119
|
+
fragments: List[_FragmentDict] = [
|
|
133
120
|
{
|
|
134
121
|
"upper": str(f.seq).upper(),
|
|
135
122
|
"mixed": str(f.seq),
|
|
@@ -142,7 +129,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
142
129
|
|
|
143
130
|
# rcfragments is a dict with fragments as keys and the reverse
|
|
144
131
|
# complement as value
|
|
145
|
-
rcfragments:
|
|
132
|
+
rcfragments: Dict[str, _FragmentDict] = {
|
|
146
133
|
f["mixed"]: {
|
|
147
134
|
"upper": str(frc.seq).upper(),
|
|
148
135
|
"mixed": str(frc.seq),
|
|
@@ -163,7 +150,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
163
150
|
# all combinations of fragments are compared.
|
|
164
151
|
# see https://docs.python.org/3.10/library/itertools.html
|
|
165
152
|
# itertools.combinations('ABCD', 2)--> AB AC AD BC BD CD
|
|
166
|
-
for first, secnd in
|
|
153
|
+
for first, secnd in itertools.combinations(fragments, 2):
|
|
167
154
|
if first["upper"] == secnd["upper"]:
|
|
168
155
|
continue
|
|
169
156
|
|
|
@@ -222,7 +209,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
222
209
|
# multidigraph.html
|
|
223
210
|
|
|
224
211
|
order = 0
|
|
225
|
-
G =
|
|
212
|
+
G = nx.MultiDiGraph()
|
|
226
213
|
# loop through all fragments and their reverse complements
|
|
227
214
|
|
|
228
215
|
for f in fragments:
|
|
@@ -231,7 +218,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
231
218
|
for f in rcfragments.values():
|
|
232
219
|
f["nodes"] = sorted(set(f["nodes"]))
|
|
233
220
|
|
|
234
|
-
for f in
|
|
221
|
+
for f in itertools.chain(fragments, rcfragments.values()):
|
|
235
222
|
# nodes are sorted in place in the order of their position
|
|
236
223
|
# duplicates are removed (same position and sequence)
|
|
237
224
|
# along the fragment since nodes are a tuple (position(int),
|
|
@@ -250,7 +237,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
250
237
|
start2,
|
|
251
238
|
length2,
|
|
252
239
|
node2,
|
|
253
|
-
) in
|
|
240
|
+
) in itertools.combinations(f["nodes"], 2):
|
|
254
241
|
feats = [
|
|
255
242
|
ft
|
|
256
243
|
for ft in f["features"]
|
|
@@ -270,7 +257,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
270
257
|
name=f["name"],
|
|
271
258
|
) # string
|
|
272
259
|
|
|
273
|
-
self.G =
|
|
260
|
+
self.G = nx.create_empty_copy(G)
|
|
274
261
|
self.G.add_edges_from(
|
|
275
262
|
sorted(
|
|
276
263
|
G.edges(data=True), key=lambda t: len(t[2].get("seq", 1)), reverse=True
|
|
@@ -282,9 +269,9 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
282
269
|
self.rcfragments = rcfragments
|
|
283
270
|
self.algorithm = algorithm
|
|
284
271
|
|
|
285
|
-
@exit_after(int(
|
|
272
|
+
@exit_after(int(os.getenv("pydna_assembly_limit", 10)))
|
|
286
273
|
def assemble_linear(self, start=None, end=None, max_nodes=None):
|
|
287
|
-
G =
|
|
274
|
+
G = nx.MultiDiGraph(self.G)
|
|
288
275
|
|
|
289
276
|
G.add_nodes_from(["begin", "begin_rc", "end", "end_rc"], length=0)
|
|
290
277
|
|
|
@@ -354,16 +341,12 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
354
341
|
max_nodes = max_nodes or len(self.fragments)
|
|
355
342
|
|
|
356
343
|
linearpaths = list(
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
_nx.DiGraph(G), "begin_rc", "end", cutoff=max_nodes
|
|
364
|
-
),
|
|
365
|
-
_nx.all_simple_paths(
|
|
366
|
-
_nx.DiGraph(G), "begin_rc", "end_rc", cutoff=max_nodes
|
|
344
|
+
itertools.chain(
|
|
345
|
+
nx.all_simple_paths(nx.DiGraph(G), "begin", "end", cutoff=max_nodes),
|
|
346
|
+
nx.all_simple_paths(nx.DiGraph(G), "begin", "end_rc", cutoff=max_nodes),
|
|
347
|
+
nx.all_simple_paths(nx.DiGraph(G), "begin_rc", "end", cutoff=max_nodes),
|
|
348
|
+
nx.all_simple_paths(
|
|
349
|
+
nx.DiGraph(G), "begin_rc", "end_rc", cutoff=max_nodes
|
|
367
350
|
),
|
|
368
351
|
)
|
|
369
352
|
)
|
|
@@ -379,7 +362,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
379
362
|
e.append((u, v, d))
|
|
380
363
|
edgelol.append(e)
|
|
381
364
|
|
|
382
|
-
for edges in
|
|
365
|
+
for edges in itertools.product(*edgelol):
|
|
383
366
|
# TODO explain
|
|
384
367
|
if [
|
|
385
368
|
True
|
|
@@ -392,14 +375,14 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
392
375
|
|
|
393
376
|
if key in lps:
|
|
394
377
|
continue # TODO: is this test needed?
|
|
395
|
-
sg =
|
|
378
|
+
sg = nx.DiGraph()
|
|
396
379
|
sg.add_edges_from(edges)
|
|
397
380
|
sg.add_nodes_from((n, d) for n, d in G.nodes(data=True) if n in lp)
|
|
398
381
|
|
|
399
382
|
edgefeatures = []
|
|
400
383
|
offset = 0
|
|
401
384
|
for u, v, e in edges:
|
|
402
|
-
feats =
|
|
385
|
+
feats = deepcopy(e["features"])
|
|
403
386
|
for f in feats:
|
|
404
387
|
f.location += offset - e["piece"].start
|
|
405
388
|
edgefeatures.extend(feats)
|
|
@@ -409,7 +392,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
409
392
|
|
|
410
393
|
return sorted(
|
|
411
394
|
(
|
|
412
|
-
|
|
395
|
+
Contig.from_string(
|
|
413
396
|
lp[0],
|
|
414
397
|
features=lp[1],
|
|
415
398
|
graph=lp[2],
|
|
@@ -423,11 +406,11 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
423
406
|
reverse=True,
|
|
424
407
|
)
|
|
425
408
|
|
|
426
|
-
@exit_after(int(
|
|
409
|
+
@exit_after(int(os.getenv("pydna_assembly_limit", 10)))
|
|
427
410
|
def assemble_circular(self, length_bound=None):
|
|
428
411
|
cps = {} # circular assembly
|
|
429
412
|
cpsrc = {}
|
|
430
|
-
cpaths = sorted(
|
|
413
|
+
cpaths = sorted(nx.simple_cycles(self.G, length_bound=length_bound), key=len)
|
|
431
414
|
cpaths_sorted = []
|
|
432
415
|
for cpath in cpaths:
|
|
433
416
|
order, node = min((self.G.nodes[node]["order"], node) for node in cpath)
|
|
@@ -449,7 +432,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
449
432
|
e.append((u, v, d))
|
|
450
433
|
edgelol.append(e)
|
|
451
434
|
|
|
452
|
-
for edges in
|
|
435
|
+
for edges in itertools.product(*edgelol):
|
|
453
436
|
if [
|
|
454
437
|
True
|
|
455
438
|
for ((u, v, e), (x, y, z)) in zip(edges, edges[1:])
|
|
@@ -461,7 +444,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
461
444
|
|
|
462
445
|
if key in cps or key in cpsrc:
|
|
463
446
|
continue # TODO: is test in cpsrc needed?
|
|
464
|
-
sg =
|
|
447
|
+
sg = nx.DiGraph()
|
|
465
448
|
sg.add_edges_from(edges)
|
|
466
449
|
sg.add_nodes_from((n, d) for n, d in self.G.nodes(data=True) if n in cp)
|
|
467
450
|
|
|
@@ -469,7 +452,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
469
452
|
offset = 0
|
|
470
453
|
|
|
471
454
|
for u, v, e in edges:
|
|
472
|
-
feats =
|
|
455
|
+
feats = deepcopy(e["features"])
|
|
473
456
|
for feat in feats:
|
|
474
457
|
feat.location += offset
|
|
475
458
|
edgefeatures.extend(feats)
|
|
@@ -478,18 +461,18 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
478
461
|
if f.location.start > len(ct) and f.location.end > len(ct):
|
|
479
462
|
f.location += -len(ct)
|
|
480
463
|
elif f.location.end > len(ct):
|
|
481
|
-
f.location =
|
|
464
|
+
f.location = CompoundLocation(
|
|
482
465
|
(
|
|
483
|
-
|
|
484
|
-
f.location.start,
|
|
466
|
+
SimpleLocation(
|
|
467
|
+
f.location.start, ExactPosition(len(ct))
|
|
485
468
|
),
|
|
486
|
-
|
|
487
|
-
|
|
469
|
+
SimpleLocation(
|
|
470
|
+
ExactPosition(0), f.location.end - len(ct)
|
|
488
471
|
),
|
|
489
472
|
)
|
|
490
473
|
)
|
|
491
474
|
|
|
492
|
-
cps[key] = cpsrc[
|
|
475
|
+
cps[key] = cpsrc[rc(key)] = (
|
|
493
476
|
ct,
|
|
494
477
|
edgefeatures,
|
|
495
478
|
sg,
|
|
@@ -499,7 +482,7 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
499
482
|
|
|
500
483
|
return sorted(
|
|
501
484
|
(
|
|
502
|
-
|
|
485
|
+
Contig.from_string(
|
|
503
486
|
cp[0],
|
|
504
487
|
features=cp[1],
|
|
505
488
|
graph=cp[2],
|
|
@@ -513,9 +496,9 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
513
496
|
reverse=True,
|
|
514
497
|
)
|
|
515
498
|
|
|
516
|
-
def __repr__(self) ->
|
|
499
|
+
def __repr__(self) -> ps:
|
|
517
500
|
# https://pyformat.info
|
|
518
|
-
return
|
|
501
|
+
return ps(
|
|
519
502
|
"Assembly\n"
|
|
520
503
|
"fragments..: {sequences}\n"
|
|
521
504
|
"limit(bp)..: {limit}\n"
|
|
@@ -532,34 +515,34 @@ class Assembly(object): # , metaclass=_Memoize):
|
|
|
532
515
|
|
|
533
516
|
|
|
534
517
|
example_fragments = (
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
518
|
+
Dseqrecord("AacgatCAtgctcc", name="a"),
|
|
519
|
+
Dseqrecord("TtgctccTAAattctgc", name="b"),
|
|
520
|
+
Dseqrecord("CattctgcGAGGacgatG", name="c"),
|
|
538
521
|
)
|
|
539
522
|
|
|
540
523
|
|
|
541
524
|
linear_results = (
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
525
|
+
Dseqrecord("AacgatCAtgctccTAAattctgcGAGGacgatG", name="abc"),
|
|
526
|
+
Dseqrecord("ggagcaTGatcgtCCTCgcagaatG", name="ac_rc"),
|
|
527
|
+
Dseqrecord("AacgatG", name="ac"),
|
|
545
528
|
)
|
|
546
529
|
|
|
547
530
|
|
|
548
531
|
circular_results = (
|
|
549
|
-
|
|
550
|
-
|
|
532
|
+
Dseqrecord("acgatCAtgctccTAAattctgcGAGG", name="abc", circular=True),
|
|
533
|
+
Dseqrecord("ggagcaTGatcgtCCTCgcagaatTTA", name="abc_rc", circular=True),
|
|
551
534
|
)
|
|
552
535
|
|
|
553
536
|
|
|
554
|
-
class _NodeTuple(
|
|
537
|
+
class _NodeTuple(NamedTuple):
|
|
555
538
|
start: int
|
|
556
539
|
length: int
|
|
557
540
|
shared_seq: str # uppercase
|
|
558
541
|
|
|
559
542
|
|
|
560
|
-
class _FragmentDict(
|
|
543
|
+
class _FragmentDict(TypedDict):
|
|
561
544
|
upper: str
|
|
562
545
|
mixed: str
|
|
563
546
|
name: str
|
|
564
|
-
features:
|
|
565
|
-
nodes:
|
|
547
|
+
features: List[SeqFeature]
|
|
548
|
+
nodes: List[_NodeTuple]
|