pydna 5.5.4__py3-none-any.whl → 5.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +30 -195
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/all.py +1 -12
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +375 -310
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/dseq.py +1788 -718
- pydna/dseqrecord.py +197 -179
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +187 -60
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +19 -19
- pydna/utils.py +97 -75
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/METADATA +8 -8
- pydna-5.5.6.dist-info/RECORD +42 -0
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/download.py +0 -32
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.4.dist-info/RECORD +0 -46
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/licenses/LICENSE.txt +0 -0
pydna/amplicon.py
CHANGED
|
@@ -10,22 +10,17 @@
|
|
|
10
10
|
This class is not meant to be use directly but is
|
|
11
11
|
used by the :mod:`amplify` module"""
|
|
12
12
|
|
|
13
|
-
from pydna.tm import dbd_program
|
|
14
|
-
from pydna.tm import program
|
|
15
|
-
from pydna.primer import Primer
|
|
16
|
-
from pydna._pretty import pretty_str
|
|
17
|
-
from pydna.dseqrecord import Dseqrecord
|
|
18
|
-
from pydna.seqrecord import SeqRecord
|
|
19
|
-
import textwrap
|
|
20
|
-
import copy
|
|
13
|
+
from pydna.tm import dbd_program
|
|
14
|
+
from pydna.tm import program
|
|
15
|
+
from pydna.primer import Primer
|
|
16
|
+
from pydna._pretty import pretty_str
|
|
17
|
+
from pydna.dseqrecord import Dseqrecord
|
|
18
|
+
from pydna.seqrecord import SeqRecord
|
|
19
|
+
import textwrap
|
|
20
|
+
import copy
|
|
21
21
|
|
|
22
|
-
# import logging as _logging
|
|
23
22
|
|
|
24
|
-
|
|
25
|
-
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class Amplicon(_Dseqrecord):
|
|
23
|
+
class Amplicon(Dseqrecord):
|
|
29
24
|
"""The Amplicon class holds information about a PCR reaction involving two
|
|
30
25
|
primers and one template. This class is used by the Anneal class and is not
|
|
31
26
|
meant to be instantiated directly.
|
|
@@ -69,12 +64,12 @@ class Amplicon(_Dseqrecord):
|
|
|
69
64
|
return obj
|
|
70
65
|
|
|
71
66
|
def __getitem__(self, sl):
|
|
72
|
-
answer =
|
|
67
|
+
answer = copy.copy(self)
|
|
73
68
|
answer.seq = answer.seq.__getitem__(sl)
|
|
74
69
|
# answer.seq.alphabet = self.seq.alphabet
|
|
75
|
-
sr =
|
|
70
|
+
sr = SeqRecord("n" * len(self))
|
|
76
71
|
sr.features = self.features
|
|
77
|
-
answer.features =
|
|
72
|
+
answer.features = SeqRecord.__getitem__(sr, sl).features
|
|
78
73
|
return answer
|
|
79
74
|
|
|
80
75
|
def __repr__(self):
|
|
@@ -90,8 +85,8 @@ class Amplicon(_Dseqrecord):
|
|
|
90
85
|
def reverse_complement(self):
|
|
91
86
|
r = type(self)(super().reverse_complement())
|
|
92
87
|
r.template = self.template.rc()
|
|
93
|
-
r.forward_primer =
|
|
94
|
-
r.reverse_primer =
|
|
88
|
+
r.forward_primer = copy.copy(self.reverse_primer)
|
|
89
|
+
r.reverse_primer = copy.copy(self.forward_primer)
|
|
95
90
|
r.forward_primer.position, r.reverse_primer.position = (
|
|
96
91
|
r.reverse_primer.position,
|
|
97
92
|
r.forward_primer.position,
|
|
@@ -143,23 +138,23 @@ class Amplicon(_Dseqrecord):
|
|
|
143
138
|
{" " * ft}3{fzc}...{rzc}5
|
|
144
139
|
"""
|
|
145
140
|
# breakpoint()
|
|
146
|
-
return
|
|
141
|
+
return pretty_str(textwrap.dedent(f).strip("\n"))
|
|
147
142
|
|
|
148
143
|
def set_forward_primer_footprint(self, length):
|
|
149
|
-
self.forward_primer =
|
|
144
|
+
self.forward_primer = Primer(
|
|
150
145
|
self.forward_primer.tail + self.seq[:length], footprint=length
|
|
151
146
|
)
|
|
152
147
|
|
|
153
148
|
def set_reverse_primer_footprint(self, length):
|
|
154
|
-
self.reverse_primer =
|
|
149
|
+
self.reverse_primer = Primer(
|
|
155
150
|
self.reverse_primer.tail + self.seq[:length], footprint=length
|
|
156
151
|
)
|
|
157
152
|
|
|
158
153
|
def program(self):
|
|
159
|
-
return
|
|
154
|
+
return program(self)
|
|
160
155
|
|
|
161
156
|
def dbd_program(self):
|
|
162
|
-
return
|
|
157
|
+
return dbd_program(self)
|
|
163
158
|
|
|
164
159
|
def primers(self):
|
|
165
160
|
return self.forward_primer, self.reverse_primer
|
pydna/amplify.py
CHANGED
|
@@ -13,48 +13,22 @@ PCR product. The Anneal class should be used if more flexibility is required.
|
|
|
13
13
|
Primers with 5' tails as well as inverse PCR on circular templates are handled
|
|
14
14
|
correctly."""
|
|
15
15
|
|
|
16
|
-
from pydna._pretty import pretty_str
|
|
17
|
-
from pydna.utils import flatten
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
from pydna.
|
|
21
|
-
from pydna.
|
|
22
|
-
from pydna.
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from Bio.SeqFeature import
|
|
26
|
-
from
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
import
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
import operator as _operator
|
|
33
|
-
|
|
34
|
-
# import os as _os
|
|
35
|
-
|
|
36
|
-
# import logging as _logging
|
|
37
|
-
|
|
38
|
-
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
39
|
-
|
|
40
|
-
_table = { # IUPAC Ambiguity Codes for Nucleotide Degeneracy and U for Uracile
|
|
41
|
-
"A": "A",
|
|
42
|
-
"C": "C",
|
|
43
|
-
"G": "G",
|
|
44
|
-
"T": "T",
|
|
45
|
-
"U": "A", # XXX
|
|
46
|
-
"R": "(A|G)",
|
|
47
|
-
"Y": "(C|T)",
|
|
48
|
-
"S": "(G|C)",
|
|
49
|
-
"W": "(A|T)",
|
|
50
|
-
"K": "(G|T)",
|
|
51
|
-
"M": "(A|C)",
|
|
52
|
-
"B": "(C|G|T)",
|
|
53
|
-
"D": "(A|G|T)",
|
|
54
|
-
"H": "(A|C|T)",
|
|
55
|
-
"V": "(A|C|G)",
|
|
56
|
-
"N": "(A|G|C|T)",
|
|
57
|
-
}
|
|
16
|
+
from pydna._pretty import pretty_str
|
|
17
|
+
from pydna.utils import flatten
|
|
18
|
+
from pydna.utils import shift_feature
|
|
19
|
+
from pydna.amplicon import Amplicon
|
|
20
|
+
from pydna.primer import Primer
|
|
21
|
+
from pydna.seqrecord import SeqRecord
|
|
22
|
+
from pydna.dseqrecord import Dseqrecord
|
|
23
|
+
from Bio.SeqFeature import SeqFeature
|
|
24
|
+
from Bio.SeqFeature import SimpleLocation
|
|
25
|
+
from Bio.SeqFeature import CompoundLocation
|
|
26
|
+
from pydna.seq import Seq
|
|
27
|
+
import re
|
|
28
|
+
import copy
|
|
29
|
+
import operator
|
|
30
|
+
from pydna.alphabet import iupac_compl_regex
|
|
31
|
+
from pydna.utils import anneal_from_left
|
|
58
32
|
|
|
59
33
|
|
|
60
34
|
def _annealing_positions(primer, template, limit):
|
|
@@ -70,13 +44,14 @@ def _annealing_positions(primer, template, limit):
|
|
|
70
44
|
|
|
71
45
|
<- - - - - - - - - - template - - - - - - - - - - - - - >
|
|
72
46
|
|
|
73
|
-
|
|
74
|
-
|
|
47
|
+
< ----- start = 26 ------>
|
|
48
|
+
5'- gctactacacacgtactgactgcctccaagatagagtcagtaaccacactcgatag...3'
|
|
75
49
|
||||||||||||||||||||||||||||||||||||||||||||||||
|
|
76
50
|
3'-gttctatctcagtcattggtgtATAGTG-5'
|
|
77
51
|
|
|
78
52
|
<-footprint length -->
|
|
79
53
|
|
|
54
|
+
|
|
80
55
|
Parameters
|
|
81
56
|
----------
|
|
82
57
|
primer : string
|
|
@@ -85,7 +60,7 @@ def _annealing_positions(primer, template, limit):
|
|
|
85
60
|
template : string
|
|
86
61
|
The template sequence 5'-3'
|
|
87
62
|
|
|
88
|
-
limit : int
|
|
63
|
+
limit : int
|
|
89
64
|
footprint needs to be at least of length limit.
|
|
90
65
|
|
|
91
66
|
Returns
|
|
@@ -94,32 +69,37 @@ def _annealing_positions(primer, template, limit):
|
|
|
94
69
|
[ (start1, footprint1), (start2, footprint2) ,..., ]
|
|
95
70
|
"""
|
|
96
71
|
|
|
72
|
+
# under_tail
|
|
73
|
+
# anchor AACCACACTCGAT
|
|
74
|
+
# CAAGATAGAGTCAGT
|
|
75
|
+
# |||||||||||||||
|
|
76
|
+
# gttctatctcagtca
|
|
77
|
+
# ttggtgtATAGTG revprimer
|
|
78
|
+
# tail
|
|
79
|
+
#
|
|
80
|
+
# | <- limit -> |
|
|
81
|
+
|
|
97
82
|
# return empty list if primer too short
|
|
98
83
|
if len(primer) < limit:
|
|
99
84
|
return []
|
|
100
85
|
|
|
101
|
-
|
|
86
|
+
revprimer = primer[::-1]
|
|
102
87
|
|
|
103
88
|
# head is minimum part of primer that must anneal
|
|
104
|
-
head =
|
|
89
|
+
head = revprimer[:limit].upper()
|
|
90
|
+
tail = revprimer[limit:].upper()
|
|
105
91
|
|
|
106
92
|
# Make regex pattern that reflects extended IUPAC DNA code
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
footprint = len(
|
|
118
|
-
list(_itertools.takewhile(lambda x: x[0] == x[1], zip(tail, tm)))
|
|
119
|
-
)
|
|
120
|
-
results.append((match_start, footprint + limit))
|
|
121
|
-
return results
|
|
122
|
-
return []
|
|
93
|
+
head_regex = "".join(iupac_compl_regex[key] for key in head)
|
|
94
|
+
primer_regex = f"(?:({head_regex})(.{{0,{len(primer) - limit}}}))"
|
|
95
|
+
|
|
96
|
+
results = []
|
|
97
|
+
for m in re.finditer(primer_regex, template.upper()):
|
|
98
|
+
anchor, under_tail = m.groups()
|
|
99
|
+
match_start = m.start()
|
|
100
|
+
match_extension = anneal_from_left(tail, under_tail[::-1])
|
|
101
|
+
results.append((match_start, limit + match_extension))
|
|
102
|
+
return results
|
|
123
103
|
|
|
124
104
|
|
|
125
105
|
# class _Memoize(type):
|
|
@@ -219,7 +199,7 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
219
199
|
|
|
220
200
|
"""
|
|
221
201
|
self.primers = primers
|
|
222
|
-
self.template =
|
|
202
|
+
self.template = copy.deepcopy(template)
|
|
223
203
|
|
|
224
204
|
self.limit = limit
|
|
225
205
|
self.kwargs = kwargs
|
|
@@ -242,7 +222,7 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
242
222
|
for p in self.primers:
|
|
243
223
|
self.forward_primers.extend(
|
|
244
224
|
(
|
|
245
|
-
|
|
225
|
+
Primer(
|
|
246
226
|
p,
|
|
247
227
|
# template = self.template,
|
|
248
228
|
position=tcl - pos - min(self.template.seq.ovhg, 0),
|
|
@@ -254,7 +234,7 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
254
234
|
)
|
|
255
235
|
self.reverse_primers.extend(
|
|
256
236
|
(
|
|
257
|
-
|
|
237
|
+
Primer(
|
|
258
238
|
p,
|
|
259
239
|
# template = self.template,
|
|
260
240
|
position=pos + max(0, self.template.seq.ovhg),
|
|
@@ -265,16 +245,16 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
265
245
|
)
|
|
266
246
|
)
|
|
267
247
|
|
|
268
|
-
self.forward_primers.sort(key=
|
|
269
|
-
self.reverse_primers.sort(key=
|
|
248
|
+
self.forward_primers.sort(key=operator.attrgetter("position"))
|
|
249
|
+
self.reverse_primers.sort(key=operator.attrgetter("position"), reverse=True)
|
|
270
250
|
|
|
271
251
|
for fp in self.forward_primers:
|
|
272
252
|
if fp.position - fp._fp >= 0:
|
|
273
253
|
start = fp.position - fp._fp
|
|
274
254
|
end = fp.position
|
|
275
255
|
self.template.features.append(
|
|
276
|
-
|
|
277
|
-
|
|
256
|
+
SeqFeature(
|
|
257
|
+
SimpleLocation(start, end, strand=1),
|
|
278
258
|
type="primer_bind",
|
|
279
259
|
qualifiers={
|
|
280
260
|
"label": [fp.name],
|
|
@@ -287,11 +267,11 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
287
267
|
else:
|
|
288
268
|
start = len(self.template) - fp._fp + fp.position
|
|
289
269
|
end = start + fp._fp - len(self.template)
|
|
290
|
-
sf =
|
|
291
|
-
|
|
270
|
+
sf = SeqFeature(
|
|
271
|
+
CompoundLocation(
|
|
292
272
|
[
|
|
293
|
-
|
|
294
|
-
|
|
273
|
+
SimpleLocation(start, len(self.template)),
|
|
274
|
+
SimpleLocation(0, end),
|
|
295
275
|
]
|
|
296
276
|
),
|
|
297
277
|
type="primer_bind",
|
|
@@ -309,8 +289,8 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
309
289
|
start = rp.position
|
|
310
290
|
end = rp.position + rp._fp
|
|
311
291
|
self.template.features.append(
|
|
312
|
-
|
|
313
|
-
|
|
292
|
+
SeqFeature(
|
|
293
|
+
SimpleLocation(start, end, strand=-1),
|
|
314
294
|
type="primer_bind",
|
|
315
295
|
qualifiers={
|
|
316
296
|
"label": [rp.name],
|
|
@@ -324,11 +304,11 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
324
304
|
start = rp.position
|
|
325
305
|
end = rp.position + rp._fp - len(self.template)
|
|
326
306
|
self.template.features.append(
|
|
327
|
-
|
|
328
|
-
|
|
307
|
+
SeqFeature(
|
|
308
|
+
CompoundLocation(
|
|
329
309
|
[
|
|
330
|
-
|
|
331
|
-
|
|
310
|
+
SimpleLocation(0, end, strand=-1),
|
|
311
|
+
SimpleLocation(start, len(self.template), strand=-1),
|
|
332
312
|
],
|
|
333
313
|
),
|
|
334
314
|
type="primer_bind",
|
|
@@ -368,15 +348,15 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
368
348
|
continue
|
|
369
349
|
# Shift features to the right if there was a tail
|
|
370
350
|
shift_amount = len(fp.tail)
|
|
371
|
-
feats = [
|
|
351
|
+
feats = [shift_feature(f, shift_amount, None) for f in feats]
|
|
372
352
|
|
|
373
353
|
if tpl.circular and fp.position == rp.position:
|
|
374
|
-
prd =
|
|
354
|
+
prd = Dseqrecord(fp) + Dseqrecord(rp).reverse_complement()
|
|
375
355
|
else:
|
|
376
356
|
prd = (
|
|
377
|
-
|
|
357
|
+
Dseqrecord(fp)
|
|
378
358
|
+ tpl[fp.position : rp.position]
|
|
379
|
-
+
|
|
359
|
+
+ Dseqrecord(rp).reverse_complement()
|
|
380
360
|
)
|
|
381
361
|
prd.features = feats
|
|
382
362
|
full_tmpl_features = [
|
|
@@ -393,16 +373,16 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
393
373
|
new_identifier = " ".join(ft.qualifiers["note"])
|
|
394
374
|
|
|
395
375
|
from pydna.utils import (
|
|
396
|
-
identifier_from_string
|
|
376
|
+
identifier_from_string,
|
|
397
377
|
) # TODO: clean this up
|
|
398
378
|
|
|
399
379
|
prd.name = (
|
|
400
|
-
|
|
380
|
+
identifier_from_string(new_identifier)[:16]
|
|
401
381
|
or self.kwargs.get("name")
|
|
402
382
|
or f"{len(prd)}bp_PCR_prod"[:16]
|
|
403
383
|
)
|
|
404
384
|
prd.id = (
|
|
405
|
-
|
|
385
|
+
identifier_from_string(new_identifier)[:16]
|
|
406
386
|
or self.kwargs.get("id")
|
|
407
387
|
or f"{len(prd)}bp"[:16]
|
|
408
388
|
)
|
|
@@ -410,7 +390,7 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
410
390
|
"description"
|
|
411
391
|
) or "pcr_product_{}_{}".format(fp.description, rp.description)
|
|
412
392
|
|
|
413
|
-
amplicon =
|
|
393
|
+
amplicon = Amplicon(
|
|
414
394
|
prd,
|
|
415
395
|
template=tpl,
|
|
416
396
|
forward_primer=fp,
|
|
@@ -456,12 +436,12 @@ class Anneal(object): # ), metaclass=_Memoize):
|
|
|
456
436
|
)
|
|
457
437
|
else:
|
|
458
438
|
mystring += "No reverse primers anneal...\n"
|
|
459
|
-
return
|
|
439
|
+
return pretty_str(mystring.strip())
|
|
460
440
|
|
|
461
441
|
report = __str__
|
|
462
442
|
|
|
463
443
|
|
|
464
|
-
def pcr(*args, **kwargs) ->
|
|
444
|
+
def pcr(*args, **kwargs) -> Amplicon:
|
|
465
445
|
"""pcr is a convenience function for the Anneal class to simplify its
|
|
466
446
|
usage, especially from the command line. If more than one or no PCR
|
|
467
447
|
product is formed, a ValueError is raised.
|
|
@@ -523,15 +503,15 @@ tatcgactgtatcatctgatagcac")
|
|
|
523
503
|
|
|
524
504
|
"""
|
|
525
505
|
|
|
526
|
-
output =
|
|
506
|
+
output = flatten(args) # flatten
|
|
527
507
|
new = []
|
|
528
508
|
for s in output:
|
|
529
509
|
if hasattr(s, "watson"):
|
|
530
|
-
s =
|
|
510
|
+
s = SeqRecord(Seq(s.watson))
|
|
531
511
|
elif hasattr(s, "transcribe"):
|
|
532
|
-
s =
|
|
512
|
+
s = SeqRecord(s)
|
|
533
513
|
elif isinstance(s, str):
|
|
534
|
-
s =
|
|
514
|
+
s = SeqRecord(Seq(s))
|
|
535
515
|
elif hasattr(s, "features"):
|
|
536
516
|
pass
|
|
537
517
|
else:
|
|
@@ -546,7 +526,7 @@ tatcgactgtatcatctgatagcac")
|
|
|
546
526
|
new = [new[0].forward_primer, new[0].reverse_primer, new[0].template]
|
|
547
527
|
|
|
548
528
|
if not hasattr(new[-1].seq, "watson"):
|
|
549
|
-
new[-1] =
|
|
529
|
+
new[-1] = Dseqrecord(s)
|
|
550
530
|
|
|
551
531
|
anneal_primers = Anneal(new[:-1], new[-1], **kwargs)
|
|
552
532
|
|