pydna 5.5.1__py3-none-any.whl → 5.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +116 -134
- pydna/_pretty.py +2 -14
- pydna/all.py +10 -20
- pydna/amplicon.py +25 -20
- pydna/amplify.py +46 -26
- pydna/assembly.py +50 -27
- pydna/assembly2.py +1902 -0
- pydna/common_sub_strings.py +2 -12
- pydna/contig.py +39 -22
- pydna/crispr.py +8 -13
- pydna/design.py +89 -59
- pydna/download.py +10 -18
- pydna/dseq.py +119 -59
- pydna/dseqrecord.py +88 -45
- pydna/fakeseq.py +0 -11
- pydna/fusionpcr.py +3 -1
- pydna/gateway.py +2 -2
- pydna/gel.py +8 -13
- pydna/genbank.py +33 -32
- pydna/genbankfile.py +8 -13
- pydna/genbankfixer.py +41 -28
- pydna/genbankrecord.py +11 -14
- pydna/goldengate.py +2 -2
- pydna/ladders.py +4 -11
- pydna/ligate.py +8 -14
- pydna/parsers.py +5 -12
- pydna/primer.py +3 -12
- pydna/readers.py +0 -11
- pydna/seq.py +21 -18
- pydna/seqrecord.py +19 -19
- pydna/sequence_picker.py +3 -12
- pydna/tm.py +13 -15
- pydna/types.py +41 -0
- pydna/utils.py +173 -58
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/METADATA +17 -3
- pydna-5.5.2.dist-info/RECORD +43 -0
- pydna/editor.py +0 -119
- pydna/myenzymes.py +0 -51
- pydna/myprimers.py +0 -219
- pydna-5.5.1.dist-info/RECORD +0 -44
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/LICENSE.txt +0 -0
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/WHEEL +0 -0
pydna/genbankfixer.py
CHANGED
|
@@ -33,7 +33,9 @@ GoodLocus = (
|
|
|
33
33
|
+ _pp.Word(_pp.nums).setResultsName("size")
|
|
34
34
|
+ _pp.Suppress(_pp.CaselessLiteral("bp"))
|
|
35
35
|
+ _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
|
|
36
|
-
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
36
|
+
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
37
|
+
"topology"
|
|
38
|
+
)
|
|
37
39
|
+ _pp.Optional(_pp.Word(_pp.alphas), default=" ").setResultsName("divcode")
|
|
38
40
|
+ _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
|
|
39
41
|
)
|
|
@@ -44,7 +46,9 @@ BrokenLocus1 = (
|
|
|
44
46
|
+ _pp.Word(_pp.nums).setResultsName("size")
|
|
45
47
|
+ _pp.Suppress(_pp.CaselessLiteral("bp"))
|
|
46
48
|
+ _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
|
|
47
|
-
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
49
|
+
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
50
|
+
"topology"
|
|
51
|
+
)
|
|
48
52
|
+ _pp.Optional(_pp.Word(_pp.alphas), default=" ").setResultsName("divcode")
|
|
49
53
|
+ _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
|
|
50
54
|
)
|
|
@@ -97,7 +101,8 @@ CapWord = _pp.Word("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
|
|
97
101
|
SpacedLine = _pp.White(min=1) + _pp.CharsNotIn("\n") + _pp.LineEnd()
|
|
98
102
|
# HeaderLine = CapWord + CharsNotIn("\n") + LineEnd()
|
|
99
103
|
GenericEntry = _pp.Group(
|
|
100
|
-
CapWord
|
|
104
|
+
CapWord
|
|
105
|
+
+ _pp.Combine(_pp.CharsNotIn("\n") + _pp.LineEnd() + _pp.ZeroOrMore(SpacedLine))
|
|
101
106
|
).setResultsName("generics", listAllMatches=True)
|
|
102
107
|
|
|
103
108
|
|
|
@@ -135,7 +140,9 @@ RPAREN = _pp.Suppress(")")
|
|
|
135
140
|
SEP = _pp.Suppress(_pp.Literal(".."))
|
|
136
141
|
|
|
137
142
|
# recognize numbers w. < & > uncertainty specs, then strip the <> chars to make it fixed
|
|
138
|
-
gbIndex = _pp.Word(_pp.nums + "<>").setParseAction(
|
|
143
|
+
gbIndex = _pp.Word(_pp.nums + "<>").setParseAction(
|
|
144
|
+
lambda s, l_, t: int(t[0].replace("<", "").replace(">", ""))
|
|
145
|
+
)
|
|
139
146
|
SimpleSlice = _pp.Group(gbIndex + SEP + gbIndex) | _pp.Group(gbIndex).setParseAction(
|
|
140
147
|
lambda s, l_, t: [[t[0][0], t[0][0]]]
|
|
141
148
|
)
|
|
@@ -194,12 +201,19 @@ QuoteFeaturekeyval = _pp.Group(
|
|
|
194
201
|
|
|
195
202
|
# Unquoted KeyVal: /key=value. Such values are not always single-line: ApE stores long labels this way, so continuation lines must be handled too.
|
|
196
203
|
# NoQuoteFeaturekeyval = Group(Suppress('/') + Word(alphas+nums+"_-") + Suppress('=') + OneOrMore(CharsNotIn("\n")) )
|
|
197
|
-
keyvalspacedline =
|
|
204
|
+
keyvalspacedline = (
|
|
205
|
+
_pp.White(exact=21)
|
|
206
|
+
+ _pp.CharsNotIn("/")
|
|
207
|
+
+ _pp.OneOrMore(_pp.CharsNotIn("\n"))
|
|
208
|
+
+ _pp.LineEnd()
|
|
209
|
+
)
|
|
198
210
|
NoQuoteFeaturekeyval = _pp.Group(
|
|
199
211
|
_pp.Suppress("/")
|
|
200
212
|
+ _pp.Word(_pp.alphas + _pp.nums + "_-")
|
|
201
213
|
+ _pp.Suppress("=")
|
|
202
|
-
+ _pp.Combine(
|
|
214
|
+
+ _pp.Combine(
|
|
215
|
+
_pp.CharsNotIn("\n") + _pp.LineEnd() + _pp.ZeroOrMore(keyvalspacedline)
|
|
216
|
+
)
|
|
203
217
|
)
|
|
204
218
|
|
|
205
219
|
# Special Case for Numerical Vals: /bases=12 OR /bases="12"
|
|
@@ -213,14 +227,18 @@ NumFeaturekeyval = _pp.Group(
|
|
|
213
227
|
|
|
214
228
|
# Key Only KeyVal: /pseudo
|
|
215
229
|
# post-parse convert it into a pair to resemble the structure of the first three cases i.e. [pseudo, True]
|
|
216
|
-
FlagFeaturekeyval = _pp.Group(
|
|
217
|
-
|
|
218
|
-
)
|
|
230
|
+
FlagFeaturekeyval = _pp.Group(
|
|
231
|
+
_pp.Suppress("/") + _pp.Word(_pp.alphas + _pp.nums + "_-")
|
|
232
|
+
).setParseAction(lambda s, l_, t: [[t[0][0], True]])
|
|
219
233
|
|
|
220
234
|
Feature = _pp.Group(
|
|
221
|
-
_pp.Word(_pp.alphas + _pp.nums + "_-").setParseAction(
|
|
235
|
+
_pp.Word(_pp.alphas + _pp.nums + "_-").setParseAction(
|
|
236
|
+
lambda s, l_, t: [["type", t[0]]]
|
|
237
|
+
)
|
|
222
238
|
+ featLocation.setResultsName("location")
|
|
223
|
-
+ _pp.OneOrMore(
|
|
239
|
+
+ _pp.OneOrMore(
|
|
240
|
+
NumFeaturekeyval | QuoteFeaturekeyval | NoQuoteFeaturekeyval | FlagFeaturekeyval
|
|
241
|
+
)
|
|
224
242
|
)
|
|
225
243
|
|
|
226
244
|
FeaturesEntry = (
|
|
@@ -234,7 +252,9 @@ FeaturesEntry = (
|
|
|
234
252
|
|
|
235
253
|
# sequence is just a column-spaced big table of dna nucleotides
|
|
236
254
|
# should it recognize full IUPAC alphabet? NCBI uses n for unknown region
|
|
237
|
-
Sequence = _pp.OneOrMore(
|
|
255
|
+
Sequence = _pp.OneOrMore(
|
|
256
|
+
_pp.Suppress(_pp.Word(_pp.nums)) + _pp.OneOrMore(_pp.Word("ACGTacgtNn"))
|
|
257
|
+
)
|
|
238
258
|
|
|
239
259
|
# Group( ) hides the setResultsName names def'd inside, such that one needs to first access this group and then access the dict of contents inside
|
|
240
260
|
SequenceEntry = _pp.Suppress(_pp.Literal("ORIGIN")) + Sequence.setParseAction(
|
|
@@ -352,7 +372,9 @@ def wrapstring(str_, rowstart, rowend, padfirst=True):
|
|
|
352
372
|
if linenum == 0 and not padfirst:
|
|
353
373
|
wrappedstr += str_[linenum * rowlen : (linenum + 1) * rowlen] + "\n"
|
|
354
374
|
else:
|
|
355
|
-
wrappedstr +=
|
|
375
|
+
wrappedstr += (
|
|
376
|
+
" " * leftpad + str_[linenum * rowlen : (linenum + 1) * rowlen] + "\n"
|
|
377
|
+
)
|
|
356
378
|
# if str_.startswith("/translation="):
|
|
357
379
|
# print(str_)
|
|
358
380
|
# print(wrappedstr)
|
|
@@ -480,7 +502,9 @@ def toGB(jseq):
|
|
|
480
502
|
fstr += wrapstring("/" + str(k) + "=" + str(feat[k]), 21, 80)
|
|
481
503
|
# standard: wrap val in quotes
|
|
482
504
|
else:
|
|
483
|
-
fstr += wrapstring(
|
|
505
|
+
fstr += wrapstring(
|
|
506
|
+
"/" + str(k) + "=" + '"' + str(feat[k]) + '"', 21, 80
|
|
507
|
+
)
|
|
484
508
|
featuresstr += fstr
|
|
485
509
|
|
|
486
510
|
# the spaced, numbered sequence
|
|
@@ -511,11 +535,11 @@ def gbtext_clean(gbtext):
|
|
|
511
535
|
... //'''
|
|
512
536
|
>>> from pydna.readers import read
|
|
513
537
|
>>> read(s) # doctest: +SKIP
|
|
514
|
-
/
|
|
538
|
+
... /site-packages/Bio/GenBank/Scanner.py:1388: BiopythonParserWarning: Malformed LOCUS line found - is this correct?
|
|
515
539
|
:'LOCUS New_DNA 3 bp DNA CIRCULAR SYN 19-JUN-2013\\n'
|
|
516
540
|
"correct?\\n:%r" % line, BiopythonParserWarning)
|
|
517
541
|
Traceback (most recent call last):
|
|
518
|
-
File "/
|
|
542
|
+
File "... /pydna/readers.py", line 48, in read
|
|
519
543
|
results = results.pop()
|
|
520
544
|
IndexError: pop from empty list
|
|
521
545
|
<BLANKLINE>
|
|
@@ -523,7 +547,7 @@ def gbtext_clean(gbtext):
|
|
|
523
547
|
<BLANKLINE>
|
|
524
548
|
Traceback (most recent call last):
|
|
525
549
|
File "<stdin>", line 1, in <module>
|
|
526
|
-
File "/
|
|
550
|
+
File "... /pydna/readers.py", line 50, in read
|
|
527
551
|
raise ValueError("No sequences found in data:\\n({})".format(data[:79]))
|
|
528
552
|
ValueError: No sequences found in data:
|
|
529
553
|
(LOCUS New_DNA 3 bp DNA CIRCULAR SYN 19-JUN-2013
|
|
@@ -570,14 +594,3 @@ def gbtext_clean(gbtext):
|
|
|
570
594
|
Result = _namedtuple("Result", "gbtext jseq")
|
|
571
595
|
result = Result(_pretty_str(toGB(jseq).strip()), jseq)
|
|
572
596
|
return result
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
if __name__ == "__main__":
|
|
576
|
-
import os as _os
|
|
577
|
-
|
|
578
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
579
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
580
|
-
import doctest
|
|
581
|
-
|
|
582
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
583
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/genbankrecord.py
CHANGED
|
@@ -11,7 +11,9 @@ import os as _os
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class GenbankRecord(_Dseqrecord):
|
|
14
|
-
def __init__(
|
|
14
|
+
def __init__(
|
|
15
|
+
self, record, *args, item="accession", start=None, stop=None, strand=1, **kwargs
|
|
16
|
+
):
|
|
15
17
|
super().__init__(record, *args, **kwargs)
|
|
16
18
|
self.item = item
|
|
17
19
|
self.start = start
|
|
@@ -64,7 +66,9 @@ class GenbankRecord(_Dseqrecord):
|
|
|
64
66
|
return obj
|
|
65
67
|
|
|
66
68
|
@classmethod
|
|
67
|
-
def from_SeqRecord(
|
|
69
|
+
def from_SeqRecord(
|
|
70
|
+
cls, record, *args, item="accession", start=None, stop=None, strand=1, **kwargs
|
|
71
|
+
):
|
|
68
72
|
obj = super().from_SeqRecord(record, *args, **kwargs)
|
|
69
73
|
obj.item = item
|
|
70
74
|
obj.start = start
|
|
@@ -95,7 +99,9 @@ class GenbankRecord(_Dseqrecord):
|
|
|
95
99
|
|
|
96
100
|
def __repr__(self):
|
|
97
101
|
"""returns a short string representation of the object"""
|
|
98
|
-
return "Gbnk({}{} {})".format(
|
|
102
|
+
return "Gbnk({}{} {})".format(
|
|
103
|
+
{True: "-", False: "o"}[not self.circular], len(self), self._repr
|
|
104
|
+
)
|
|
99
105
|
|
|
100
106
|
def _repr_pretty_(self, p, cycle):
|
|
101
107
|
"""returns a short string representation of the object"""
|
|
@@ -121,7 +127,7 @@ class GenbankRecord(_Dseqrecord):
|
|
|
121
127
|
|
|
122
128
|
code = (
|
|
123
129
|
"from pydna.genbank import Genbank\n"
|
|
124
|
-
f"gb = Genbank('{_os.
|
|
130
|
+
f"gb = Genbank('{_os.getenv('pydna_email')}')\n"
|
|
125
131
|
f"seq = gb.nucleotide('{self.item}'"
|
|
126
132
|
)
|
|
127
133
|
if self.start and self.start:
|
|
@@ -141,7 +147,7 @@ class GenbankRecord(_Dseqrecord):
|
|
|
141
147
|
|
|
142
148
|
code = (
|
|
143
149
|
"from Bio import Entrez, SeqIO\n"
|
|
144
|
-
f"Entrez.email = '{_os.
|
|
150
|
+
f"Entrez.email = '{_os.getenv('pydna_email')}'\n"
|
|
145
151
|
"handle = Entrez.efetch(db='nuccore',\n"
|
|
146
152
|
f" id='{self.item}',\n"
|
|
147
153
|
" rettype='gbwithparts',\n"
|
|
@@ -160,12 +166,3 @@ class GenbankRecord(_Dseqrecord):
|
|
|
160
166
|
code += "record = SeqIO.read(handle, 'genbank')"
|
|
161
167
|
|
|
162
168
|
return _ps(code)
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if __name__ == "__main__":
|
|
166
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
167
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
168
|
-
import doctest
|
|
169
|
-
|
|
170
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
171
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/goldengate.py
CHANGED
|
@@ -27,9 +27,9 @@ from Bio.Restriction import BsaI, BsmBI, BbsI, FokI
|
|
|
27
27
|
from pydna.dseqrecord import Dseqrecord as _Dseqrecord
|
|
28
28
|
|
|
29
29
|
# from copy import deepcopy as _deepcopy
|
|
30
|
-
import logging as _logging
|
|
30
|
+
# import logging as _logging
|
|
31
31
|
|
|
32
|
-
_module_logger = _logging.getLogger("pydna." + __name__)
|
|
32
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
33
33
|
|
|
34
34
|
BsaI, BsmBI, BbsI, FokI
|
|
35
35
|
|
pydna/ladders.py
CHANGED
|
@@ -19,7 +19,10 @@ a gel image. An example can be found in scripts/molecular_weight_standards.ods.
|
|
|
19
19
|
from pydna.fakeseq import FakeSeq as _FakeSeq
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
PennStateLadder = [
|
|
22
|
+
PennStateLadder = [
|
|
23
|
+
_FakeSeq(int(n))
|
|
24
|
+
for n in (10000, 7750, 5000, 4000, 3000, 2000, 1500, 1000, 750, 500)
|
|
25
|
+
]
|
|
23
26
|
|
|
24
27
|
|
|
25
28
|
GeneRuler_1kb = [
|
|
@@ -131,13 +134,3 @@ FakeGel = [
|
|
|
131
134
|
],
|
|
132
135
|
PennStateLadder,
|
|
133
136
|
]
|
|
134
|
-
|
|
135
|
-
if __name__ == "__main__":
|
|
136
|
-
import os as _os
|
|
137
|
-
|
|
138
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
139
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
140
|
-
import doctest
|
|
141
|
-
|
|
142
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
143
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/ligate.py
CHANGED
|
@@ -9,9 +9,10 @@ from operator import add
|
|
|
9
9
|
from functools import reduce
|
|
10
10
|
import networkx as _nx
|
|
11
11
|
from itertools import permutations
|
|
12
|
-
import logging as _logging
|
|
13
12
|
|
|
14
|
-
|
|
13
|
+
# import logging as _logging
|
|
14
|
+
|
|
15
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
def ligate(fragments: list):
|
|
@@ -51,18 +52,11 @@ def ligate(fragments: list):
|
|
|
51
52
|
|
|
52
53
|
cpaths = [p for p in sorted(_nx.simple_cycles(G), key=len) if len(p) > 1]
|
|
53
54
|
csequences = [reduce(add, x).looped() for x in cpaths]
|
|
54
|
-
lpaths = [
|
|
55
|
+
lpaths = [
|
|
56
|
+
p
|
|
57
|
+
for p in sorted(_nx.all_simple_paths(G, "begin", "end"), key=len)
|
|
58
|
+
if len(p) > 3
|
|
59
|
+
]
|
|
55
60
|
lsequences = [reduce(add, lp[1:-1]) for lp in lpaths]
|
|
56
61
|
|
|
57
62
|
return csequences, lsequences
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if __name__ == "__main__":
|
|
61
|
-
import os as _os
|
|
62
|
-
|
|
63
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
64
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
65
|
-
import doctest
|
|
66
|
-
|
|
67
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
68
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/parsers.py
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
"""Provides two functions, parse and parse_primers"""
|
|
9
9
|
|
|
10
|
-
import os as _os
|
|
10
|
+
# import os as _os
|
|
11
11
|
import re as _re
|
|
12
12
|
import io as _io
|
|
13
13
|
import textwrap as _textwrap
|
|
@@ -40,7 +40,9 @@ except ImportError:
|
|
|
40
40
|
|
|
41
41
|
# gb_fasta_embl_regex = r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|LOCUS|ID))|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)"
|
|
42
42
|
|
|
43
|
-
gb_fasta_embl_regex =
|
|
43
|
+
gb_fasta_embl_regex = (
|
|
44
|
+
r"(?:^>.+\n^(?:^[^>]+?)(?=\n\n|>|^LOCUS|^ID))|(?:(?:^LOCUS|^ID)(?:(?:.|\n)+?)^//)"
|
|
45
|
+
)
|
|
44
46
|
|
|
45
47
|
# The gb_fasta_embl_regex is meant to be able to extract sequences from
|
|
46
48
|
# text where sequences are mixed with other contents as well
|
|
@@ -95,7 +97,7 @@ def embl_gb_fasta(text):
|
|
|
95
97
|
except ValueError:
|
|
96
98
|
handle.seek(0)
|
|
97
99
|
try:
|
|
98
|
-
parsed = _SeqIO.read(handle, "fasta")
|
|
100
|
+
parsed = _SeqIO.read(handle, "fasta-blast")
|
|
99
101
|
except ValueError:
|
|
100
102
|
handle.close()
|
|
101
103
|
continue
|
|
@@ -206,12 +208,3 @@ def parse(data, ds=True):
|
|
|
206
208
|
def parse_primers(data):
|
|
207
209
|
"""docstring."""
|
|
208
210
|
return [_Primer(x) for x in parse(data, ds=False)]
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
if __name__ == "__main__":
|
|
212
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
213
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
214
|
-
import doctest
|
|
215
|
-
|
|
216
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
217
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/primer.py
CHANGED
|
@@ -14,7 +14,9 @@ from pydna.seqrecord import SeqRecord as _SeqRecord
|
|
|
14
14
|
class Primer(_SeqRecord):
|
|
15
15
|
"""Primer and its position on a template, footprint and tail."""
|
|
16
16
|
|
|
17
|
-
def __init__(
|
|
17
|
+
def __init__(
|
|
18
|
+
self, record, *args, amplicon=None, position=None, footprint=0, **kwargs
|
|
19
|
+
):
|
|
18
20
|
if hasattr(record, "features"): # Seqrecord
|
|
19
21
|
self.__dict__.update(record.__dict__)
|
|
20
22
|
self.__dict__.update(kwargs)
|
|
@@ -57,14 +59,3 @@ class Primer(_SeqRecord):
|
|
|
57
59
|
answer.position = None
|
|
58
60
|
answer._fp = len(self)
|
|
59
61
|
return answer
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if __name__ == "__main__":
|
|
63
|
-
import os as _os
|
|
64
|
-
|
|
65
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
66
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
67
|
-
import doctest
|
|
68
|
-
|
|
69
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
70
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/readers.py
CHANGED
|
@@ -54,14 +54,3 @@ def read_primer(data):
|
|
|
54
54
|
The usage is similar to the :func:`parse_primers` function."""
|
|
55
55
|
|
|
56
56
|
return _Primer(read(data, ds=False))
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if __name__ == "__main__":
|
|
60
|
-
import os as _os
|
|
61
|
-
|
|
62
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
63
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
64
|
-
import doctest
|
|
65
|
-
|
|
66
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
67
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/seq.py
CHANGED
|
@@ -25,9 +25,10 @@ from Bio.Seq import Seq as _Seq
|
|
|
25
25
|
from pydna._pretty import PrettyTable as _PrettyTable
|
|
26
26
|
|
|
27
27
|
from typing import List as _List, Optional as _Optional, Tuple as _Tuple
|
|
28
|
-
import logging as _logging
|
|
29
28
|
|
|
30
|
-
|
|
29
|
+
# import logging as _logging
|
|
30
|
+
|
|
31
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class Seq(_Seq):
|
|
@@ -43,7 +44,9 @@ class Seq(_Seq):
|
|
|
43
44
|
**kwargs,
|
|
44
45
|
) -> "ProteinSeq":
|
|
45
46
|
"""Translate.."""
|
|
46
|
-
p = super().translate(
|
|
47
|
+
p = super().translate(
|
|
48
|
+
*args, stop_symbol=stop_symbol, to_stop=to_stop, cds=cds, gap=gap, **kwargs
|
|
49
|
+
)
|
|
47
50
|
return ProteinSeq(p._data)
|
|
48
51
|
|
|
49
52
|
def gc(self) -> float:
|
|
@@ -78,10 +81,17 @@ class Seq(_Seq):
|
|
|
78
81
|
|
|
79
82
|
def express(self, organism: str = "sce") -> _PrettyTable:
|
|
80
83
|
"""docstring."""
|
|
81
|
-
x = _PrettyTable(
|
|
84
|
+
x = _PrettyTable(
|
|
85
|
+
["cds", "len", "cai", "gc", "sta", "stp", "n-end"]
|
|
86
|
+
+ _rare_codons[organism]
|
|
87
|
+
+ ["rare"]
|
|
88
|
+
)
|
|
82
89
|
val = []
|
|
83
90
|
|
|
84
|
-
val.append(
|
|
91
|
+
val.append(
|
|
92
|
+
f"{self._data.upper().decode('ASCII')[:3]}..."
|
|
93
|
+
f"{self._data.upper().decode('ASCII')[-3:]}"
|
|
94
|
+
)
|
|
85
95
|
val.append(len(self) / 3)
|
|
86
96
|
val.append(self.cai(organism))
|
|
87
97
|
val.append(self.gc())
|
|
@@ -103,7 +113,9 @@ class Seq(_Seq):
|
|
|
103
113
|
|
|
104
114
|
def orfs2(self, minsize: int = 30) -> _List[str]:
|
|
105
115
|
"""docstring."""
|
|
106
|
-
orf = _re.compile(
|
|
116
|
+
orf = _re.compile(
|
|
117
|
+
f"ATG(?:...){{{minsize},}}?(?:TAG|TAA|TGA)", flags=_re.IGNORECASE
|
|
118
|
+
)
|
|
107
119
|
start = 0
|
|
108
120
|
matches: _List[slice] = []
|
|
109
121
|
s = self._data.decode("ASCII")
|
|
@@ -203,7 +215,9 @@ class ProteinSeq(_Seq):
|
|
|
203
215
|
----------
|
|
204
216
|
.. [#] http://wiki.christophchamp.com/index.php/SEGUID
|
|
205
217
|
"""
|
|
206
|
-
return _lsseguid(
|
|
218
|
+
return _lsseguid(
|
|
219
|
+
self._data.decode("utf8").upper(), alphabet="{protein-extended}"
|
|
220
|
+
)
|
|
207
221
|
|
|
208
222
|
def __getitem__(self, key):
|
|
209
223
|
result = super().__getitem__(key)
|
|
@@ -232,14 +246,3 @@ class ProteinSeq(_Seq):
|
|
|
232
246
|
Guruprasad K., Reddy B.V.B., Pandit M.W. Protein Engineering 4:155-161(1990).
|
|
233
247
|
"""
|
|
234
248
|
return self._pa().instability_index()
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
if __name__ == "__main__":
|
|
238
|
-
import os as _os
|
|
239
|
-
|
|
240
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
241
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
242
|
-
import doctest
|
|
243
|
-
|
|
244
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
245
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/seqrecord.py
CHANGED
|
@@ -35,10 +35,10 @@ from copy import copy as _copy
|
|
|
35
35
|
from pydna import _PydnaWarning
|
|
36
36
|
from warnings import warn as _warn
|
|
37
37
|
|
|
38
|
-
import logging as _logging
|
|
38
|
+
# import logging as _logging
|
|
39
39
|
import datetime
|
|
40
40
|
|
|
41
|
-
_module_logger = _logging.getLogger("pydna." + __name__)
|
|
41
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
class SeqRecord(_SeqRecord):
|
|
@@ -87,7 +87,9 @@ class SeqRecord(_SeqRecord):
|
|
|
87
87
|
self.seq = _Seq(self.seq)
|
|
88
88
|
|
|
89
89
|
self.seq._data = b"".join(self.seq._data.split()) # remove whitespaces
|
|
90
|
-
self.annotations = {
|
|
90
|
+
self.annotations = {
|
|
91
|
+
_pretty_str(k): _pretty_str(v) for k, v in self.annotations.items()
|
|
92
|
+
}
|
|
91
93
|
|
|
92
94
|
@classmethod
|
|
93
95
|
def from_Bio_SeqRecord(clc, sr: _SeqRecord):
|
|
@@ -109,7 +111,9 @@ class SeqRecord(_SeqRecord):
|
|
|
109
111
|
if len(value) > 16:
|
|
110
112
|
shortvalue = value[:16]
|
|
111
113
|
_warn(
|
|
112
|
-
("locus property {} truncated" "to 16 chars {}").format(
|
|
114
|
+
("locus property {} truncated" "to 16 chars {}").format(
|
|
115
|
+
value, shortvalue
|
|
116
|
+
),
|
|
113
117
|
_PydnaWarning,
|
|
114
118
|
stacklevel=2,
|
|
115
119
|
)
|
|
@@ -239,7 +243,9 @@ class SeqRecord(_SeqRecord):
|
|
|
239
243
|
f.qualifiers["ApEinfo_fwdcolor"] = [cols[i % len(cols)]]
|
|
240
244
|
f.qualifiers["ApEinfo_revcolor"] = [cols[::-1][i % len(cols)]]
|
|
241
245
|
|
|
242
|
-
def add_feature(
|
|
246
|
+
def add_feature(
|
|
247
|
+
self, x=None, y=None, seq=None, type_="misc", strand=1, *args, **kwargs
|
|
248
|
+
):
|
|
243
249
|
"""Add a feature of type misc to the feature list of the sequence.
|
|
244
250
|
|
|
245
251
|
Parameters
|
|
@@ -327,7 +333,9 @@ class SeqRecord(_SeqRecord):
|
|
|
327
333
|
| 0 | L:ft2 | --> | 2 | 4 | 2 | misc | no |
|
|
328
334
|
+-----+---------------+-----+-----+-----+-----+------+------+
|
|
329
335
|
"""
|
|
330
|
-
x = _PrettyTable(
|
|
336
|
+
x = _PrettyTable(
|
|
337
|
+
["Ft#", "Label or Note", "Dir", "Sta", "End", "Len", "type", "orf?"]
|
|
338
|
+
)
|
|
331
339
|
x.align["Ft#"] = "r" # Left align
|
|
332
340
|
x.align["Label or Note"] = "l" # Left align
|
|
333
341
|
x.align["Len"] = "r"
|
|
@@ -357,7 +365,8 @@ class SeqRecord(_SeqRecord):
|
|
|
357
365
|
len(sf),
|
|
358
366
|
sf.type,
|
|
359
367
|
{True: "yes", False: "no"}[
|
|
360
|
-
self.extract_feature(i).isorf()
|
|
368
|
+
self.extract_feature(i).isorf()
|
|
369
|
+
or self.extract_feature(i).reverse_complement().isorf()
|
|
361
370
|
],
|
|
362
371
|
]
|
|
363
372
|
)
|
|
@@ -480,7 +489,9 @@ class SeqRecord(_SeqRecord):
|
|
|
480
489
|
f"Stamp change.\nNew: {chksum}\nOld: {oldstamp[0]}",
|
|
481
490
|
_PydnaWarning,
|
|
482
491
|
)
|
|
483
|
-
self.annotations["comment"] = (
|
|
492
|
+
self.annotations["comment"] = (
|
|
493
|
+
f"{oldcomment}\n" f"{tool} {chksum} {now()} {comment}"
|
|
494
|
+
).strip()
|
|
484
495
|
return _pretty_str(chksum)
|
|
485
496
|
|
|
486
497
|
def lcs(self, other, *args, limit=25, **kwargs):
|
|
@@ -729,14 +740,3 @@ class ProteinSeqRecord(SeqRecord):
|
|
|
729
740
|
def __format__(self, format):
|
|
730
741
|
"""docstring."""
|
|
731
742
|
return _pretty_str(_SeqRecord.__format__(self, format))
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
if __name__ == "__main__":
|
|
735
|
-
import os as _os
|
|
736
|
-
|
|
737
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
738
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
739
|
-
import doctest
|
|
740
|
-
|
|
741
|
-
doctest.testmod(verbose=True, optionflags=(doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE))
|
|
742
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/sequence_picker.py
CHANGED
|
@@ -7,11 +7,12 @@
|
|
|
7
7
|
|
|
8
8
|
from pydna.dseqrecord import Dseqrecord
|
|
9
9
|
import os as _os
|
|
10
|
-
|
|
10
|
+
|
|
11
|
+
# import logging as _logging
|
|
11
12
|
from Bio.Blast import NCBIWWW
|
|
12
13
|
from Bio.Blast import NCBIXML
|
|
13
14
|
|
|
14
|
-
_module_logger = _logging.getLogger("pydna." + __name__)
|
|
15
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
email = _os.getenv("pydna_email")
|
|
@@ -51,13 +52,3 @@ def genbank_accession(s: str) -> Dseqrecord:
|
|
|
51
52
|
description=(f"{best_alignment.accession} " f"REGION: {start}..{stop}"),
|
|
52
53
|
)
|
|
53
54
|
return result
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if __name__ == "__main__":
|
|
57
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
58
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
59
|
-
import doctest
|
|
60
|
-
|
|
61
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
62
|
-
_os.environ["pydna_cached_funcs"] = cached
|
|
63
|
-
pass
|
pydna/tm.py
CHANGED
|
@@ -213,7 +213,9 @@ def dbd_program(amplicon, tm=tm_dbd, ta=ta_dbd):
|
|
|
213
213
|
|
|
214
214
|
"""
|
|
215
215
|
PfuSso7d_extension_rate = 15 # seconds/kB PCR product
|
|
216
|
-
extension_time_PfuSso7d = max(
|
|
216
|
+
extension_time_PfuSso7d = max(
|
|
217
|
+
10, int(PfuSso7d_extension_rate * len(amplicon) / 1000)
|
|
218
|
+
) # seconds
|
|
217
219
|
|
|
218
220
|
# The program returned is either a two-step or three-step program
|
|
219
221
|
# This depends on the tm and length of the primers in the
|
|
@@ -324,7 +326,10 @@ def tmbresluc(primer: str, *args, primerc=500.0, saltc=50, **kwargs):
|
|
|
324
326
|
dH += _thermodynamic_data.dHBr[n1 - 97][n2 - 97]
|
|
325
327
|
dS += _thermodynamic_data.dSBr[n1 - 97][n2 - 97]
|
|
326
328
|
|
|
327
|
-
tm = (
|
|
329
|
+
tm = (
|
|
330
|
+
dH / (1.9872 * _math.log(pri / 1600) + dS)
|
|
331
|
+
+ (16.6 * _math.log(saltc)) / _math.log(10)
|
|
332
|
+
) - 273.15
|
|
328
333
|
|
|
329
334
|
return tm
|
|
330
335
|
|
|
@@ -365,25 +370,18 @@ def tm_neb(primer, conc=0.5, prodcode="q5-0"):
|
|
|
365
370
|
try:
|
|
366
371
|
res = requests.get(url, params=params, headers=headers)
|
|
367
372
|
except requests.exceptions.ConnectionError as e:
|
|
368
|
-
raise requests.exceptions.ConnectionError(
|
|
373
|
+
raise requests.exceptions.ConnectionError(
|
|
374
|
+
"Could not connect to NEB API."
|
|
375
|
+
) from e
|
|
369
376
|
if res.status_code != 200:
|
|
370
377
|
if "error" in res.json():
|
|
371
378
|
raise requests.exceptions.HTTPError(res.status_code, res.json()["error"])
|
|
372
379
|
else:
|
|
373
|
-
raise requests.exceptions.HTTPError(
|
|
380
|
+
raise requests.exceptions.HTTPError(
|
|
381
|
+
res.status_code, res.text
|
|
382
|
+
) # pragma: no cover
|
|
374
383
|
r = res.json()
|
|
375
384
|
if r["success"]:
|
|
376
385
|
return r["data"]["tm1"]
|
|
377
386
|
else:
|
|
378
387
|
raise requests.exceptions.HTTPError(r["error"])
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
if __name__ == "__main__":
|
|
382
|
-
import os as _os
|
|
383
|
-
|
|
384
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
385
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
386
|
-
import doctest
|
|
387
|
-
|
|
388
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
389
|
-
_os.environ["pydna_cached_funcs"] = cached
|