pydna 5.5.1__py3-none-any.whl → 5.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +116 -134
- pydna/_pretty.py +2 -14
- pydna/all.py +10 -20
- pydna/amplicon.py +25 -20
- pydna/amplify.py +46 -26
- pydna/assembly.py +50 -27
- pydna/assembly2.py +2627 -0
- pydna/common_sub_strings.py +2 -12
- pydna/contig.py +39 -22
- pydna/cre_lox.py +130 -0
- pydna/crispr.py +8 -13
- pydna/design.py +89 -59
- pydna/download.py +10 -18
- pydna/dseq.py +119 -59
- pydna/dseqrecord.py +88 -45
- pydna/fakeseq.py +0 -11
- pydna/fusionpcr.py +3 -1
- pydna/gateway.py +154 -152
- pydna/gel.py +8 -13
- pydna/genbank.py +33 -32
- pydna/genbankfile.py +8 -13
- pydna/genbankfixer.py +41 -28
- pydna/genbankrecord.py +11 -14
- pydna/goldengate.py +2 -2
- pydna/ladders.py +4 -11
- pydna/ligate.py +8 -14
- pydna/parsers.py +25 -9
- pydna/primer.py +3 -12
- pydna/readers.py +0 -11
- pydna/seq.py +21 -18
- pydna/seqrecord.py +20 -20
- pydna/sequence_picker.py +3 -12
- pydna/sequence_regex.py +44 -0
- pydna/tm.py +13 -15
- pydna/types.py +41 -0
- pydna/utils.py +173 -58
- {pydna-5.5.1.dist-info → pydna-5.5.3.dist-info}/METADATA +22 -18
- pydna-5.5.3.dist-info/RECORD +45 -0
- pydna/editor.py +0 -119
- pydna/myenzymes.py +0 -51
- pydna/myprimers.py +0 -219
- pydna-5.5.1.dist-info/RECORD +0 -44
- {pydna-5.5.1.dist-info → pydna-5.5.3.dist-info}/LICENSE.txt +0 -0
- {pydna-5.5.1.dist-info → pydna-5.5.3.dist-info}/WHEEL +0 -0
pydna/gel.py
CHANGED
|
@@ -27,7 +27,9 @@ def interpolator(mwstd):
|
|
|
27
27
|
return interpolator
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
def gel(
|
|
30
|
+
def gel(
|
|
31
|
+
samples=None, gel_length=600, margin=50, interpolator=interpolator(mwstd=_mwstd)
|
|
32
|
+
):
|
|
31
33
|
import numpy as np
|
|
32
34
|
from PIL import Image as Image
|
|
33
35
|
from PIL import ImageDraw as ImageDraw
|
|
@@ -65,7 +67,11 @@ def gel(samples=None, gel_length=600, margin=50, interpolator=interpolator(mwstd
|
|
|
65
67
|
y1 = peak_centre - i
|
|
66
68
|
y2 = peak_centre + i
|
|
67
69
|
intensity = (
|
|
68
|
-
height
|
|
70
|
+
height
|
|
71
|
+
* _math.exp(
|
|
72
|
+
-float(((y1 - peak_centre) ** 2)) / (2 * (band_spread**2))
|
|
73
|
+
)
|
|
74
|
+
* max_intensity
|
|
69
75
|
)
|
|
70
76
|
for y in range(int(y1), int(y2)):
|
|
71
77
|
try:
|
|
@@ -95,14 +101,3 @@ def gel(samples=None, gel_length=600, margin=50, interpolator=interpolator(mwstd
|
|
|
95
101
|
# from PIL import ImageOps
|
|
96
102
|
# im_invert = ImageOps.invert(im)
|
|
97
103
|
# im.rotate(90, expand=1)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
if __name__ == "__main__":
|
|
101
|
-
import os as _os
|
|
102
|
-
|
|
103
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
104
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
105
|
-
import doctest
|
|
106
|
-
|
|
107
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
108
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/genbank.py
CHANGED
|
@@ -11,7 +11,7 @@ The function can be used if the environmental variable **pydna_email** has
|
|
|
11
11
|
been set to a valid email address. The easiest way to do this permanantly is to edit the
|
|
12
12
|
`pydna.ini` file. See the documentation of :func:`pydna.open_config_folder`"""
|
|
13
13
|
|
|
14
|
-
from pydna.utils import memorize as _memorize
|
|
14
|
+
# from pydna.utils import memorize as _memorize
|
|
15
15
|
from pydna.genbankrecord import GenbankRecord as _GenbankRecord
|
|
16
16
|
from pydna.readers import read as _read
|
|
17
17
|
|
|
@@ -19,9 +19,10 @@ from Bio import Entrez as _Entrez
|
|
|
19
19
|
from typing import Literal as _Literal, Optional as _Optional
|
|
20
20
|
import re as _re
|
|
21
21
|
import os as _os
|
|
22
|
-
import logging as _logging
|
|
23
22
|
|
|
24
|
-
|
|
23
|
+
# import logging as _logging
|
|
24
|
+
|
|
25
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
# TODO http://httpbin.org/ use for testing?
|
|
@@ -53,15 +54,19 @@ class Genbank:
|
|
|
53
54
|
*,
|
|
54
55
|
tool: str = "pydna",
|
|
55
56
|
) -> None:
|
|
56
|
-
if not _re.match(
|
|
57
|
+
if not _re.match(
|
|
58
|
+
r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}", users_email, _re.IGNORECASE
|
|
59
|
+
):
|
|
57
60
|
raise ValueError("email address {} is not valid.".format(users_email))
|
|
58
61
|
|
|
59
|
-
_module_logger.info("#### Genbank ititiation ####")
|
|
60
|
-
_module_logger.info("Genbank initiated with email: %s", users_email)
|
|
61
|
-
_module_logger.info("Genbank initiated with tool : %s", tool)
|
|
62
|
+
# _module_logger.info("#### Genbank ititiation ####")
|
|
63
|
+
# _module_logger.info("Genbank initiated with email: %s", users_email)
|
|
64
|
+
# _module_logger.info("Genbank initiated with tool : %s", tool)
|
|
62
65
|
|
|
63
66
|
if users_email == "someone@example.com":
|
|
64
|
-
raise ValueError(
|
|
67
|
+
raise ValueError(
|
|
68
|
+
"you have to set your email address in order to download from Genbank"
|
|
69
|
+
)
|
|
65
70
|
self.email = users_email
|
|
66
71
|
self.tool = tool
|
|
67
72
|
|
|
@@ -69,7 +74,7 @@ class Genbank:
|
|
|
69
74
|
"""This method returns a short representation containing the email used to initiate."""
|
|
70
75
|
return "GenbankConnection({})".format(self.email)
|
|
71
76
|
|
|
72
|
-
@_memorize("pydna.genbank.Genbank.nucleotide")
|
|
77
|
+
# @_memorize("pydna.genbank.Genbank.nucleotide")
|
|
73
78
|
def nucleotide(
|
|
74
79
|
self,
|
|
75
80
|
item: str,
|
|
@@ -127,7 +132,9 @@ class Genbank:
|
|
|
127
132
|
(1, _re.search(r"(REGION:\s(?P<start>\d+)\.\.(?P<stop>\d+))", item)),
|
|
128
133
|
(
|
|
129
134
|
2,
|
|
130
|
-
_re.search(
|
|
135
|
+
_re.search(
|
|
136
|
+
r"(REGION: complement\((?P<start>\d+)\.\.(?P<stop>\d+)\))", item
|
|
137
|
+
),
|
|
131
138
|
),
|
|
132
139
|
(1, _re.search(r"(:|\s)(?P<start>\d+)-(?P<stop>\d+)", item)),
|
|
133
140
|
(2, _re.search(r"(:|\s)c(?P<start>\d+)-(?P<stop>\d+)", item)),
|
|
@@ -143,21 +150,23 @@ class Genbank:
|
|
|
143
150
|
|
|
144
151
|
if strand not in [1, 2]:
|
|
145
152
|
try:
|
|
146
|
-
strand = {"c": 2, "crick": 2, "antisense": 2, "2": 2, "-": 2, "-1": 2}[
|
|
153
|
+
strand = {"c": 2, "crick": 2, "antisense": 2, "2": 2, "-": 2, "-1": 2}[
|
|
154
|
+
strand.lower()
|
|
155
|
+
]
|
|
147
156
|
except (KeyError, AttributeError):
|
|
148
157
|
strand = 1
|
|
149
158
|
|
|
150
|
-
_module_logger.info("#### Genbank download ####")
|
|
151
|
-
_module_logger.info("item %s", item)
|
|
152
|
-
_module_logger.info("start %s", seq_start)
|
|
153
|
-
_module_logger.info("stop %s", seq_stop)
|
|
159
|
+
# _module_logger.info("#### Genbank download ####")
|
|
160
|
+
# _module_logger.info("item %s", item)
|
|
161
|
+
# _module_logger.info("start %s", seq_start)
|
|
162
|
+
# _module_logger.info("stop %s", seq_stop)
|
|
154
163
|
|
|
155
|
-
_module_logger.info("strand %s", str(strand))
|
|
164
|
+
# _module_logger.info("strand %s", str(strand))
|
|
156
165
|
|
|
157
166
|
_Entrez.email = self.email
|
|
158
167
|
_Entrez.tool = self.tool
|
|
159
168
|
|
|
160
|
-
_module_logger.info("Entrez.email %s", self.email)
|
|
169
|
+
# _module_logger.info("Entrez.email %s", self.email)
|
|
161
170
|
text = _Entrez.efetch(
|
|
162
171
|
db="nuccore",
|
|
163
172
|
id=item,
|
|
@@ -168,9 +177,11 @@ class Genbank:
|
|
|
168
177
|
retmode="text",
|
|
169
178
|
).read()
|
|
170
179
|
|
|
171
|
-
_module_logger.info("text[:160] %s", text[:160])
|
|
180
|
+
# _module_logger.info("text[:160] %s", text[:160])
|
|
172
181
|
|
|
173
|
-
return _GenbankRecord(
|
|
182
|
+
return _GenbankRecord(
|
|
183
|
+
_read(text), item=item, start=seq_start, stop=seq_stop, strand=strand
|
|
184
|
+
)
|
|
174
185
|
|
|
175
186
|
|
|
176
187
|
def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
|
|
@@ -219,18 +230,8 @@ def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
|
|
|
219
230
|
|
|
220
231
|
"""
|
|
221
232
|
email = _os.getenv("pydna_email")
|
|
222
|
-
_module_logger.info("#### genbank function called ####")
|
|
223
|
-
_module_logger.info("email %s", email)
|
|
224
|
-
_module_logger.info("accession %s", email)
|
|
233
|
+
# _module_logger.info("#### genbank function called ####")
|
|
234
|
+
# _module_logger.info("email %s", email)
|
|
235
|
+
# _module_logger.info("accession %s", email)
|
|
225
236
|
gb = Genbank(email)
|
|
226
237
|
return gb.nucleotide(accession, *args, **kwargs)
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
if __name__ == "__main__":
|
|
230
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
231
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
232
|
-
import doctest
|
|
233
|
-
|
|
234
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
235
|
-
_os.environ["pydna_cached_funcs"] = cached
|
|
236
|
-
pass
|
pydna/genbankfile.py
CHANGED
|
@@ -20,11 +20,17 @@ class GenbankFile(_Dseqrecord):
|
|
|
20
20
|
|
|
21
21
|
def __repr__(self):
|
|
22
22
|
"""returns a short string representation of the object"""
|
|
23
|
-
return "File({})({}{})".format(
|
|
23
|
+
return "File({})({}{})".format(
|
|
24
|
+
self.id, {True: "-", False: "o"}[not self.circular], len(self)
|
|
25
|
+
)
|
|
24
26
|
|
|
25
27
|
def _repr_pretty_(self, p, cycle):
|
|
26
28
|
"""returns a short string representation of the object"""
|
|
27
|
-
p.text(
|
|
29
|
+
p.text(
|
|
30
|
+
"File({})({}{})".format(
|
|
31
|
+
self.id, {True: "-", False: "o"}[not self.circular], len(self)
|
|
32
|
+
)
|
|
33
|
+
)
|
|
28
34
|
|
|
29
35
|
def _repr_html_(self):
|
|
30
36
|
return "<a href='{path}' target='_blank'>{path}</a><br>".format(path=self.path)
|
|
@@ -34,14 +40,3 @@ class GenbankFile(_Dseqrecord):
|
|
|
34
40
|
return answer
|
|
35
41
|
|
|
36
42
|
rc = reverse_complement
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
if __name__ == "__main__":
|
|
40
|
-
import os as _os
|
|
41
|
-
|
|
42
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
43
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
44
|
-
import doctest
|
|
45
|
-
|
|
46
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
47
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/genbankfixer.py
CHANGED
|
@@ -33,7 +33,9 @@ GoodLocus = (
|
|
|
33
33
|
+ _pp.Word(_pp.nums).setResultsName("size")
|
|
34
34
|
+ _pp.Suppress(_pp.CaselessLiteral("bp"))
|
|
35
35
|
+ _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
|
|
36
|
-
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
36
|
+
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
37
|
+
"topology"
|
|
38
|
+
)
|
|
37
39
|
+ _pp.Optional(_pp.Word(_pp.alphas), default=" ").setResultsName("divcode")
|
|
38
40
|
+ _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
|
|
39
41
|
)
|
|
@@ -44,7 +46,9 @@ BrokenLocus1 = (
|
|
|
44
46
|
+ _pp.Word(_pp.nums).setResultsName("size")
|
|
45
47
|
+ _pp.Suppress(_pp.CaselessLiteral("bp"))
|
|
46
48
|
+ _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
|
|
47
|
-
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
49
|
+
+ (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
|
|
50
|
+
"topology"
|
|
51
|
+
)
|
|
48
52
|
+ _pp.Optional(_pp.Word(_pp.alphas), default=" ").setResultsName("divcode")
|
|
49
53
|
+ _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
|
|
50
54
|
)
|
|
@@ -97,7 +101,8 @@ CapWord = _pp.Word("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
|
|
97
101
|
SpacedLine = _pp.White(min=1) + _pp.CharsNotIn("\n") + _pp.LineEnd()
|
|
98
102
|
# HeaderLine = CapWord + CharsNotIn("\n") + LineEnd()
|
|
99
103
|
GenericEntry = _pp.Group(
|
|
100
|
-
CapWord
|
|
104
|
+
CapWord
|
|
105
|
+
+ _pp.Combine(_pp.CharsNotIn("\n") + _pp.LineEnd() + _pp.ZeroOrMore(SpacedLine))
|
|
101
106
|
).setResultsName("generics", listAllMatches=True)
|
|
102
107
|
|
|
103
108
|
|
|
@@ -135,7 +140,9 @@ RPAREN = _pp.Suppress(")")
|
|
|
135
140
|
SEP = _pp.Suppress(_pp.Literal(".."))
|
|
136
141
|
|
|
137
142
|
# recognize numbers w. < & > uncertainty specs, then strip the <> chars to make it fixed
|
|
138
|
-
gbIndex = _pp.Word(_pp.nums + "<>").setParseAction(
|
|
143
|
+
gbIndex = _pp.Word(_pp.nums + "<>").setParseAction(
|
|
144
|
+
lambda s, l_, t: int(t[0].replace("<", "").replace(">", ""))
|
|
145
|
+
)
|
|
139
146
|
SimpleSlice = _pp.Group(gbIndex + SEP + gbIndex) | _pp.Group(gbIndex).setParseAction(
|
|
140
147
|
lambda s, l_, t: [[t[0][0], t[0][0]]]
|
|
141
148
|
)
|
|
@@ -194,12 +201,19 @@ QuoteFeaturekeyval = _pp.Group(
|
|
|
194
201
|
|
|
195
202
|
# UnQuoted KeyVal: /key=value (I'm assuming it doesn't do multilines this way? wrong! ApE does store long labels this way! sigh.)
|
|
196
203
|
# NoQuoteFeaturekeyval = Group(Suppress('/') + Word(alphas+nums+"_-") + Suppress('=') + OneOrMore(CharsNotIn("\n")) )
|
|
197
|
-
keyvalspacedline =
|
|
204
|
+
keyvalspacedline = (
|
|
205
|
+
_pp.White(exact=21)
|
|
206
|
+
+ _pp.CharsNotIn("/")
|
|
207
|
+
+ _pp.OneOrMore(_pp.CharsNotIn("\n"))
|
|
208
|
+
+ _pp.LineEnd()
|
|
209
|
+
)
|
|
198
210
|
NoQuoteFeaturekeyval = _pp.Group(
|
|
199
211
|
_pp.Suppress("/")
|
|
200
212
|
+ _pp.Word(_pp.alphas + _pp.nums + "_-")
|
|
201
213
|
+ _pp.Suppress("=")
|
|
202
|
-
+ _pp.Combine(
|
|
214
|
+
+ _pp.Combine(
|
|
215
|
+
_pp.CharsNotIn("\n") + _pp.LineEnd() + _pp.ZeroOrMore(keyvalspacedline)
|
|
216
|
+
)
|
|
203
217
|
)
|
|
204
218
|
|
|
205
219
|
# Special Case for Numerical Vals: /bases=12 OR /bases="12"
|
|
@@ -213,14 +227,18 @@ NumFeaturekeyval = _pp.Group(
|
|
|
213
227
|
|
|
214
228
|
# Key Only KeyVal: /pseudo
|
|
215
229
|
# post-parse convert it into a pair to resemble the structure of the first three cases i.e. [pseudo, True]
|
|
216
|
-
FlagFeaturekeyval = _pp.Group(
|
|
217
|
-
|
|
218
|
-
)
|
|
230
|
+
FlagFeaturekeyval = _pp.Group(
|
|
231
|
+
_pp.Suppress("/") + _pp.Word(_pp.alphas + _pp.nums + "_-")
|
|
232
|
+
).setParseAction(lambda s, l_, t: [[t[0][0], True]])
|
|
219
233
|
|
|
220
234
|
Feature = _pp.Group(
|
|
221
|
-
_pp.Word(_pp.alphas + _pp.nums + "_-").setParseAction(
|
|
235
|
+
_pp.Word(_pp.alphas + _pp.nums + "_-").setParseAction(
|
|
236
|
+
lambda s, l_, t: [["type", t[0]]]
|
|
237
|
+
)
|
|
222
238
|
+ featLocation.setResultsName("location")
|
|
223
|
-
+ _pp.OneOrMore(
|
|
239
|
+
+ _pp.OneOrMore(
|
|
240
|
+
NumFeaturekeyval | QuoteFeaturekeyval | NoQuoteFeaturekeyval | FlagFeaturekeyval
|
|
241
|
+
)
|
|
224
242
|
)
|
|
225
243
|
|
|
226
244
|
FeaturesEntry = (
|
|
@@ -234,7 +252,9 @@ FeaturesEntry = (
|
|
|
234
252
|
|
|
235
253
|
# sequence is just a column-spaced big table of dna nucleotides
|
|
236
254
|
# should it recognize full IUPAC alphabet? NCBI uses n for unknown region
|
|
237
|
-
Sequence = _pp.OneOrMore(
|
|
255
|
+
Sequence = _pp.OneOrMore(
|
|
256
|
+
_pp.Suppress(_pp.Word(_pp.nums)) + _pp.OneOrMore(_pp.Word("ACGTacgtNn"))
|
|
257
|
+
)
|
|
238
258
|
|
|
239
259
|
# Group( ) hides the setResultsName names def'd inside, such that one needs to first access this group and then access the dict of contents inside
|
|
240
260
|
SequenceEntry = _pp.Suppress(_pp.Literal("ORIGIN")) + Sequence.setParseAction(
|
|
@@ -352,7 +372,9 @@ def wrapstring(str_, rowstart, rowend, padfirst=True):
|
|
|
352
372
|
if linenum == 0 and not padfirst:
|
|
353
373
|
wrappedstr += str_[linenum * rowlen : (linenum + 1) * rowlen] + "\n"
|
|
354
374
|
else:
|
|
355
|
-
wrappedstr +=
|
|
375
|
+
wrappedstr += (
|
|
376
|
+
" " * leftpad + str_[linenum * rowlen : (linenum + 1) * rowlen] + "\n"
|
|
377
|
+
)
|
|
356
378
|
# if str_.startswith("/translation="):
|
|
357
379
|
# print(str_)
|
|
358
380
|
# print(wrappedstr)
|
|
@@ -480,7 +502,9 @@ def toGB(jseq):
|
|
|
480
502
|
fstr += wrapstring("/" + str(k) + "=" + str(feat[k]), 21, 80)
|
|
481
503
|
# standard: wrap val in quotes
|
|
482
504
|
else:
|
|
483
|
-
fstr += wrapstring(
|
|
505
|
+
fstr += wrapstring(
|
|
506
|
+
"/" + str(k) + "=" + '"' + str(feat[k]) + '"', 21, 80
|
|
507
|
+
)
|
|
484
508
|
featuresstr += fstr
|
|
485
509
|
|
|
486
510
|
# the spaced, numbered sequence
|
|
@@ -511,11 +535,11 @@ def gbtext_clean(gbtext):
|
|
|
511
535
|
... //'''
|
|
512
536
|
>>> from pydna.readers import read
|
|
513
537
|
>>> read(s) # doctest: +SKIP
|
|
514
|
-
/
|
|
538
|
+
... /site-packages/Bio/GenBank/Scanner.py:1388: BiopythonParserWarning: Malformed LOCUS line found - is this correct?
|
|
515
539
|
:'LOCUS New_DNA 3 bp DNA CIRCULAR SYN 19-JUN-2013\\n'
|
|
516
540
|
"correct?\\n:%r" % line, BiopythonParserWarning)
|
|
517
541
|
Traceback (most recent call last):
|
|
518
|
-
File "/
|
|
542
|
+
File "... /pydna/readers.py", line 48, in read
|
|
519
543
|
results = results.pop()
|
|
520
544
|
IndexError: pop from empty list
|
|
521
545
|
<BLANKLINE>
|
|
@@ -523,7 +547,7 @@ def gbtext_clean(gbtext):
|
|
|
523
547
|
<BLANKLINE>
|
|
524
548
|
Traceback (most recent call last):
|
|
525
549
|
File "<stdin>", line 1, in <module>
|
|
526
|
-
File "/
|
|
550
|
+
File "... /pydna/readers.py", line 50, in read
|
|
527
551
|
raise ValueError("No sequences found in data:\\n({})".format(data[:79]))
|
|
528
552
|
ValueError: No sequences found in data:
|
|
529
553
|
(LOCUS New_DNA 3 bp DNA CIRCULAR SYN 19-JUN-2013
|
|
@@ -570,14 +594,3 @@ def gbtext_clean(gbtext):
|
|
|
570
594
|
Result = _namedtuple("Result", "gbtext jseq")
|
|
571
595
|
result = Result(_pretty_str(toGB(jseq).strip()), jseq)
|
|
572
596
|
return result
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
if __name__ == "__main__":
|
|
576
|
-
import os as _os
|
|
577
|
-
|
|
578
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
579
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
580
|
-
import doctest
|
|
581
|
-
|
|
582
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
583
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/genbankrecord.py
CHANGED
|
@@ -11,7 +11,9 @@ import os as _os
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class GenbankRecord(_Dseqrecord):
|
|
14
|
-
def __init__(
|
|
14
|
+
def __init__(
|
|
15
|
+
self, record, *args, item="accession", start=None, stop=None, strand=1, **kwargs
|
|
16
|
+
):
|
|
15
17
|
super().__init__(record, *args, **kwargs)
|
|
16
18
|
self.item = item
|
|
17
19
|
self.start = start
|
|
@@ -64,7 +66,9 @@ class GenbankRecord(_Dseqrecord):
|
|
|
64
66
|
return obj
|
|
65
67
|
|
|
66
68
|
@classmethod
|
|
67
|
-
def from_SeqRecord(
|
|
69
|
+
def from_SeqRecord(
|
|
70
|
+
cls, record, *args, item="accession", start=None, stop=None, strand=1, **kwargs
|
|
71
|
+
):
|
|
68
72
|
obj = super().from_SeqRecord(record, *args, **kwargs)
|
|
69
73
|
obj.item = item
|
|
70
74
|
obj.start = start
|
|
@@ -95,7 +99,9 @@ class GenbankRecord(_Dseqrecord):
|
|
|
95
99
|
|
|
96
100
|
def __repr__(self):
|
|
97
101
|
"""returns a short string representation of the object"""
|
|
98
|
-
return "Gbnk({}{} {})".format(
|
|
102
|
+
return "Gbnk({}{} {})".format(
|
|
103
|
+
{True: "-", False: "o"}[not self.circular], len(self), self._repr
|
|
104
|
+
)
|
|
99
105
|
|
|
100
106
|
def _repr_pretty_(self, p, cycle):
|
|
101
107
|
"""returns a short string representation of the object"""
|
|
@@ -121,7 +127,7 @@ class GenbankRecord(_Dseqrecord):
|
|
|
121
127
|
|
|
122
128
|
code = (
|
|
123
129
|
"from pydna.genbank import Genbank\n"
|
|
124
|
-
f"gb = Genbank('{_os.
|
|
130
|
+
f"gb = Genbank('{_os.getenv('pydna_email')}')\n"
|
|
125
131
|
f"seq = gb.nucleotide('{self.item}'"
|
|
126
132
|
)
|
|
127
133
|
if self.start and self.start:
|
|
@@ -141,7 +147,7 @@ class GenbankRecord(_Dseqrecord):
|
|
|
141
147
|
|
|
142
148
|
code = (
|
|
143
149
|
"from Bio import Entrez, SeqIO\n"
|
|
144
|
-
f"Entrez.email = '{_os.
|
|
150
|
+
f"Entrez.email = '{_os.getenv('pydna_email')}'\n"
|
|
145
151
|
"handle = Entrez.efetch(db='nuccore',\n"
|
|
146
152
|
f" id='{self.item}',\n"
|
|
147
153
|
" rettype='gbwithparts',\n"
|
|
@@ -160,12 +166,3 @@ class GenbankRecord(_Dseqrecord):
|
|
|
160
166
|
code += "record = SeqIO.read(handle, 'genbank')"
|
|
161
167
|
|
|
162
168
|
return _ps(code)
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if __name__ == "__main__":
|
|
166
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
167
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
168
|
-
import doctest
|
|
169
|
-
|
|
170
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
171
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/goldengate.py
CHANGED
|
@@ -27,9 +27,9 @@ from Bio.Restriction import BsaI, BsmBI, BbsI, FokI
|
|
|
27
27
|
from pydna.dseqrecord import Dseqrecord as _Dseqrecord
|
|
28
28
|
|
|
29
29
|
# from copy import deepcopy as _deepcopy
|
|
30
|
-
import logging as _logging
|
|
30
|
+
# import logging as _logging
|
|
31
31
|
|
|
32
|
-
_module_logger = _logging.getLogger("pydna." + __name__)
|
|
32
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
33
33
|
|
|
34
34
|
BsaI, BsmBI, BbsI, FokI
|
|
35
35
|
|
pydna/ladders.py
CHANGED
|
@@ -19,7 +19,10 @@ a gel image. Exampel can be found in scripts/molecular_weight_standards.ods.
|
|
|
19
19
|
from pydna.fakeseq import FakeSeq as _FakeSeq
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
PennStateLadder = [
|
|
22
|
+
PennStateLadder = [
|
|
23
|
+
_FakeSeq(int(n))
|
|
24
|
+
for n in (10000, 7750, 5000, 4000, 3000, 2000, 1500, 1000, 750, 500)
|
|
25
|
+
]
|
|
23
26
|
|
|
24
27
|
|
|
25
28
|
GeneRuler_1kb = [
|
|
@@ -131,13 +134,3 @@ FakeGel = [
|
|
|
131
134
|
],
|
|
132
135
|
PennStateLadder,
|
|
133
136
|
]
|
|
134
|
-
|
|
135
|
-
if __name__ == "__main__":
|
|
136
|
-
import os as _os
|
|
137
|
-
|
|
138
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
139
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
140
|
-
import doctest
|
|
141
|
-
|
|
142
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
143
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/ligate.py
CHANGED
|
@@ -9,9 +9,10 @@ from operator import add
|
|
|
9
9
|
from functools import reduce
|
|
10
10
|
import networkx as _nx
|
|
11
11
|
from itertools import permutations
|
|
12
|
-
import logging as _logging
|
|
13
12
|
|
|
14
|
-
|
|
13
|
+
# import logging as _logging
|
|
14
|
+
|
|
15
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
def ligate(fragments: list):
|
|
@@ -51,18 +52,11 @@ def ligate(fragments: list):
|
|
|
51
52
|
|
|
52
53
|
cpaths = [p for p in sorted(_nx.simple_cycles(G), key=len) if len(p) > 1]
|
|
53
54
|
csequences = [reduce(add, x).looped() for x in cpaths]
|
|
54
|
-
lpaths = [
|
|
55
|
+
lpaths = [
|
|
56
|
+
p
|
|
57
|
+
for p in sorted(_nx.all_simple_paths(G, "begin", "end"), key=len)
|
|
58
|
+
if len(p) > 3
|
|
59
|
+
]
|
|
55
60
|
lsequences = [reduce(add, lp[1:-1]) for lp in lpaths]
|
|
56
61
|
|
|
57
62
|
return csequences, lsequences
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if __name__ == "__main__":
|
|
61
|
-
import os as _os
|
|
62
|
-
|
|
63
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
64
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
65
|
-
import doctest
|
|
66
|
-
|
|
67
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
68
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/parsers.py
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
"""Provides two functions, parse and parse_primers"""
|
|
9
9
|
|
|
10
|
-
import os as _os
|
|
10
|
+
# import os as _os
|
|
11
11
|
import re as _re
|
|
12
12
|
import io as _io
|
|
13
13
|
import textwrap as _textwrap
|
|
@@ -40,7 +40,9 @@ except ImportError:
|
|
|
40
40
|
|
|
41
41
|
# gb_fasta_embl_regex = r"(?:>.+\n^(?:^[^>]+?)(?=\n\n|>|LOCUS|ID))|(?:(?:LOCUS|ID)(?:(?:.|\n)+?)^//)"
|
|
42
42
|
|
|
43
|
-
gb_fasta_embl_regex =
|
|
43
|
+
gb_fasta_embl_regex = (
|
|
44
|
+
r"(?:^>.+\n^(?:^[^>]+?)(?=\n\n|>|^LOCUS|^ID))|(?:(?:^LOCUS|^ID)(?:(?:.|\n)+?)^//)"
|
|
45
|
+
)
|
|
44
46
|
|
|
45
47
|
# The gb_fasta_embl_regex is meant to be able to extract sequences from
|
|
46
48
|
# text where sequences are mixed with other contents as well
|
|
@@ -95,7 +97,7 @@ def embl_gb_fasta(text):
|
|
|
95
97
|
except ValueError:
|
|
96
98
|
handle.seek(0)
|
|
97
99
|
try:
|
|
98
|
-
parsed = _SeqIO.read(handle, "fasta")
|
|
100
|
+
parsed = _SeqIO.read(handle, "fasta-blast")
|
|
99
101
|
except ValueError:
|
|
100
102
|
handle.close()
|
|
101
103
|
continue
|
|
@@ -208,10 +210,24 @@ def parse_primers(data):
|
|
|
208
210
|
return [_Primer(x) for x in parse(data, ds=False)]
|
|
209
211
|
|
|
210
212
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
214
|
-
import doctest
|
|
213
|
+
def parse_snapgene(file_path: str) -> list[_Dseqrecord]:
|
|
214
|
+
"""Parse a SnapGene file and return a Dseqrecord object.
|
|
215
215
|
|
|
216
|
-
|
|
217
|
-
|
|
216
|
+
Parameters
|
|
217
|
+
----------
|
|
218
|
+
file_path : str
|
|
219
|
+
The path to the SnapGene file to parse.
|
|
220
|
+
|
|
221
|
+
Returns
|
|
222
|
+
-------
|
|
223
|
+
Dseqrecord
|
|
224
|
+
The parsed SnapGene file as a Dseqrecord object.
|
|
225
|
+
|
|
226
|
+
"""
|
|
227
|
+
with open(file_path, "rb") as f:
|
|
228
|
+
parsed_seq = next(_SeqIO.parse(f, "snapgene"))
|
|
229
|
+
circular = (
|
|
230
|
+
"topology" in parsed_seq.annotations.keys()
|
|
231
|
+
and parsed_seq.annotations["topology"] == "circular"
|
|
232
|
+
)
|
|
233
|
+
return [_Dseqrecord(parsed_seq, circular=circular)]
|
pydna/primer.py
CHANGED
|
@@ -14,7 +14,9 @@ from pydna.seqrecord import SeqRecord as _SeqRecord
|
|
|
14
14
|
class Primer(_SeqRecord):
|
|
15
15
|
"""Primer and its position on a template, footprint and tail."""
|
|
16
16
|
|
|
17
|
-
def __init__(
|
|
17
|
+
def __init__(
|
|
18
|
+
self, record, *args, amplicon=None, position=None, footprint=0, **kwargs
|
|
19
|
+
):
|
|
18
20
|
if hasattr(record, "features"): # Seqrecord
|
|
19
21
|
self.__dict__.update(record.__dict__)
|
|
20
22
|
self.__dict__.update(kwargs)
|
|
@@ -57,14 +59,3 @@ class Primer(_SeqRecord):
|
|
|
57
59
|
answer.position = None
|
|
58
60
|
answer._fp = len(self)
|
|
59
61
|
return answer
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
if __name__ == "__main__":
|
|
63
|
-
import os as _os
|
|
64
|
-
|
|
65
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
66
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
67
|
-
import doctest
|
|
68
|
-
|
|
69
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
70
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/readers.py
CHANGED
|
@@ -54,14 +54,3 @@ def read_primer(data):
|
|
|
54
54
|
The usage is similar to the :func:`parse_primer` function."""
|
|
55
55
|
|
|
56
56
|
return _Primer(read(data, ds=False))
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if __name__ == "__main__":
|
|
60
|
-
import os as _os
|
|
61
|
-
|
|
62
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
63
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
64
|
-
import doctest
|
|
65
|
-
|
|
66
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
67
|
-
_os.environ["pydna_cached_funcs"] = cached
|