pydna 5.5.1__py3-none-any.whl → 5.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +116 -134
- pydna/_pretty.py +2 -14
- pydna/all.py +10 -20
- pydna/amplicon.py +25 -20
- pydna/amplify.py +46 -26
- pydna/assembly.py +50 -27
- pydna/assembly2.py +1902 -0
- pydna/common_sub_strings.py +2 -12
- pydna/contig.py +39 -22
- pydna/crispr.py +8 -13
- pydna/design.py +89 -59
- pydna/download.py +10 -18
- pydna/dseq.py +119 -59
- pydna/dseqrecord.py +88 -45
- pydna/fakeseq.py +0 -11
- pydna/fusionpcr.py +3 -1
- pydna/gateway.py +2 -2
- pydna/gel.py +8 -13
- pydna/genbank.py +33 -32
- pydna/genbankfile.py +8 -13
- pydna/genbankfixer.py +41 -28
- pydna/genbankrecord.py +11 -14
- pydna/goldengate.py +2 -2
- pydna/ladders.py +4 -11
- pydna/ligate.py +8 -14
- pydna/parsers.py +5 -12
- pydna/primer.py +3 -12
- pydna/readers.py +0 -11
- pydna/seq.py +21 -18
- pydna/seqrecord.py +19 -19
- pydna/sequence_picker.py +3 -12
- pydna/tm.py +13 -15
- pydna/types.py +41 -0
- pydna/utils.py +173 -58
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/METADATA +17 -3
- pydna-5.5.2.dist-info/RECORD +43 -0
- pydna/editor.py +0 -119
- pydna/myenzymes.py +0 -51
- pydna/myprimers.py +0 -219
- pydna-5.5.1.dist-info/RECORD +0 -44
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/LICENSE.txt +0 -0
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/WHEEL +0 -0
pydna/dseqrecord.py
CHANGED
|
@@ -37,9 +37,9 @@ import time as _time
|
|
|
37
37
|
import datetime as _datetime
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
import logging as _logging
|
|
40
|
+
# import logging as _logging
|
|
41
41
|
|
|
42
|
-
_module_logger = _logging.getLogger("pydna." + __name__)
|
|
42
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
try:
|
|
@@ -127,6 +127,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
127
127
|
|
|
128
128
|
"""
|
|
129
129
|
|
|
130
|
+
seq: _Dseq
|
|
131
|
+
|
|
130
132
|
def __init__(
|
|
131
133
|
self,
|
|
132
134
|
record,
|
|
@@ -135,12 +137,12 @@ class Dseqrecord(_SeqRecord):
|
|
|
135
137
|
n=5e-14, # mol ( = 0.05 pmol)
|
|
136
138
|
**kwargs,
|
|
137
139
|
):
|
|
138
|
-
_module_logger.info("### Dseqrecord initialized ###")
|
|
139
|
-
_module_logger.info("argument circular = %s", circular)
|
|
140
|
-
_module_logger.info("circular = %s", circular)
|
|
140
|
+
# _module_logger.info("### Dseqrecord initialized ###")
|
|
141
|
+
# _module_logger.info("argument circular = %s", circular)
|
|
142
|
+
# _module_logger.info("circular = %s", circular)
|
|
141
143
|
|
|
142
144
|
if isinstance(record, str):
|
|
143
|
-
_module_logger.info("record is a string")
|
|
145
|
+
# _module_logger.info("record is a string")
|
|
144
146
|
super().__init__(
|
|
145
147
|
_Dseq.from_string(
|
|
146
148
|
record,
|
|
@@ -157,12 +159,12 @@ class Dseqrecord(_SeqRecord):
|
|
|
157
159
|
record = record[:]
|
|
158
160
|
elif circular is True:
|
|
159
161
|
record = record.looped()
|
|
160
|
-
_module_logger.info("record is a Dseq object")
|
|
162
|
+
# _module_logger.info("record is a Dseq object")
|
|
161
163
|
super().__init__(record, *args, **kwargs)
|
|
162
164
|
|
|
163
165
|
# record is a Bio.Seq object ?
|
|
164
166
|
elif hasattr(record, "transcribe"):
|
|
165
|
-
_module_logger.info("record is a Seq object")
|
|
167
|
+
# _module_logger.info("record is a Seq object")
|
|
166
168
|
super().__init__(
|
|
167
169
|
_Dseq(
|
|
168
170
|
str(record),
|
|
@@ -175,7 +177,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
175
177
|
|
|
176
178
|
# record is a Bio.SeqRecord or Dseqrecord object ?
|
|
177
179
|
elif hasattr(record, "features"):
|
|
178
|
-
_module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
|
|
180
|
+
# _module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
|
|
179
181
|
for key, value in list(record.__dict__.items()):
|
|
180
182
|
setattr(self, key, value)
|
|
181
183
|
self.letter_annotations = {}
|
|
@@ -256,7 +258,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
256
258
|
obj.n = n
|
|
257
259
|
if circular is None:
|
|
258
260
|
circular = record.annotations.get("topology") == "circular"
|
|
259
|
-
obj.seq = _Dseq.quick(
|
|
261
|
+
obj.seq = _Dseq.quick(
|
|
262
|
+
str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
|
|
263
|
+
)
|
|
260
264
|
return obj
|
|
261
265
|
|
|
262
266
|
@property
|
|
@@ -295,7 +299,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
295
299
|
"""
|
|
296
300
|
return super().extract_feature(n)
|
|
297
301
|
|
|
298
|
-
def add_feature(
|
|
302
|
+
def add_feature(
|
|
303
|
+
self, x=None, y=None, seq=None, type_="misc", strand=1, *args, **kwargs
|
|
304
|
+
):
|
|
299
305
|
"""Add a feature of type misc to the feature list of the sequence.
|
|
300
306
|
|
|
301
307
|
Parameters
|
|
@@ -392,13 +398,19 @@ class Dseqrecord(_SeqRecord):
|
|
|
392
398
|
elif five_prime[0] == "3'":
|
|
393
399
|
fn.location = fn.location + (-self.seq.ovhg)
|
|
394
400
|
if fn.location.start < 0:
|
|
395
|
-
loc1 = _SimpleLocation(
|
|
401
|
+
loc1 = _SimpleLocation(
|
|
402
|
+
len(new) + fn.location.start, len(new), strand=fn.location.strand
|
|
403
|
+
)
|
|
396
404
|
loc2 = _SimpleLocation(0, fn.location.end, strand=fn.location.strand)
|
|
397
405
|
fn.location = _CompoundLocation([loc1, loc2])
|
|
398
406
|
|
|
399
407
|
if fn.location.end > len(new):
|
|
400
|
-
loc1 = _SimpleLocation(
|
|
401
|
-
|
|
408
|
+
loc1 = _SimpleLocation(
|
|
409
|
+
fn.location.start, len(new), strand=fn.location.strand
|
|
410
|
+
)
|
|
411
|
+
loc2 = _SimpleLocation(
|
|
412
|
+
0, fn.location.end - len(new), strand=fn.location.strand
|
|
413
|
+
)
|
|
402
414
|
fn.location = _CompoundLocation([loc1, loc2])
|
|
403
415
|
|
|
404
416
|
fn.qualifiers = fo.qualifiers
|
|
@@ -428,7 +440,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
428
440
|
from pydna import _PydnaDeprecationWarning
|
|
429
441
|
|
|
430
442
|
_warnings.warn(
|
|
431
|
-
"tolinear method is obsolete; "
|
|
443
|
+
"tolinear method is obsolete; "
|
|
444
|
+
"please use obj[:] "
|
|
445
|
+
"instead of obj.tolinear().",
|
|
432
446
|
_PydnaDeprecationWarning,
|
|
433
447
|
)
|
|
434
448
|
new = _copy.copy(self)
|
|
@@ -533,13 +547,17 @@ class Dseqrecord(_SeqRecord):
|
|
|
533
547
|
if self.seq != old_file.seq:
|
|
534
548
|
# If new sequence is different, the old file is
|
|
535
549
|
# renamed with "_OLD_" suffix:
|
|
536
|
-
oldmtime = _datetime.datetime.fromtimestamp(
|
|
550
|
+
oldmtime = _datetime.datetime.fromtimestamp(
|
|
551
|
+
_os.path.getmtime(filename)
|
|
552
|
+
).isoformat()
|
|
537
553
|
tstmp = int(_time.time() * 1_000_000)
|
|
538
554
|
old_filename = f"{name}_OLD_{tstmp}{ext}"
|
|
539
555
|
_os.rename(filename, old_filename)
|
|
540
556
|
with open(filename, "w", encoding="utf8") as fp:
|
|
541
557
|
fp.write(self.format(f))
|
|
542
|
-
newmtime = _datetime.datetime.fromtimestamp(
|
|
558
|
+
newmtime = _datetime.datetime.fromtimestamp(
|
|
559
|
+
_os.path.getmtime(filename)
|
|
560
|
+
).isoformat()
|
|
543
561
|
msg = f"""
|
|
544
562
|
<table style="padding:10px 10px;
|
|
545
563
|
word-break:normal;
|
|
@@ -589,7 +607,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
589
607
|
newdescription = self.description
|
|
590
608
|
if oldstamp and newstamp:
|
|
591
609
|
if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
|
|
592
|
-
newdescription = newdescription.replace(
|
|
610
|
+
newdescription = newdescription.replace(
|
|
611
|
+
newstamp.group(0), oldstamp.group(0)
|
|
612
|
+
)
|
|
593
613
|
elif oldstamp:
|
|
594
614
|
newdescription += " " + oldstamp.group(0)
|
|
595
615
|
newobj = _copy.copy(self)
|
|
@@ -616,9 +636,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
616
636
|
return s.find(o)
|
|
617
637
|
|
|
618
638
|
def __str__(self):
|
|
619
|
-
return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
|
|
620
|
-
self
|
|
621
|
-
)
|
|
639
|
+
return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
|
|
640
|
+
self.circular, len(self)
|
|
641
|
+
) + _SeqRecord.__str__(self)
|
|
622
642
|
|
|
623
643
|
def __contains__(self, other):
|
|
624
644
|
if other.lower() in str(self.seq).lower():
|
|
@@ -757,10 +777,16 @@ class Dseqrecord(_SeqRecord):
|
|
|
757
777
|
return [x.annotations["filename"] for x in matching_reads]
|
|
758
778
|
|
|
759
779
|
def __repr__(self):
|
|
760
|
-
return "Dseqrecord({}{})".format(
|
|
780
|
+
return "Dseqrecord({}{})".format(
|
|
781
|
+
{True: "-", False: "o"}[not self.circular], len(self)
|
|
782
|
+
)
|
|
761
783
|
|
|
762
784
|
def _repr_pretty_(self, p, cycle):
|
|
763
|
-
p.text(
|
|
785
|
+
p.text(
|
|
786
|
+
"Dseqrecord({}{})".format(
|
|
787
|
+
{True: "-", False: "o"}[not self.circular], len(self)
|
|
788
|
+
)
|
|
789
|
+
)
|
|
764
790
|
|
|
765
791
|
def __add__(self, other):
|
|
766
792
|
if hasattr(other, "seq") and hasattr(other.seq, "watson"):
|
|
@@ -784,7 +810,11 @@ class Dseqrecord(_SeqRecord):
|
|
|
784
810
|
|
|
785
811
|
def __mul__(self, number):
|
|
786
812
|
if not isinstance(number, int):
|
|
787
|
-
raise TypeError(
|
|
813
|
+
raise TypeError(
|
|
814
|
+
"TypeError: can't multiply Dseqrecord by non-int of type {}".format(
|
|
815
|
+
type(number)
|
|
816
|
+
)
|
|
817
|
+
)
|
|
788
818
|
if self.circular:
|
|
789
819
|
raise TypeError("TypeError: can't multiply circular Dseqrecord.")
|
|
790
820
|
if number > 0:
|
|
@@ -821,7 +851,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
821
851
|
for f in answer.features
|
|
822
852
|
if (
|
|
823
853
|
_location_boundaries(f.location)[1] <= answer.seq.length
|
|
824
|
-
and _location_boundaries(f.location)[0]
|
|
854
|
+
and _location_boundaries(f.location)[0]
|
|
855
|
+
< _location_boundaries(f.location)[1]
|
|
825
856
|
)
|
|
826
857
|
]
|
|
827
858
|
|
|
@@ -1032,7 +1063,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1032
1063
|
result = newseq
|
|
1033
1064
|
else:
|
|
1034
1065
|
result = newseq.shifted(start)
|
|
1035
|
-
_module_logger.info("synced")
|
|
1066
|
+
# _module_logger.info("synced")
|
|
1036
1067
|
return result
|
|
1037
1068
|
|
|
1038
1069
|
def upper(self):
|
|
@@ -1118,7 +1149,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
1118
1149
|
type="CDS",
|
|
1119
1150
|
qualifiers={
|
|
1120
1151
|
"note": f"{y - x}bp {(y - x) // 3}aa",
|
|
1121
|
-
"checksum": [
|
|
1152
|
+
"checksum": [
|
|
1153
|
+
orf.seguid() + " (DNA)",
|
|
1154
|
+
prt.seguid() + " (protein)",
|
|
1155
|
+
],
|
|
1122
1156
|
"codon_start": 1,
|
|
1123
1157
|
"transl_table": 11,
|
|
1124
1158
|
"translation": str(prt.seq),
|
|
@@ -1148,7 +1182,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
1148
1182
|
"""docstring."""
|
|
1149
1183
|
if self.features:
|
|
1150
1184
|
f = self.features[feature]
|
|
1151
|
-
locations = sorted(
|
|
1185
|
+
locations = sorted(
|
|
1186
|
+
self.features[feature].location.parts, key=_SimpleLocation.start.fget
|
|
1187
|
+
)
|
|
1152
1188
|
strand = f.location.strand
|
|
1153
1189
|
else:
|
|
1154
1190
|
locations = [_SimpleLocation(0, 0, 1)]
|
|
@@ -1229,7 +1265,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
1229
1265
|
|
|
1230
1266
|
"""
|
|
1231
1267
|
if not self.circular:
|
|
1232
|
-
raise TypeError(
|
|
1268
|
+
raise TypeError(
|
|
1269
|
+
"Sequence is linear, origin can only be "
|
|
1270
|
+
"shifted for circular sequences.\n"
|
|
1271
|
+
)
|
|
1233
1272
|
ln = len(self)
|
|
1234
1273
|
if not shift % ln:
|
|
1235
1274
|
return _copy.deepcopy(self) # shift is a multiple of ln or 0
|
|
@@ -1311,7 +1350,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
1311
1350
|
# 000
|
|
1312
1351
|
# 2222
|
|
1313
1352
|
#
|
|
1314
|
-
left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(
|
|
1353
|
+
left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(
|
|
1354
|
+
left_cut, True
|
|
1355
|
+
)
|
|
1315
1356
|
initial_shift = left_watson if left_ovhg < 0 else left_crick
|
|
1316
1357
|
features = self.shifted(initial_shift).features
|
|
1317
1358
|
# for f in features:
|
|
@@ -1327,10 +1368,13 @@ class Dseqrecord(_SeqRecord):
|
|
|
1327
1368
|
# 2222
|
|
1328
1369
|
|
|
1329
1370
|
features_need_transfer = [
|
|
1330
|
-
f
|
|
1371
|
+
f
|
|
1372
|
+
for f in features
|
|
1373
|
+
if (_location_boundaries(f.location)[1] <= abs(left_ovhg))
|
|
1331
1374
|
]
|
|
1332
1375
|
features_need_transfer = [
|
|
1333
|
-
_shift_feature(f, -abs(left_ovhg), len(self))
|
|
1376
|
+
_shift_feature(f, -abs(left_ovhg), len(self))
|
|
1377
|
+
for f in features_need_transfer
|
|
1334
1378
|
]
|
|
1335
1379
|
|
|
1336
1380
|
# ^ ^^^^^^^^^
|
|
@@ -1345,7 +1389,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
1345
1389
|
# The features 0 and 1 would have the right location if the final sequence had the same length
|
|
1346
1390
|
# as the original one. However, the final product is longer because of the overhang.
|
|
1347
1391
|
|
|
1348
|
-
features += [
|
|
1392
|
+
features += [
|
|
1393
|
+
_shift_feature(f, abs(left_ovhg), len(dseq))
|
|
1394
|
+
for f in features_need_transfer
|
|
1395
|
+
]
|
|
1349
1396
|
# ^ ^^^^^^^^^
|
|
1350
1397
|
# So we shift back by the same amount in the opposite direction, but this time we pass the
|
|
1351
1398
|
# length of the final product.
|
|
@@ -1356,24 +1403,20 @@ class Dseqrecord(_SeqRecord):
|
|
|
1356
1403
|
for f in features
|
|
1357
1404
|
if (
|
|
1358
1405
|
_location_boundaries(f.location)[1] <= len(dseq)
|
|
1359
|
-
and _location_boundaries(f.location)[0]
|
|
1406
|
+
and _location_boundaries(f.location)[0]
|
|
1407
|
+
<= _location_boundaries(f.location)[1]
|
|
1360
1408
|
)
|
|
1361
1409
|
]
|
|
1362
1410
|
else:
|
|
1363
|
-
left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(
|
|
1364
|
-
|
|
1411
|
+
left_watson, left_crick, left_ovhg = self.seq.get_cut_parameters(
|
|
1412
|
+
left_cut, True
|
|
1413
|
+
)
|
|
1414
|
+
right_watson, right_crick, right_ovhg = self.seq.get_cut_parameters(
|
|
1415
|
+
right_cut, False
|
|
1416
|
+
)
|
|
1365
1417
|
|
|
1366
1418
|
left_edge = left_crick if left_ovhg > 0 else left_watson
|
|
1367
1419
|
right_edge = right_watson if right_ovhg > 0 else right_crick
|
|
1368
1420
|
features = self[left_edge:right_edge].features
|
|
1369
1421
|
|
|
1370
1422
|
return Dseqrecord(dseq, features=features)
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
if __name__ == "__main__":
|
|
1374
|
-
cache = _os.getenv("pydna_cache")
|
|
1375
|
-
_os.environ["pydna_cache"] = "nocache"
|
|
1376
|
-
import doctest
|
|
1377
|
-
|
|
1378
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
1379
|
-
# _os.environ["pydna_cache"] = cache
|
pydna/fakeseq.py
CHANGED
|
@@ -44,14 +44,3 @@ class FakeSeq:
|
|
|
44
44
|
def __str__(self) -> str:
|
|
45
45
|
"""docstring."""
|
|
46
46
|
return self.__repr__()
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if __name__ == "__main__":
|
|
50
|
-
import os as _os
|
|
51
|
-
|
|
52
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
53
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
54
|
-
import doctest
|
|
55
|
-
|
|
56
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
57
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/fusionpcr.py
CHANGED
|
@@ -17,7 +17,9 @@ def fuse_by_pcr(fragments, limit=15):
|
|
|
17
17
|
new = None
|
|
18
18
|
for a, b in [(x, y), (x, y.rc()), (x.rc(), y)]:
|
|
19
19
|
try:
|
|
20
|
-
((s1, s2, ln), *r) = terminal_overlap(
|
|
20
|
+
((s1, s2, ln), *r) = terminal_overlap(
|
|
21
|
+
a.seq.watson.lower(), rc(b.seq.crick.lower()), limit=limit
|
|
22
|
+
)
|
|
21
23
|
except ValueError as err:
|
|
22
24
|
if "not enough values to unpack" not in str(err):
|
|
23
25
|
raise err
|
pydna/gateway.py
CHANGED
|
@@ -28,9 +28,9 @@ graph.
|
|
|
28
28
|
# import networkx as _nx
|
|
29
29
|
# from copy import deepcopy as _deepcopy
|
|
30
30
|
# import itertools as _itertools
|
|
31
|
-
import logging as _logging
|
|
31
|
+
# import logging as _logging
|
|
32
32
|
|
|
33
|
-
_module_logger = _logging.getLogger("pydna." + __name__)
|
|
33
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
34
34
|
|
|
35
35
|
ambiguous_dna_regex = {
|
|
36
36
|
"A": "T",
|
pydna/gel.py
CHANGED
|
@@ -27,7 +27,9 @@ def interpolator(mwstd):
|
|
|
27
27
|
return interpolator
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
def gel(
|
|
30
|
+
def gel(
|
|
31
|
+
samples=None, gel_length=600, margin=50, interpolator=interpolator(mwstd=_mwstd)
|
|
32
|
+
):
|
|
31
33
|
import numpy as np
|
|
32
34
|
from PIL import Image as Image
|
|
33
35
|
from PIL import ImageDraw as ImageDraw
|
|
@@ -65,7 +67,11 @@ def gel(samples=None, gel_length=600, margin=50, interpolator=interpolator(mwstd
|
|
|
65
67
|
y1 = peak_centre - i
|
|
66
68
|
y2 = peak_centre + i
|
|
67
69
|
intensity = (
|
|
68
|
-
height
|
|
70
|
+
height
|
|
71
|
+
* _math.exp(
|
|
72
|
+
-float(((y1 - peak_centre) ** 2)) / (2 * (band_spread**2))
|
|
73
|
+
)
|
|
74
|
+
* max_intensity
|
|
69
75
|
)
|
|
70
76
|
for y in range(int(y1), int(y2)):
|
|
71
77
|
try:
|
|
@@ -95,14 +101,3 @@ def gel(samples=None, gel_length=600, margin=50, interpolator=interpolator(mwstd
|
|
|
95
101
|
# from PIL import ImageOps
|
|
96
102
|
# im_invert = ImageOps.invert(im)
|
|
97
103
|
# im.rotate(90, expand=1)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
if __name__ == "__main__":
|
|
101
|
-
import os as _os
|
|
102
|
-
|
|
103
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
104
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
105
|
-
import doctest
|
|
106
|
-
|
|
107
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
108
|
-
_os.environ["pydna_cached_funcs"] = cached
|
pydna/genbank.py
CHANGED
|
@@ -11,7 +11,7 @@ The function can be used if the environmental variable **pydna_email** has
|
|
|
11
11
|
been set to a valid email address. The easiest way to do this permanantly is to edit the
|
|
12
12
|
`pydna.ini` file. See the documentation of :func:`pydna.open_config_folder`"""
|
|
13
13
|
|
|
14
|
-
from pydna.utils import memorize as _memorize
|
|
14
|
+
# from pydna.utils import memorize as _memorize
|
|
15
15
|
from pydna.genbankrecord import GenbankRecord as _GenbankRecord
|
|
16
16
|
from pydna.readers import read as _read
|
|
17
17
|
|
|
@@ -19,9 +19,10 @@ from Bio import Entrez as _Entrez
|
|
|
19
19
|
from typing import Literal as _Literal, Optional as _Optional
|
|
20
20
|
import re as _re
|
|
21
21
|
import os as _os
|
|
22
|
-
import logging as _logging
|
|
23
22
|
|
|
24
|
-
|
|
23
|
+
# import logging as _logging
|
|
24
|
+
|
|
25
|
+
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
# TODO http://httpbin.org/ use for testing?
|
|
@@ -53,15 +54,19 @@ class Genbank:
|
|
|
53
54
|
*,
|
|
54
55
|
tool: str = "pydna",
|
|
55
56
|
) -> None:
|
|
56
|
-
if not _re.match(
|
|
57
|
+
if not _re.match(
|
|
58
|
+
r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}", users_email, _re.IGNORECASE
|
|
59
|
+
):
|
|
57
60
|
raise ValueError("email address {} is not valid.".format(users_email))
|
|
58
61
|
|
|
59
|
-
_module_logger.info("#### Genbank ititiation ####")
|
|
60
|
-
_module_logger.info("Genbank initiated with email: %s", users_email)
|
|
61
|
-
_module_logger.info("Genbank initiated with tool : %s", tool)
|
|
62
|
+
# _module_logger.info("#### Genbank ititiation ####")
|
|
63
|
+
# _module_logger.info("Genbank initiated with email: %s", users_email)
|
|
64
|
+
# _module_logger.info("Genbank initiated with tool : %s", tool)
|
|
62
65
|
|
|
63
66
|
if users_email == "someone@example.com":
|
|
64
|
-
raise ValueError(
|
|
67
|
+
raise ValueError(
|
|
68
|
+
"you have to set your email address in order to download from Genbank"
|
|
69
|
+
)
|
|
65
70
|
self.email = users_email
|
|
66
71
|
self.tool = tool
|
|
67
72
|
|
|
@@ -69,7 +74,7 @@ class Genbank:
|
|
|
69
74
|
"""This method returns a short representation containing the email used to initiate."""
|
|
70
75
|
return "GenbankConnection({})".format(self.email)
|
|
71
76
|
|
|
72
|
-
@_memorize("pydna.genbank.Genbank.nucleotide")
|
|
77
|
+
# @_memorize("pydna.genbank.Genbank.nucleotide")
|
|
73
78
|
def nucleotide(
|
|
74
79
|
self,
|
|
75
80
|
item: str,
|
|
@@ -127,7 +132,9 @@ class Genbank:
|
|
|
127
132
|
(1, _re.search(r"(REGION:\s(?P<start>\d+)\.\.(?P<stop>\d+))", item)),
|
|
128
133
|
(
|
|
129
134
|
2,
|
|
130
|
-
_re.search(
|
|
135
|
+
_re.search(
|
|
136
|
+
r"(REGION: complement\((?P<start>\d+)\.\.(?P<stop>\d+)\))", item
|
|
137
|
+
),
|
|
131
138
|
),
|
|
132
139
|
(1, _re.search(r"(:|\s)(?P<start>\d+)-(?P<stop>\d+)", item)),
|
|
133
140
|
(2, _re.search(r"(:|\s)c(?P<start>\d+)-(?P<stop>\d+)", item)),
|
|
@@ -143,21 +150,23 @@ class Genbank:
|
|
|
143
150
|
|
|
144
151
|
if strand not in [1, 2]:
|
|
145
152
|
try:
|
|
146
|
-
strand = {"c": 2, "crick": 2, "antisense": 2, "2": 2, "-": 2, "-1": 2}[
|
|
153
|
+
strand = {"c": 2, "crick": 2, "antisense": 2, "2": 2, "-": 2, "-1": 2}[
|
|
154
|
+
strand.lower()
|
|
155
|
+
]
|
|
147
156
|
except (KeyError, AttributeError):
|
|
148
157
|
strand = 1
|
|
149
158
|
|
|
150
|
-
_module_logger.info("#### Genbank download ####")
|
|
151
|
-
_module_logger.info("item %s", item)
|
|
152
|
-
_module_logger.info("start %s", seq_start)
|
|
153
|
-
_module_logger.info("stop %s", seq_stop)
|
|
159
|
+
# _module_logger.info("#### Genbank download ####")
|
|
160
|
+
# _module_logger.info("item %s", item)
|
|
161
|
+
# _module_logger.info("start %s", seq_start)
|
|
162
|
+
# _module_logger.info("stop %s", seq_stop)
|
|
154
163
|
|
|
155
|
-
_module_logger.info("strand %s", str(strand))
|
|
164
|
+
# _module_logger.info("strand %s", str(strand))
|
|
156
165
|
|
|
157
166
|
_Entrez.email = self.email
|
|
158
167
|
_Entrez.tool = self.tool
|
|
159
168
|
|
|
160
|
-
_module_logger.info("Entrez.email %s", self.email)
|
|
169
|
+
# _module_logger.info("Entrez.email %s", self.email)
|
|
161
170
|
text = _Entrez.efetch(
|
|
162
171
|
db="nuccore",
|
|
163
172
|
id=item,
|
|
@@ -168,9 +177,11 @@ class Genbank:
|
|
|
168
177
|
retmode="text",
|
|
169
178
|
).read()
|
|
170
179
|
|
|
171
|
-
_module_logger.info("text[:160] %s", text[:160])
|
|
180
|
+
# _module_logger.info("text[:160] %s", text[:160])
|
|
172
181
|
|
|
173
|
-
return _GenbankRecord(
|
|
182
|
+
return _GenbankRecord(
|
|
183
|
+
_read(text), item=item, start=seq_start, stop=seq_stop, strand=strand
|
|
184
|
+
)
|
|
174
185
|
|
|
175
186
|
|
|
176
187
|
def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
|
|
@@ -219,18 +230,8 @@ def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
|
|
|
219
230
|
|
|
220
231
|
"""
|
|
221
232
|
email = _os.getenv("pydna_email")
|
|
222
|
-
_module_logger.info("#### genbank function called ####")
|
|
223
|
-
_module_logger.info("email %s", email)
|
|
224
|
-
_module_logger.info("accession %s", email)
|
|
233
|
+
# _module_logger.info("#### genbank function called ####")
|
|
234
|
+
# _module_logger.info("email %s", email)
|
|
235
|
+
# _module_logger.info("accession %s", email)
|
|
225
236
|
gb = Genbank(email)
|
|
226
237
|
return gb.nucleotide(accession, *args, **kwargs)
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
if __name__ == "__main__":
|
|
230
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
231
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
232
|
-
import doctest
|
|
233
|
-
|
|
234
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
235
|
-
_os.environ["pydna_cached_funcs"] = cached
|
|
236
|
-
pass
|
pydna/genbankfile.py
CHANGED
|
@@ -20,11 +20,17 @@ class GenbankFile(_Dseqrecord):
|
|
|
20
20
|
|
|
21
21
|
def __repr__(self):
|
|
22
22
|
"""returns a short string representation of the object"""
|
|
23
|
-
return "File({})({}{})".format(
|
|
23
|
+
return "File({})({}{})".format(
|
|
24
|
+
self.id, {True: "-", False: "o"}[not self.circular], len(self)
|
|
25
|
+
)
|
|
24
26
|
|
|
25
27
|
def _repr_pretty_(self, p, cycle):
|
|
26
28
|
"""returns a short string representation of the object"""
|
|
27
|
-
p.text(
|
|
29
|
+
p.text(
|
|
30
|
+
"File({})({}{})".format(
|
|
31
|
+
self.id, {True: "-", False: "o"}[not self.circular], len(self)
|
|
32
|
+
)
|
|
33
|
+
)
|
|
28
34
|
|
|
29
35
|
def _repr_html_(self):
|
|
30
36
|
return "<a href='{path}' target='_blank'>{path}</a><br>".format(path=self.path)
|
|
@@ -34,14 +40,3 @@ class GenbankFile(_Dseqrecord):
|
|
|
34
40
|
return answer
|
|
35
41
|
|
|
36
42
|
rc = reverse_complement
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
if __name__ == "__main__":
|
|
40
|
-
import os as _os
|
|
41
|
-
|
|
42
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
43
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
44
|
-
import doctest
|
|
45
|
-
|
|
46
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
47
|
-
_os.environ["pydna_cached_funcs"] = cached
|