pydna 5.5.1__py3-none-any.whl → 5.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +116 -134
- pydna/_pretty.py +2 -14
- pydna/all.py +10 -20
- pydna/amplicon.py +25 -20
- pydna/amplify.py +46 -26
- pydna/assembly.py +50 -27
- pydna/assembly2.py +1902 -0
- pydna/common_sub_strings.py +2 -12
- pydna/contig.py +39 -22
- pydna/crispr.py +8 -13
- pydna/design.py +89 -59
- pydna/download.py +10 -18
- pydna/dseq.py +119 -59
- pydna/dseqrecord.py +88 -45
- pydna/fakeseq.py +0 -11
- pydna/fusionpcr.py +3 -1
- pydna/gateway.py +2 -2
- pydna/gel.py +8 -13
- pydna/genbank.py +33 -32
- pydna/genbankfile.py +8 -13
- pydna/genbankfixer.py +41 -28
- pydna/genbankrecord.py +11 -14
- pydna/goldengate.py +2 -2
- pydna/ladders.py +4 -11
- pydna/ligate.py +8 -14
- pydna/parsers.py +5 -12
- pydna/primer.py +3 -12
- pydna/readers.py +0 -11
- pydna/seq.py +21 -18
- pydna/seqrecord.py +19 -19
- pydna/sequence_picker.py +3 -12
- pydna/tm.py +13 -15
- pydna/types.py +41 -0
- pydna/utils.py +173 -58
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/METADATA +17 -3
- pydna-5.5.2.dist-info/RECORD +43 -0
- pydna/editor.py +0 -119
- pydna/myenzymes.py +0 -51
- pydna/myprimers.py +0 -219
- pydna-5.5.1.dist-info/RECORD +0 -44
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/LICENSE.txt +0 -0
- {pydna-5.5.1.dist-info → pydna-5.5.2.dist-info}/WHEEL +0 -0
pydna/dseq.py
CHANGED
|
@@ -36,23 +36,10 @@ from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
|
|
|
36
36
|
from Bio.Restriction import RestrictionBatch as _RestrictionBatch
|
|
37
37
|
from Bio.Restriction import CommOnly
|
|
38
38
|
|
|
39
|
-
from typing import (
|
|
40
|
-
TYPE_CHECKING,
|
|
41
|
-
List as _List,
|
|
42
|
-
Tuple as _Tuple,
|
|
43
|
-
Union as _Union,
|
|
44
|
-
TypeVar as _TypeVar,
|
|
45
|
-
Iterable as _Iterable,
|
|
46
|
-
)
|
|
47
39
|
|
|
48
|
-
|
|
49
|
-
from Bio.Restriction import AbstractCut as _AbstractCut
|
|
40
|
+
from .types import DseqType, EnzymesType, CutSiteType
|
|
50
41
|
|
|
51
|
-
|
|
52
|
-
# To represent any subclass of Dseq
|
|
53
|
-
DseqType = _TypeVar("DseqType", bound="Dseq")
|
|
54
|
-
EnzymesType = _TypeVar("EnzymesType", _RestrictionBatch, _Iterable["_AbstractCut"], "_AbstractCut")
|
|
55
|
-
CutSiteType = _Tuple[_Tuple[int, int], _Union["_AbstractCut", None]]
|
|
42
|
+
from typing import List as _List, Tuple as _Tuple, Union as _Union
|
|
56
43
|
|
|
57
44
|
|
|
58
45
|
class Dseq(_Seq):
|
|
@@ -338,7 +325,9 @@ class Dseq(_Seq):
|
|
|
338
325
|
int(_math.log(len(watson)) / _math.log(4)),
|
|
339
326
|
)
|
|
340
327
|
if len(olaps) == 0:
|
|
341
|
-
raise ValueError(
|
|
328
|
+
raise ValueError(
|
|
329
|
+
"Could not anneal the two strands." " Please provide ovhg value"
|
|
330
|
+
)
|
|
342
331
|
|
|
343
332
|
# We extract the positions and length of the first (longest) overlap, since
|
|
344
333
|
# common_sub_strings sorts the overlaps by length.
|
|
@@ -346,7 +335,10 @@ class Dseq(_Seq):
|
|
|
346
335
|
|
|
347
336
|
# We see if there is another overlap of the same length
|
|
348
337
|
if any(olap[2] >= longest_olap_length for olap in olaps[1:]):
|
|
349
|
-
raise ValueError(
|
|
338
|
+
raise ValueError(
|
|
339
|
+
"More than one way of annealing the"
|
|
340
|
+
" strands. Please provide ovhg value"
|
|
341
|
+
)
|
|
350
342
|
|
|
351
343
|
ovhg = pos_crick - pos_watson
|
|
352
344
|
|
|
@@ -354,7 +346,12 @@ class Dseq(_Seq):
|
|
|
354
346
|
asn = (-ovhg * " ") + _pretty_str(_rc(crick))
|
|
355
347
|
|
|
356
348
|
self._data = bytes(
|
|
357
|
-
"".join(
|
|
349
|
+
"".join(
|
|
350
|
+
[
|
|
351
|
+
a.strip() or b.strip()
|
|
352
|
+
for a, b in _itertools.zip_longest(sns, asn, fillvalue=" ")
|
|
353
|
+
]
|
|
354
|
+
),
|
|
358
355
|
encoding="ASCII",
|
|
359
356
|
)
|
|
360
357
|
|
|
@@ -369,10 +366,14 @@ class Dseq(_Seq):
|
|
|
369
366
|
)
|
|
370
367
|
elif ovhg > 0:
|
|
371
368
|
if ovhg + len(watson) > len(crick):
|
|
372
|
-
self._data = bytes(
|
|
369
|
+
self._data = bytes(
|
|
370
|
+
_rc(crick[-ovhg:]) + watson, encoding="ASCII"
|
|
371
|
+
)
|
|
373
372
|
else:
|
|
374
373
|
self._data = bytes(
|
|
375
|
-
_rc(crick[-ovhg:])
|
|
374
|
+
_rc(crick[-ovhg:])
|
|
375
|
+
+ watson
|
|
376
|
+
+ _rc(crick[: len(crick) - ovhg - len(watson)]),
|
|
376
377
|
encoding="ASCII",
|
|
377
378
|
)
|
|
378
379
|
else: # ovhg < 0
|
|
@@ -409,7 +410,11 @@ class Dseq(_Seq):
|
|
|
409
410
|
obj.pos = pos
|
|
410
411
|
wb = bytes(watson, encoding="ASCII")
|
|
411
412
|
cb = bytes(crick, encoding="ASCII")
|
|
412
|
-
obj._data =
|
|
413
|
+
obj._data = (
|
|
414
|
+
_rc(cb[-max(0, ovhg) or len(cb) :])
|
|
415
|
+
+ wb
|
|
416
|
+
+ _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
|
|
417
|
+
)
|
|
413
418
|
return obj
|
|
414
419
|
|
|
415
420
|
@classmethod
|
|
@@ -445,11 +450,17 @@ class Dseq(_Seq):
|
|
|
445
450
|
obj.pos = 0
|
|
446
451
|
wb = bytes(watson, encoding="ASCII")
|
|
447
452
|
cb = bytes(crick, encoding="ASCII")
|
|
448
|
-
obj._data =
|
|
453
|
+
obj._data = (
|
|
454
|
+
_rc(cb[-max(0, ovhg) or len(cb) :])
|
|
455
|
+
+ wb
|
|
456
|
+
+ _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
|
|
457
|
+
)
|
|
449
458
|
return obj
|
|
450
459
|
|
|
451
460
|
@classmethod
|
|
452
|
-
def from_full_sequence_and_overhangs(
|
|
461
|
+
def from_full_sequence_and_overhangs(
|
|
462
|
+
cls, full_sequence: str, crick_ovhg: int, watson_ovhg: int
|
|
463
|
+
):
|
|
453
464
|
"""Create a linear Dseq object from a full sequence and the 3' overhangs of each strand.
|
|
454
465
|
|
|
455
466
|
The order of the parameters is like this because the 3' overhang of the crick strand is the one
|
|
@@ -614,7 +625,9 @@ class Dseq(_Seq):
|
|
|
614
625
|
pos=self.pos,
|
|
615
626
|
)
|
|
616
627
|
|
|
617
|
-
def find(
|
|
628
|
+
def find(
|
|
629
|
+
self, sub: _Union[_SeqAbstractBaseClass, str, bytes], start=0, end=_sys.maxsize
|
|
630
|
+
) -> int:
|
|
618
631
|
"""This method behaves like the python string method of the same name.
|
|
619
632
|
|
|
620
633
|
Returns an integer, the index of the first occurrence of substring
|
|
@@ -667,7 +680,9 @@ class Dseq(_Seq):
|
|
|
667
680
|
sns = (self.ovhg * " " + self.watson + x * " ")[sl]
|
|
668
681
|
asn = (-self.ovhg * " " + self.crick[::-1] + -x * " ")[sl]
|
|
669
682
|
|
|
670
|
-
ovhg = max(
|
|
683
|
+
ovhg = max(
|
|
684
|
+
(len(sns) - len(sns.lstrip()), -len(asn) + len(asn.lstrip())), key=abs
|
|
685
|
+
)
|
|
671
686
|
|
|
672
687
|
return Dseq(
|
|
673
688
|
sns.strip(),
|
|
@@ -694,8 +709,14 @@ class Dseq(_Seq):
|
|
|
694
709
|
start = sl.start
|
|
695
710
|
stop = sl.stop
|
|
696
711
|
|
|
697
|
-
w =
|
|
698
|
-
|
|
712
|
+
w = (
|
|
713
|
+
self.watson[(start or len(self)) :: stp]
|
|
714
|
+
+ self.watson[: (stop or 0) : stp]
|
|
715
|
+
)
|
|
716
|
+
c = (
|
|
717
|
+
self.crick[len(self) - stop :: stp]
|
|
718
|
+
+ self.crick[: len(self) - start : stp]
|
|
719
|
+
)
|
|
699
720
|
|
|
700
721
|
return Dseq(w, c, ovhg=0) # , linear=True)
|
|
701
722
|
|
|
@@ -772,7 +793,9 @@ class Dseq(_Seq):
|
|
|
772
793
|
b = "{}..{}".format(b[:4], b[-4:])
|
|
773
794
|
e = "{}..{}".format(e[:4], e[-4:])
|
|
774
795
|
|
|
775
|
-
return _pretty_str(
|
|
796
|
+
return _pretty_str(
|
|
797
|
+
"{klass}({top}{size})\n" "{a}{b}{c}\n" "{d}{e}{f}"
|
|
798
|
+
).format(
|
|
776
799
|
klass=self.__class__.__name__,
|
|
777
800
|
top={False: "-", True: "o"}[self.circular],
|
|
778
801
|
size=len(self),
|
|
@@ -890,7 +913,9 @@ class Dseq(_Seq):
|
|
|
890
913
|
# assert len(nseq.crick) == len(nseq.watson)
|
|
891
914
|
return nseq
|
|
892
915
|
else:
|
|
893
|
-
raise TypeError(
|
|
916
|
+
raise TypeError(
|
|
917
|
+
"DNA cannot be circularized.\n" "5' and 3' sticky ends not compatible!"
|
|
918
|
+
)
|
|
894
919
|
|
|
895
920
|
def tolinear(self: DseqType) -> DseqType: # pragma: no cover
|
|
896
921
|
"""Returns a blunt, linear copy of a circular Dseq object. This can
|
|
@@ -919,7 +944,9 @@ class Dseq(_Seq):
|
|
|
919
944
|
from pydna import _PydnaDeprecationWarning
|
|
920
945
|
|
|
921
946
|
_warnings.warn(
|
|
922
|
-
"tolinear method is obsolete; "
|
|
947
|
+
"tolinear method is obsolete; "
|
|
948
|
+
"please use obj[:] "
|
|
949
|
+
"instead of obj.tolinear().",
|
|
923
950
|
_PydnaDeprecationWarning,
|
|
924
951
|
)
|
|
925
952
|
if not self.circular:
|
|
@@ -1060,7 +1087,9 @@ class Dseq(_Seq):
|
|
|
1060
1087
|
other_type, other_tail = other.five_prime_end()
|
|
1061
1088
|
|
|
1062
1089
|
if self_type == other_type and str(self_tail) == str(_rc(other_tail)):
|
|
1063
|
-
answer = Dseq.quick(
|
|
1090
|
+
answer = Dseq.quick(
|
|
1091
|
+
self.watson + other.watson, other.crick + self.crick, self.ovhg
|
|
1092
|
+
)
|
|
1064
1093
|
elif not self:
|
|
1065
1094
|
answer = _copy.deepcopy(other)
|
|
1066
1095
|
elif not other:
|
|
@@ -1071,7 +1100,11 @@ class Dseq(_Seq):
|
|
|
1071
1100
|
|
|
1072
1101
|
def __mul__(self: DseqType, number: int) -> DseqType:
|
|
1073
1102
|
if not isinstance(number, int):
|
|
1074
|
-
raise TypeError(
|
|
1103
|
+
raise TypeError(
|
|
1104
|
+
"TypeError: can't multiply Dseq by non-int of type {}".format(
|
|
1105
|
+
type(number)
|
|
1106
|
+
)
|
|
1107
|
+
)
|
|
1075
1108
|
if number <= 0:
|
|
1076
1109
|
return self.__class__("")
|
|
1077
1110
|
new = _copy.deepcopy(self)
|
|
@@ -1169,8 +1202,12 @@ class Dseq(_Seq):
|
|
|
1169
1202
|
def transcribe(self) -> _Seq:
|
|
1170
1203
|
return _Seq(self.watson).transcribe()
|
|
1171
1204
|
|
|
1172
|
-
def translate(
|
|
1173
|
-
|
|
1205
|
+
def translate(
|
|
1206
|
+
self, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap="-"
|
|
1207
|
+
) -> _Seq:
|
|
1208
|
+
return _Seq(
|
|
1209
|
+
_translate_str(str(self), table, stop_symbol, to_stop, cds, gap=gap)
|
|
1210
|
+
)
|
|
1174
1211
|
|
|
1175
1212
|
def mung(self) -> "Dseq":
|
|
1176
1213
|
"""
|
|
@@ -1213,7 +1250,11 @@ class Dseq(_Seq):
|
|
|
1213
1250
|
|
|
1214
1251
|
|
|
1215
1252
|
"""
|
|
1216
|
-
return Dseq(
|
|
1253
|
+
return Dseq(
|
|
1254
|
+
self.watson[
|
|
1255
|
+
max(0, -self.ovhg) : min(len(self.watson), len(self.crick) - self.ovhg)
|
|
1256
|
+
]
|
|
1257
|
+
)
|
|
1217
1258
|
|
|
1218
1259
|
def T4(self, nucleotides=None) -> "Dseq":
|
|
1219
1260
|
"""Fill in five prime protruding ends and chewing back
|
|
@@ -1309,7 +1350,9 @@ class Dseq(_Seq):
|
|
|
1309
1350
|
d.crick = d.crick[n:]
|
|
1310
1351
|
return d
|
|
1311
1352
|
|
|
1312
|
-
def no_cutters(
|
|
1353
|
+
def no_cutters(
|
|
1354
|
+
self, batch: _Union[_RestrictionBatch, None] = None
|
|
1355
|
+
) -> _RestrictionBatch:
|
|
1313
1356
|
"""Enzymes in a RestrictionBatch not cutting sequence."""
|
|
1314
1357
|
if batch is None:
|
|
1315
1358
|
batch = CommOnly
|
|
@@ -1317,7 +1360,9 @@ class Dseq(_Seq):
|
|
|
1317
1360
|
ncut = {enz: sitelist for (enz, sitelist) in ana.items() if not sitelist}
|
|
1318
1361
|
return _RestrictionBatch(ncut)
|
|
1319
1362
|
|
|
1320
|
-
def unique_cutters(
|
|
1363
|
+
def unique_cutters(
|
|
1364
|
+
self, batch: _Union[_RestrictionBatch, None] = None
|
|
1365
|
+
) -> _RestrictionBatch:
|
|
1321
1366
|
"""Enzymes in a RestrictionBatch cutting sequence once."""
|
|
1322
1367
|
if batch is None:
|
|
1323
1368
|
batch = CommOnly
|
|
@@ -1325,13 +1370,17 @@ class Dseq(_Seq):
|
|
|
1325
1370
|
|
|
1326
1371
|
once_cutters = unique_cutters # alias for unique_cutters
|
|
1327
1372
|
|
|
1328
|
-
def twice_cutters(
|
|
1373
|
+
def twice_cutters(
|
|
1374
|
+
self, batch: _Union[_RestrictionBatch, None] = None
|
|
1375
|
+
) -> _RestrictionBatch:
|
|
1329
1376
|
"""Enzymes in a RestrictionBatch cutting sequence twice."""
|
|
1330
1377
|
if batch is None:
|
|
1331
1378
|
batch = CommOnly
|
|
1332
1379
|
return self.n_cutters(n=2, batch=batch)
|
|
1333
1380
|
|
|
1334
|
-
def n_cutters(
|
|
1381
|
+
def n_cutters(
|
|
1382
|
+
self, n=3, batch: _Union[_RestrictionBatch, None] = None
|
|
1383
|
+
) -> _RestrictionBatch:
|
|
1335
1384
|
"""Enzymes in a RestrictionBatch cutting n times."""
|
|
1336
1385
|
if batch is None:
|
|
1337
1386
|
batch = CommOnly
|
|
@@ -1339,7 +1388,9 @@ class Dseq(_Seq):
|
|
|
1339
1388
|
ncut = {enz: sitelist for (enz, sitelist) in ana.items() if len(sitelist) == n}
|
|
1340
1389
|
return _RestrictionBatch(ncut)
|
|
1341
1390
|
|
|
1342
|
-
def cutters(
|
|
1391
|
+
def cutters(
|
|
1392
|
+
self, batch: _Union[_RestrictionBatch, None] = None
|
|
1393
|
+
) -> _RestrictionBatch:
|
|
1343
1394
|
"""Enzymes in a RestrictionBatch cutting sequence at least once."""
|
|
1344
1395
|
if batch is None:
|
|
1345
1396
|
batch = CommOnly
|
|
@@ -1350,7 +1401,9 @@ class Dseq(_Seq):
|
|
|
1350
1401
|
def seguid(self) -> str:
|
|
1351
1402
|
"""SEGUID checksum for the sequence."""
|
|
1352
1403
|
if self.circular:
|
|
1353
|
-
cs = _cdseguid(
|
|
1404
|
+
cs = _cdseguid(
|
|
1405
|
+
self.watson.upper(), self.crick.upper(), alphabet="{DNA-extended}"
|
|
1406
|
+
)
|
|
1354
1407
|
else:
|
|
1355
1408
|
"""docstring."""
|
|
1356
1409
|
w = f"{self.ovhg * '-'}{self.watson}{'-' * (-self.ovhg + len(self.crick) - len(self.watson))}".upper()
|
|
@@ -1396,7 +1449,9 @@ class Dseq(_Seq):
|
|
|
1396
1449
|
>>> a.isblunt()
|
|
1397
1450
|
False
|
|
1398
1451
|
"""
|
|
1399
|
-
return
|
|
1452
|
+
return (
|
|
1453
|
+
self.ovhg == 0 and len(self.watson) == len(self.crick) and not self.circular
|
|
1454
|
+
)
|
|
1400
1455
|
|
|
1401
1456
|
def cas9(self, RNA: str) -> _Tuple[slice, ...]:
|
|
1402
1457
|
"""docstring."""
|
|
@@ -1492,7 +1547,11 @@ class Dseq(_Seq):
|
|
|
1492
1547
|
if self.circular:
|
|
1493
1548
|
end_of_recognition_site %= len(self)
|
|
1494
1549
|
recognition_site = self[start_of_recognition_site:end_of_recognition_site]
|
|
1495
|
-
if
|
|
1550
|
+
if (
|
|
1551
|
+
len(recognition_site) == 0
|
|
1552
|
+
or recognition_site.ovhg != 0
|
|
1553
|
+
or recognition_site.watson_ovhg() != 0
|
|
1554
|
+
):
|
|
1496
1555
|
if enz is None or enz.scd5 is None:
|
|
1497
1556
|
return False
|
|
1498
1557
|
else:
|
|
@@ -1503,9 +1562,15 @@ class Dseq(_Seq):
|
|
|
1503
1562
|
end_of_recognition_site = start_of_recognition_site + enz.size
|
|
1504
1563
|
if self.circular:
|
|
1505
1564
|
end_of_recognition_site %= len(self)
|
|
1506
|
-
recognition_site = self[
|
|
1507
|
-
|
|
1508
|
-
|
|
1565
|
+
recognition_site = self[
|
|
1566
|
+
start_of_recognition_site:end_of_recognition_site
|
|
1567
|
+
]
|
|
1568
|
+
|
|
1569
|
+
if (
|
|
1570
|
+
len(recognition_site) == 0
|
|
1571
|
+
or recognition_site.ovhg != 0
|
|
1572
|
+
or recognition_site.watson_ovhg() != 0
|
|
1573
|
+
):
|
|
1509
1574
|
return False
|
|
1510
1575
|
|
|
1511
1576
|
return True
|
|
@@ -1611,7 +1676,9 @@ class Dseq(_Seq):
|
|
|
1611
1676
|
return len(self) + self.watson_ovhg(), len(self)
|
|
1612
1677
|
return len(self), len(self) - self.watson_ovhg()
|
|
1613
1678
|
|
|
1614
|
-
def get_cut_parameters(
|
|
1679
|
+
def get_cut_parameters(
|
|
1680
|
+
self, cut: _Union[CutSiteType, None], is_left: bool
|
|
1681
|
+
) -> _Tuple[int, int, int]:
|
|
1615
1682
|
"""For a given cut expressed as ((cut_watson, ovhg), enz), returns
|
|
1616
1683
|
a tuple (cut_watson, cut_crick, ovhg).
|
|
1617
1684
|
|
|
@@ -1701,7 +1768,11 @@ class Dseq(_Seq):
|
|
|
1701
1768
|
return Dseq(
|
|
1702
1769
|
str(self[left_watson:right_watson]),
|
|
1703
1770
|
# The line below could be easier to understand as _rc(str(self[left_crick:right_crick])), but it does not preserve the case
|
|
1704
|
-
str(
|
|
1771
|
+
str(
|
|
1772
|
+
self.reverse_complement()[
|
|
1773
|
+
len(self) - right_crick : len(self) - left_crick
|
|
1774
|
+
]
|
|
1775
|
+
),
|
|
1705
1776
|
ovhg=ovhg_left,
|
|
1706
1777
|
)
|
|
1707
1778
|
|
|
@@ -1757,14 +1828,3 @@ class Dseq(_Seq):
|
|
|
1757
1828
|
cutsites.append(cutsites[0])
|
|
1758
1829
|
|
|
1759
1830
|
return list(zip(cutsites, cutsites[1:]))
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
if __name__ == "__main__":
|
|
1763
|
-
import os as _os
|
|
1764
|
-
|
|
1765
|
-
cached = _os.getenv("pydna_cached_funcs", "")
|
|
1766
|
-
_os.environ["pydna_cached_funcs"] = ""
|
|
1767
|
-
import doctest
|
|
1768
|
-
|
|
1769
|
-
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
|
|
1770
|
-
_os.environ["pydna_cached_funcs"] = cached
|