pydna 5.5.1__py3-none-any.whl → 5.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/dseq.py CHANGED
@@ -36,23 +36,10 @@ from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
36
36
  from Bio.Restriction import RestrictionBatch as _RestrictionBatch
37
37
  from Bio.Restriction import CommOnly
38
38
 
39
- from typing import (
40
- TYPE_CHECKING,
41
- List as _List,
42
- Tuple as _Tuple,
43
- Union as _Union,
44
- TypeVar as _TypeVar,
45
- Iterable as _Iterable,
46
- )
47
39
 
48
- if TYPE_CHECKING:
49
- from Bio.Restriction import AbstractCut as _AbstractCut
40
+ from .types import DseqType, EnzymesType, CutSiteType
50
41
 
51
-
52
- # To represent any subclass of Dseq
53
- DseqType = _TypeVar("DseqType", bound="Dseq")
54
- EnzymesType = _TypeVar("EnzymesType", _RestrictionBatch, _Iterable["_AbstractCut"], "_AbstractCut")
55
- CutSiteType = _Tuple[_Tuple[int, int], _Union["_AbstractCut", None]]
42
+ from typing import List as _List, Tuple as _Tuple, Union as _Union
56
43
 
57
44
 
58
45
  class Dseq(_Seq):
@@ -338,7 +325,9 @@ class Dseq(_Seq):
338
325
  int(_math.log(len(watson)) / _math.log(4)),
339
326
  )
340
327
  if len(olaps) == 0:
341
- raise ValueError("Could not anneal the two strands." " Please provide ovhg value")
328
+ raise ValueError(
329
+ "Could not anneal the two strands." " Please provide ovhg value"
330
+ )
342
331
 
343
332
  # We extract the positions and length of the first (longest) overlap, since
344
333
  # common_sub_strings sorts the overlaps by length.
@@ -346,7 +335,10 @@ class Dseq(_Seq):
346
335
 
347
336
  # We see if there is another overlap of the same length
348
337
  if any(olap[2] >= longest_olap_length for olap in olaps[1:]):
349
- raise ValueError("More than one way of annealing the" " strands. Please provide ovhg value")
338
+ raise ValueError(
339
+ "More than one way of annealing the"
340
+ " strands. Please provide ovhg value"
341
+ )
350
342
 
351
343
  ovhg = pos_crick - pos_watson
352
344
 
@@ -354,7 +346,12 @@ class Dseq(_Seq):
354
346
  asn = (-ovhg * " ") + _pretty_str(_rc(crick))
355
347
 
356
348
  self._data = bytes(
357
- "".join([a.strip() or b.strip() for a, b in _itertools.zip_longest(sns, asn, fillvalue=" ")]),
349
+ "".join(
350
+ [
351
+ a.strip() or b.strip()
352
+ for a, b in _itertools.zip_longest(sns, asn, fillvalue=" ")
353
+ ]
354
+ ),
358
355
  encoding="ASCII",
359
356
  )
360
357
 
@@ -369,10 +366,14 @@ class Dseq(_Seq):
369
366
  )
370
367
  elif ovhg > 0:
371
368
  if ovhg + len(watson) > len(crick):
372
- self._data = bytes(_rc(crick[-ovhg:]) + watson, encoding="ASCII")
369
+ self._data = bytes(
370
+ _rc(crick[-ovhg:]) + watson, encoding="ASCII"
371
+ )
373
372
  else:
374
373
  self._data = bytes(
375
- _rc(crick[-ovhg:]) + watson + _rc(crick[: len(crick) - ovhg - len(watson)]),
374
+ _rc(crick[-ovhg:])
375
+ + watson
376
+ + _rc(crick[: len(crick) - ovhg - len(watson)]),
376
377
  encoding="ASCII",
377
378
  )
378
379
  else: # ovhg < 0
@@ -409,7 +410,11 @@ class Dseq(_Seq):
409
410
  obj.pos = pos
410
411
  wb = bytes(watson, encoding="ASCII")
411
412
  cb = bytes(crick, encoding="ASCII")
412
- obj._data = _rc(cb[-max(0, ovhg) or len(cb) :]) + wb + _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
413
+ obj._data = (
414
+ _rc(cb[-max(0, ovhg) or len(cb) :])
415
+ + wb
416
+ + _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
417
+ )
413
418
  return obj
414
419
 
415
420
  @classmethod
@@ -445,11 +450,17 @@ class Dseq(_Seq):
445
450
  obj.pos = 0
446
451
  wb = bytes(watson, encoding="ASCII")
447
452
  cb = bytes(crick, encoding="ASCII")
448
- obj._data = _rc(cb[-max(0, ovhg) or len(cb) :]) + wb + _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
453
+ obj._data = (
454
+ _rc(cb[-max(0, ovhg) or len(cb) :])
455
+ + wb
456
+ + _rc(cb[: max(0, len(cb) - ovhg - len(wb))])
457
+ )
449
458
  return obj
450
459
 
451
460
  @classmethod
452
- def from_full_sequence_and_overhangs(cls, full_sequence: str, crick_ovhg: int, watson_ovhg: int):
461
+ def from_full_sequence_and_overhangs(
462
+ cls, full_sequence: str, crick_ovhg: int, watson_ovhg: int
463
+ ):
453
464
  """Create a linear Dseq object from a full sequence and the 3' overhangs of each strand.
454
465
 
455
466
  The order of the parameters is like this because the 3' overhang of the crick strand is the one
@@ -614,7 +625,9 @@ class Dseq(_Seq):
614
625
  pos=self.pos,
615
626
  )
616
627
 
617
- def find(self, sub: _Union[_SeqAbstractBaseClass, str, bytes], start=0, end=_sys.maxsize) -> int:
628
+ def find(
629
+ self, sub: _Union[_SeqAbstractBaseClass, str, bytes], start=0, end=_sys.maxsize
630
+ ) -> int:
618
631
  """This method behaves like the python string method of the same name.
619
632
 
620
633
  Returns an integer, the index of the first occurrence of substring
@@ -667,7 +680,9 @@ class Dseq(_Seq):
667
680
  sns = (self.ovhg * " " + self.watson + x * " ")[sl]
668
681
  asn = (-self.ovhg * " " + self.crick[::-1] + -x * " ")[sl]
669
682
 
670
- ovhg = max((len(sns) - len(sns.lstrip()), -len(asn) + len(asn.lstrip())), key=abs)
683
+ ovhg = max(
684
+ (len(sns) - len(sns.lstrip()), -len(asn) + len(asn.lstrip())), key=abs
685
+ )
671
686
 
672
687
  return Dseq(
673
688
  sns.strip(),
@@ -694,8 +709,14 @@ class Dseq(_Seq):
694
709
  start = sl.start
695
710
  stop = sl.stop
696
711
 
697
- w = self.watson[(start or len(self)) :: stp] + self.watson[: (stop or 0) : stp]
698
- c = self.crick[len(self) - stop :: stp] + self.crick[: len(self) - start : stp]
712
+ w = (
713
+ self.watson[(start or len(self)) :: stp]
714
+ + self.watson[: (stop or 0) : stp]
715
+ )
716
+ c = (
717
+ self.crick[len(self) - stop :: stp]
718
+ + self.crick[: len(self) - start : stp]
719
+ )
699
720
 
700
721
  return Dseq(w, c, ovhg=0) # , linear=True)
701
722
 
@@ -772,7 +793,9 @@ class Dseq(_Seq):
772
793
  b = "{}..{}".format(b[:4], b[-4:])
773
794
  e = "{}..{}".format(e[:4], e[-4:])
774
795
 
775
- return _pretty_str("{klass}({top}{size})\n" "{a}{b}{c}\n" "{d}{e}{f}").format(
796
+ return _pretty_str(
797
+ "{klass}({top}{size})\n" "{a}{b}{c}\n" "{d}{e}{f}"
798
+ ).format(
776
799
  klass=self.__class__.__name__,
777
800
  top={False: "-", True: "o"}[self.circular],
778
801
  size=len(self),
@@ -890,7 +913,9 @@ class Dseq(_Seq):
890
913
  # assert len(nseq.crick) == len(nseq.watson)
891
914
  return nseq
892
915
  else:
893
- raise TypeError("DNA cannot be circularized.\n" "5' and 3' sticky ends not compatible!")
916
+ raise TypeError(
917
+ "DNA cannot be circularized.\n" "5' and 3' sticky ends not compatible!"
918
+ )
894
919
 
895
920
  def tolinear(self: DseqType) -> DseqType: # pragma: no cover
896
921
  """Returns a blunt, linear copy of a circular Dseq object. This can
@@ -919,7 +944,9 @@ class Dseq(_Seq):
919
944
  from pydna import _PydnaDeprecationWarning
920
945
 
921
946
  _warnings.warn(
922
- "tolinear method is obsolete; " "please use obj[:] " "instead of obj.tolinear().",
947
+ "tolinear method is obsolete; "
948
+ "please use obj[:] "
949
+ "instead of obj.tolinear().",
923
950
  _PydnaDeprecationWarning,
924
951
  )
925
952
  if not self.circular:
@@ -1060,7 +1087,9 @@ class Dseq(_Seq):
1060
1087
  other_type, other_tail = other.five_prime_end()
1061
1088
 
1062
1089
  if self_type == other_type and str(self_tail) == str(_rc(other_tail)):
1063
- answer = Dseq.quick(self.watson + other.watson, other.crick + self.crick, self.ovhg)
1090
+ answer = Dseq.quick(
1091
+ self.watson + other.watson, other.crick + self.crick, self.ovhg
1092
+ )
1064
1093
  elif not self:
1065
1094
  answer = _copy.deepcopy(other)
1066
1095
  elif not other:
@@ -1071,7 +1100,11 @@ class Dseq(_Seq):
1071
1100
 
1072
1101
  def __mul__(self: DseqType, number: int) -> DseqType:
1073
1102
  if not isinstance(number, int):
1074
- raise TypeError("TypeError: can't multiply Dseq by non-int of type {}".format(type(number)))
1103
+ raise TypeError(
1104
+ "TypeError: can't multiply Dseq by non-int of type {}".format(
1105
+ type(number)
1106
+ )
1107
+ )
1075
1108
  if number <= 0:
1076
1109
  return self.__class__("")
1077
1110
  new = _copy.deepcopy(self)
@@ -1169,8 +1202,12 @@ class Dseq(_Seq):
1169
1202
  def transcribe(self) -> _Seq:
1170
1203
  return _Seq(self.watson).transcribe()
1171
1204
 
1172
- def translate(self, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap="-") -> _Seq:
1173
- return _Seq(_translate_str(str(self), table, stop_symbol, to_stop, cds, gap=gap))
1205
+ def translate(
1206
+ self, table="Standard", stop_symbol="*", to_stop=False, cds=False, gap="-"
1207
+ ) -> _Seq:
1208
+ return _Seq(
1209
+ _translate_str(str(self), table, stop_symbol, to_stop, cds, gap=gap)
1210
+ )
1174
1211
 
1175
1212
  def mung(self) -> "Dseq":
1176
1213
  """
@@ -1213,7 +1250,11 @@ class Dseq(_Seq):
1213
1250
 
1214
1251
 
1215
1252
  """
1216
- return Dseq(self.watson[max(0, -self.ovhg) : min(len(self.watson), len(self.crick) - self.ovhg)])
1253
+ return Dseq(
1254
+ self.watson[
1255
+ max(0, -self.ovhg) : min(len(self.watson), len(self.crick) - self.ovhg)
1256
+ ]
1257
+ )
1217
1258
 
1218
1259
  def T4(self, nucleotides=None) -> "Dseq":
1219
1260
  """Fill in five prime protruding ends and chewing back
@@ -1309,7 +1350,9 @@ class Dseq(_Seq):
1309
1350
  d.crick = d.crick[n:]
1310
1351
  return d
1311
1352
 
1312
- def no_cutters(self, batch: _Union[_RestrictionBatch, None] = None) -> _RestrictionBatch:
1353
+ def no_cutters(
1354
+ self, batch: _Union[_RestrictionBatch, None] = None
1355
+ ) -> _RestrictionBatch:
1313
1356
  """Enzymes in a RestrictionBatch not cutting sequence."""
1314
1357
  if batch is None:
1315
1358
  batch = CommOnly
@@ -1317,7 +1360,9 @@ class Dseq(_Seq):
1317
1360
  ncut = {enz: sitelist for (enz, sitelist) in ana.items() if not sitelist}
1318
1361
  return _RestrictionBatch(ncut)
1319
1362
 
1320
- def unique_cutters(self, batch: _Union[_RestrictionBatch, None] = None) -> _RestrictionBatch:
1363
+ def unique_cutters(
1364
+ self, batch: _Union[_RestrictionBatch, None] = None
1365
+ ) -> _RestrictionBatch:
1321
1366
  """Enzymes in a RestrictionBatch cutting sequence once."""
1322
1367
  if batch is None:
1323
1368
  batch = CommOnly
@@ -1325,13 +1370,17 @@ class Dseq(_Seq):
1325
1370
 
1326
1371
  once_cutters = unique_cutters # alias for unique_cutters
1327
1372
 
1328
- def twice_cutters(self, batch: _Union[_RestrictionBatch, None] = None) -> _RestrictionBatch:
1373
+ def twice_cutters(
1374
+ self, batch: _Union[_RestrictionBatch, None] = None
1375
+ ) -> _RestrictionBatch:
1329
1376
  """Enzymes in a RestrictionBatch cutting sequence twice."""
1330
1377
  if batch is None:
1331
1378
  batch = CommOnly
1332
1379
  return self.n_cutters(n=2, batch=batch)
1333
1380
 
1334
- def n_cutters(self, n=3, batch: _Union[_RestrictionBatch, None] = None) -> _RestrictionBatch:
1381
+ def n_cutters(
1382
+ self, n=3, batch: _Union[_RestrictionBatch, None] = None
1383
+ ) -> _RestrictionBatch:
1335
1384
  """Enzymes in a RestrictionBatch cutting n times."""
1336
1385
  if batch is None:
1337
1386
  batch = CommOnly
@@ -1339,7 +1388,9 @@ class Dseq(_Seq):
1339
1388
  ncut = {enz: sitelist for (enz, sitelist) in ana.items() if len(sitelist) == n}
1340
1389
  return _RestrictionBatch(ncut)
1341
1390
 
1342
- def cutters(self, batch: _Union[_RestrictionBatch, None] = None) -> _RestrictionBatch:
1391
+ def cutters(
1392
+ self, batch: _Union[_RestrictionBatch, None] = None
1393
+ ) -> _RestrictionBatch:
1343
1394
  """Enzymes in a RestrictionBatch cutting sequence at least once."""
1344
1395
  if batch is None:
1345
1396
  batch = CommOnly
@@ -1350,7 +1401,9 @@ class Dseq(_Seq):
1350
1401
  def seguid(self) -> str:
1351
1402
  """SEGUID checksum for the sequence."""
1352
1403
  if self.circular:
1353
- cs = _cdseguid(self.watson.upper(), self.crick.upper(), alphabet="{DNA-extended}")
1404
+ cs = _cdseguid(
1405
+ self.watson.upper(), self.crick.upper(), alphabet="{DNA-extended}"
1406
+ )
1354
1407
  else:
1355
1408
  """docstring."""
1356
1409
  w = f"{self.ovhg * '-'}{self.watson}{'-' * (-self.ovhg + len(self.crick) - len(self.watson))}".upper()
@@ -1396,7 +1449,9 @@ class Dseq(_Seq):
1396
1449
  >>> a.isblunt()
1397
1450
  False
1398
1451
  """
1399
- return self.ovhg == 0 and len(self.watson) == len(self.crick) and not self.circular
1452
+ return (
1453
+ self.ovhg == 0 and len(self.watson) == len(self.crick) and not self.circular
1454
+ )
1400
1455
 
1401
1456
  def cas9(self, RNA: str) -> _Tuple[slice, ...]:
1402
1457
  """docstring."""
@@ -1492,7 +1547,11 @@ class Dseq(_Seq):
1492
1547
  if self.circular:
1493
1548
  end_of_recognition_site %= len(self)
1494
1549
  recognition_site = self[start_of_recognition_site:end_of_recognition_site]
1495
- if len(recognition_site) == 0 or recognition_site.ovhg != 0 or recognition_site.watson_ovhg() != 0:
1550
+ if (
1551
+ len(recognition_site) == 0
1552
+ or recognition_site.ovhg != 0
1553
+ or recognition_site.watson_ovhg() != 0
1554
+ ):
1496
1555
  if enz is None or enz.scd5 is None:
1497
1556
  return False
1498
1557
  else:
@@ -1503,9 +1562,15 @@ class Dseq(_Seq):
1503
1562
  end_of_recognition_site = start_of_recognition_site + enz.size
1504
1563
  if self.circular:
1505
1564
  end_of_recognition_site %= len(self)
1506
- recognition_site = self[start_of_recognition_site:end_of_recognition_site]
1507
-
1508
- if len(recognition_site) == 0 or recognition_site.ovhg != 0 or recognition_site.watson_ovhg() != 0:
1565
+ recognition_site = self[
1566
+ start_of_recognition_site:end_of_recognition_site
1567
+ ]
1568
+
1569
+ if (
1570
+ len(recognition_site) == 0
1571
+ or recognition_site.ovhg != 0
1572
+ or recognition_site.watson_ovhg() != 0
1573
+ ):
1509
1574
  return False
1510
1575
 
1511
1576
  return True
@@ -1611,7 +1676,9 @@ class Dseq(_Seq):
1611
1676
  return len(self) + self.watson_ovhg(), len(self)
1612
1677
  return len(self), len(self) - self.watson_ovhg()
1613
1678
 
1614
- def get_cut_parameters(self, cut: _Union[CutSiteType, None], is_left: bool) -> _Tuple[int, int, int]:
1679
+ def get_cut_parameters(
1680
+ self, cut: _Union[CutSiteType, None], is_left: bool
1681
+ ) -> _Tuple[int, int, int]:
1615
1682
  """For a given cut expressed as ((cut_watson, ovhg), enz), returns
1616
1683
  a tuple (cut_watson, cut_crick, ovhg).
1617
1684
 
@@ -1701,7 +1768,11 @@ class Dseq(_Seq):
1701
1768
  return Dseq(
1702
1769
  str(self[left_watson:right_watson]),
1703
1770
  # The line below could be easier to understand as _rc(str(self[left_crick:right_crick])), but it does not preserve the case
1704
- str(self.reverse_complement()[len(self) - right_crick : len(self) - left_crick]),
1771
+ str(
1772
+ self.reverse_complement()[
1773
+ len(self) - right_crick : len(self) - left_crick
1774
+ ]
1775
+ ),
1705
1776
  ovhg=ovhg_left,
1706
1777
  )
1707
1778
 
@@ -1757,14 +1828,3 @@ class Dseq(_Seq):
1757
1828
  cutsites.append(cutsites[0])
1758
1829
 
1759
1830
  return list(zip(cutsites, cutsites[1:]))
1760
-
1761
-
1762
- if __name__ == "__main__":
1763
- import os as _os
1764
-
1765
- cached = _os.getenv("pydna_cached_funcs", "")
1766
- _os.environ["pydna_cached_funcs"] = ""
1767
- import doctest
1768
-
1769
- doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
1770
- _os.environ["pydna_cached_funcs"] = cached