pydna 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/dseqrecord.py CHANGED
@@ -11,30 +11,28 @@ Seq and SeqRecord classes, respectively.
11
11
 
12
12
  The Dseq and Dseqrecord classes support the notion of circular and linear DNA topology.
13
13
  """
14
- from Bio.Restriction import RestrictionBatch as _RestrictionBatch
14
+ from Bio.Restriction import RestrictionBatch
15
15
  from Bio.Restriction import CommOnly
16
- from pydna.dseq import Dseq as _Dseq
17
- from pydna._pretty import pretty_str as _pretty_str
18
- from pydna.utils import flatten as _flatten, location_boundaries as _location_boundaries
19
-
20
- # from pydna.utils import memorize as _memorize
21
- from pydna.utils import rc as _rc
22
- from pydna.utils import shift_location as _shift_location
23
- from pydna.utils import shift_feature as _shift_feature
24
- from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
25
- from Bio.SeqFeature import SeqFeature as _SeqFeature
16
+ from pydna.dseq import Dseq
17
+ from pydna._pretty import pretty_str
18
+ from pydna.utils import flatten, location_boundaries
19
+
20
+ from pydna.utils import shift_location
21
+ from pydna.utils import shift_feature
22
+ from pydna.common_sub_strings import common_sub_strings
23
+ from Bio.SeqFeature import SeqFeature
26
24
  from Bio import SeqIO
27
- from Bio.SeqFeature import CompoundLocation as _CompoundLocation
28
- from Bio.SeqFeature import SimpleLocation as _SimpleLocation
29
- from pydna.seqrecord import SeqRecord as _SeqRecord
30
- from Bio.Seq import translate as _translate
31
- from pydna.utils import identifier_from_string as _identifier_from_string
32
- import copy as _copy
33
- import operator as _operator
34
- import os as _os
35
- import re as _re
36
- import time as _time
37
- import datetime as _datetime
25
+ from Bio.SeqFeature import CompoundLocation
26
+ from Bio.SeqFeature import SimpleLocation
27
+ from pydna.seqrecord import SeqRecord
28
+ from Bio.Seq import translate
29
+ from pydna.utils import identifier_from_string
30
+ import copy
31
+ import operator
32
+ import os
33
+ import re
34
+ import time
35
+ import datetime
38
36
  from typing import Union, TYPE_CHECKING
39
37
  from pydna.opencloning_models import SequenceCutSource
40
38
 
@@ -42,20 +40,15 @@ if TYPE_CHECKING: # pragma: no cover
42
40
  from pydna.opencloning_models import Source
43
41
 
44
42
 
45
- # import logging as _logging
46
-
47
- # _module_logger = _logging.getLogger("pydna." + __name__)
48
-
49
-
50
43
  try:
51
- from IPython.display import display_html as _display_html
44
+ from IPython.display import display_html
52
45
  except ImportError:
53
46
 
54
- def _display_html(item, raw=None):
47
+ def display_html(item, raw=None):
55
48
  return item
56
49
 
57
50
 
58
- class Dseqrecord(_SeqRecord):
51
+ class Dseqrecord(SeqRecord):
59
52
  """Dseqrecord is a double stranded version of the Biopython SeqRecord [#]_ class.
60
53
  The Dseqrecord object holds a Dseq object describing the sequence.
61
54
  Additionally, Dseqrecord hold meta information about the sequence in the
@@ -132,7 +125,7 @@ class Dseqrecord(_SeqRecord):
132
125
 
133
126
  """
134
127
 
135
- seq: _Dseq
128
+ seq: Dseq
136
129
  source: Union["Source", None] = None
137
130
 
138
131
  def __init__(
@@ -144,15 +137,12 @@ class Dseqrecord(_SeqRecord):
144
137
  source=None,
145
138
  **kwargs,
146
139
  ):
147
- # _module_logger.info("### Dseqrecord initialized ###")
148
- # _module_logger.info("argument circular = %s", circular)
149
- # _module_logger.info("circular = %s", circular)
150
140
 
151
141
  if isinstance(record, str):
152
- # _module_logger.info("record is a string")
142
+
153
143
  super().__init__(
154
- _Dseq.from_string(
155
- record,
144
+ Dseq.quick(
145
+ record.encode("ascii"),
156
146
  # linear=linear,
157
147
  circular=bool(circular),
158
148
  ),
@@ -166,14 +156,14 @@ class Dseqrecord(_SeqRecord):
166
156
  record = record[:]
167
157
  elif circular is True:
168
158
  record = record.looped()
169
- # _module_logger.info("record is a Dseq object")
159
+
170
160
  super().__init__(record, *args, **kwargs)
171
161
 
172
162
  # record is a Bio.Seq object ?
173
163
  elif hasattr(record, "transcribe"):
174
- # _module_logger.info("record is a Seq object")
164
+
175
165
  super().__init__(
176
- _Dseq(
166
+ Dseq(
177
167
  str(record),
178
168
  # linear=linear,
179
169
  circular=bool(circular),
@@ -184,13 +174,13 @@ class Dseqrecord(_SeqRecord):
184
174
 
185
175
  # record is a Bio.SeqRecord or Dseqrecord object ?
186
176
  elif hasattr(record, "features"):
187
- # _module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
177
+
188
178
  for key, value in list(record.__dict__.items()):
189
179
  setattr(self, key, value)
190
180
  self.letter_annotations = {}
191
181
  # record.seq is a Dseq object ?
192
182
  if hasattr(record.seq, "watson"):
193
- new_seq = _copy.copy(record.seq)
183
+ new_seq = copy.copy(record.seq)
194
184
  if circular is False:
195
185
  new_seq = new_seq[:]
196
186
  elif circular is True:
@@ -198,7 +188,7 @@ class Dseqrecord(_SeqRecord):
198
188
  self.seq = new_seq
199
189
  # record.seq is Bio.SeqRecord object ?
200
190
  else:
201
- self.seq = _Dseq(
191
+ self.seq = Dseq(
202
192
  str(record.seq),
203
193
  # linear=linear,
204
194
  circular=bool(circular),
@@ -226,16 +216,14 @@ class Dseqrecord(_SeqRecord):
226
216
  # linear=True, circular=False, n = 5E-14, **kwargs):
227
217
  obj = cls.__new__(cls) # Does not call __init__
228
218
  obj._per_letter_annotations = {}
229
- obj.seq = _Dseq.quick(
230
- record,
231
- _rc(record),
232
- ovhg=0,
219
+ obj.seq = Dseq.quick(
220
+ record.encode("ascii"),
233
221
  # linear=linear,
234
222
  circular=circular,
235
223
  )
236
- obj.id = _pretty_str("id")
237
- obj.name = _pretty_str("name")
238
- obj.description = _pretty_str("description")
224
+ obj.id = pretty_str("id")
225
+ obj.name = pretty_str("name")
226
+ obj.description = pretty_str("description")
239
227
  obj.dbxrefs = []
240
228
  obj.annotations = {"molecule_type": "DNA"}
241
229
  obj.features = []
@@ -247,7 +235,7 @@ class Dseqrecord(_SeqRecord):
247
235
  @classmethod
248
236
  def from_SeqRecord(
249
237
  cls,
250
- record: _SeqRecord,
238
+ record: SeqRecord,
251
239
  *args,
252
240
  circular=None,
253
241
  n=5e-14,
@@ -267,9 +255,7 @@ class Dseqrecord(_SeqRecord):
267
255
  obj.source = None
268
256
  if circular is None:
269
257
  circular = record.annotations.get("topology") == "circular"
270
- obj.seq = _Dseq.quick(
271
- str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
272
- )
258
+ obj.seq = Dseq.quick(record.seq._data, ovhg=0, circular=circular)
273
259
  return obj
274
260
 
275
261
  @property
@@ -339,14 +325,14 @@ class Dseqrecord(_SeqRecord):
339
325
  qualifiers = {}
340
326
  qualifiers.update(kwargs)
341
327
 
342
- location = _CompoundLocation(
328
+ location = CompoundLocation(
343
329
  (
344
- _SimpleLocation(x, self.seq.length, strand=strand),
345
- _SimpleLocation(0, y, strand=strand),
330
+ SimpleLocation(x, len(self.seq), strand=strand),
331
+ SimpleLocation(0, y, strand=strand),
346
332
  )
347
333
  )
348
334
 
349
- sf = _SeqFeature(location, type=type_, qualifiers=qualifiers)
335
+ sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
350
336
 
351
337
  if "label" not in qualifiers:
352
338
  qualifiers["label"] = [f"ft{len(location)}"]
@@ -395,35 +381,31 @@ class Dseqrecord(_SeqRecord):
395
381
  --------
396
382
  pydna.dseq.Dseq.looped
397
383
  """
398
- new = _copy.copy(self)
399
- # for key, value in list(self.__dict__.items()):
400
- # setattr(new, key, value)
401
- new._seq = self.seq.looped()
402
- five_prime = self.seq.five_prime_end()
403
- for fn, fo in zip(new.features, self.features):
404
- if five_prime[0] == "5'":
405
- pass
406
- # fn.location = fn.location + self.seq.ovhg
407
- elif five_prime[0] == "3'":
408
- fn.location = fn.location + (-self.seq.ovhg)
409
- if fn.location.start < 0:
410
- loc1 = _SimpleLocation(
411
- len(new) + fn.location.start, len(new), strand=fn.location.strand
412
- )
413
- loc2 = _SimpleLocation(0, fn.location.end, strand=fn.location.strand)
414
- fn.location = _CompoundLocation([loc1, loc2])
415
-
416
- if fn.location.end > len(new):
417
- loc1 = _SimpleLocation(
418
- fn.location.start, len(new), strand=fn.location.strand
419
- )
420
- loc2 = _SimpleLocation(
421
- 0, fn.location.end - len(new), strand=fn.location.strand
422
- )
423
- fn.location = _CompoundLocation([loc1, loc2])
424
-
425
- fn.qualifiers = fo.qualifiers
426
-
384
+ new = copy.deepcopy(self)
385
+ new.seq = self.seq.looped()
386
+
387
+ old_length = len(self) # Possibly longer, including sticky ends if any.
388
+ new_length = len(new) # Possibly shorter, with blunt ends.
389
+ if old_length != new_length: # Only False if self was blunt.
390
+ new_features = []
391
+ for fn in new.features:
392
+ if len(fn.location) > new_length:
393
+ # Edge case: if the feature is longer than the sequence, it should be
394
+ # dropped. This can happen in a sequence with overhangs, where the feature
395
+ # spans both overhangs.
396
+ #
397
+ # Example:
398
+ # feature
399
+ # <------>
400
+ # aaACGT
401
+ # TGCAtt
402
+ #
403
+ # Circular sequence ACGTtt should not have that feature, so we drop it
404
+ continue
405
+ fn.location = shift_location(fn.location, 0, new_length)
406
+ new_features.append(fn)
407
+
408
+ new.features = new_features
427
409
  return new
428
410
 
429
411
  def tolinear(self): # pragma: no cover
@@ -445,16 +427,16 @@ class Dseqrecord(_SeqRecord):
445
427
  >>>
446
428
 
447
429
  """
448
- import warnings as _warnings
430
+ import warnings
449
431
  from pydna import _PydnaDeprecationWarning
450
432
 
451
- _warnings.warn(
433
+ warnings.warn(
452
434
  "tolinear method is obsolete; "
453
435
  "please use obj[:] "
454
436
  "instead of obj.tolinear().",
455
437
  _PydnaDeprecationWarning,
456
438
  )
457
- new = _copy.copy(self)
439
+ new = copy.copy(self)
458
440
  for key, value in list(self.__dict__.items()):
459
441
  setattr(new, key, value)
460
442
  # new._seq = self.seq.tolinear()
@@ -465,7 +447,7 @@ class Dseqrecord(_SeqRecord):
465
447
 
466
448
  def terminal_transferase(self, nucleotides="a"):
467
449
  """docstring."""
468
- newseq = _copy.deepcopy(self)
450
+ newseq = copy.deepcopy(self)
469
451
  newseq.seq = self.seq.terminal_transferase(nucleotides)
470
452
  for feature in newseq.features:
471
453
  feature.location += len(nucleotides)
@@ -505,12 +487,12 @@ class Dseqrecord(_SeqRecord):
505
487
 
506
488
  """
507
489
 
508
- record = _copy.deepcopy(self)
490
+ record = copy.deepcopy(self)
509
491
  if f in ("genbank", "gb") and self.circular:
510
492
  record.annotations["topology"] = "circular"
511
493
  else:
512
494
  record.annotations["topology"] = "linear"
513
- return _SeqRecord.format(record, f).strip()
495
+ return SeqRecord.format(record, f).strip()
514
496
 
515
497
  def write(self, filename=None, f="gb"):
516
498
  """Writes the Dseqrecord to a file using the format f, which must
@@ -543,9 +525,9 @@ class Dseqrecord(_SeqRecord):
543
525
  # generate a name if no name was given
544
526
  # if not isinstance(filename, str): # is filename a string???
545
527
  # raise ValueError("filename has to be a string, got", type(filename))
546
- name, ext = _os.path.splitext(filename)
528
+ name, ext = os.path.splitext(filename)
547
529
  msg = f"<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>"
548
- if not _os.path.isfile(filename):
530
+ if not os.path.isfile(filename):
549
531
  with open(filename, "w", encoding="utf8") as fp:
550
532
  fp.write(self.format(f))
551
533
  else:
@@ -556,16 +538,16 @@ class Dseqrecord(_SeqRecord):
556
538
  if self.seq != old_file.seq:
557
539
  # If new sequence is different, the old file is
558
540
  # renamed with "_OLD_" suffix:
559
- oldmtime = _datetime.datetime.fromtimestamp(
560
- _os.path.getmtime(filename)
541
+ oldmtime = datetime.datetime.fromtimestamp(
542
+ os.path.getmtime(filename)
561
543
  ).isoformat()
562
- tstmp = int(_time.time() * 1_000_000)
544
+ tstmp = int(time.time() * 1_000_000)
563
545
  old_filename = f"{name}_OLD_{tstmp}{ext}"
564
- _os.rename(filename, old_filename)
546
+ os.rename(filename, old_filename)
565
547
  with open(filename, "w", encoding="utf8") as fp:
566
548
  fp.write(self.format(f))
567
- newmtime = _datetime.datetime.fromtimestamp(
568
- _os.path.getmtime(filename)
549
+ newmtime = datetime.datetime.fromtimestamp(
550
+ os.path.getmtime(filename)
569
551
  ).isoformat()
570
552
  msg = f"""
571
553
  <table style="padding:10px 10px;
@@ -611,8 +593,8 @@ class Dseqrecord(_SeqRecord):
611
593
  elif "seguid" in old_file.annotations.get("comment", ""):
612
594
  pattern = r"(ldseguid|cdseguid)-(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
613
595
  # seguid=NNNNNNNNNNNNNNNNNNNNNNNNNNN_2020-10-10T11:11:11.111111
614
- oldstamp = _re.search(pattern, old_file.description)
615
- newstamp = _re.search(pattern, self.description)
596
+ oldstamp = re.search(pattern, old_file.description)
597
+ newstamp = re.search(pattern, self.description)
616
598
  newdescription = self.description
617
599
  if oldstamp and newstamp:
618
600
  if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
@@ -621,7 +603,7 @@ class Dseqrecord(_SeqRecord):
621
603
  )
622
604
  elif oldstamp:
623
605
  newdescription += " " + oldstamp.group(0)
624
- newobj = _copy.copy(self)
606
+ newobj = copy.copy(self)
625
607
  newobj.description = newdescription
626
608
 
627
609
  with open(filename, "w", encoding="utf8") as fp:
@@ -629,7 +611,7 @@ class Dseqrecord(_SeqRecord):
629
611
  else:
630
612
  with open(filename, "w", encoding="utf8") as fp:
631
613
  fp.write(self.format(f))
632
- return _display_html(msg, raw=True)
614
+ return display_html(msg, raw=True)
633
615
 
634
616
  def find(self, other):
635
617
  # TODO allow strings, seqs, seqrecords or Dseqrecords
@@ -647,7 +629,7 @@ class Dseqrecord(_SeqRecord):
647
629
  def __str__(self):
648
630
  return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
649
631
  self.circular, len(self)
650
- ) + _SeqRecord.__str__(self)
632
+ ) + SeqRecord.__str__(self)
651
633
 
652
634
  def __contains__(self, other):
653
635
  if other.lower() in str(self.seq).lower():
@@ -658,7 +640,7 @@ class Dseqrecord(_SeqRecord):
658
640
  spc = 3 - ln % 3 if ln % 3 else 0
659
641
  s = "n" * spc + s + "nnn"
660
642
  for frame in range(3):
661
- if other.lower() in _translate(s[frame : frame + spc + ln]).lower():
643
+ if other.lower() in translate(s[frame : frame + spc + ln]).lower():
662
644
  return True
663
645
  return False
664
646
 
@@ -667,13 +649,13 @@ class Dseqrecord(_SeqRecord):
667
649
  >>> from pydna.dseqrecord import Dseqrecord
668
650
  >>> s=Dseqrecord("atgtacgatcgtatgctggttatattttag")
669
651
  >>> s.seq.translate()
670
- Seq('MYDRMLVIF*')
652
+ ProteinSeq('MYDRMLVIF*')
671
653
  >>> "RML" in s
672
654
  True
673
655
  >>> "MMM" in s
674
656
  False
675
657
  >>> s.seq.rc().translate()
676
- Seq('LKYNQHTIVH')
658
+ ProteinSeq('LKYNQHTIVH')
677
659
  >>> "QHT" in s.rc()
678
660
  True
679
661
  >>> "QHT" in s
@@ -689,7 +671,7 @@ class Dseqrecord(_SeqRecord):
689
671
  cgtatgctg
690
672
  gcatacgac
691
673
  >>> code.translate()
692
- Seq('RML')
674
+ ProteinSeq('RML')
693
675
  """
694
676
  other = str(other).lower()
695
677
  assert self.seq.watson == "".join(self.seq.watson.split())
@@ -700,7 +682,7 @@ class Dseqrecord(_SeqRecord):
700
682
  start = None
701
683
  for frame in range(3):
702
684
  try:
703
- start = _translate(s[frame : frame + ln + spc]).lower().index(other)
685
+ start = translate(s[frame : frame + ln + spc]).lower().index(other)
704
686
  break
705
687
  except ValueError:
706
688
  pass
@@ -748,7 +730,7 @@ class Dseqrecord(_SeqRecord):
748
730
  matching_reads = []
749
731
 
750
732
  for read_ in reads:
751
- matches = _common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
733
+ matches = common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
752
734
 
753
735
  if not matches:
754
736
  continue
@@ -769,14 +751,14 @@ class Dseqrecord(_SeqRecord):
769
751
  if len(newmatches) > 1:
770
752
  ms = []
771
753
  for m in newmatches:
772
- ms.append(_SimpleLocation(m[0], m[0] + m[2]))
773
- loc = _CompoundLocation(ms)
754
+ ms.append(SimpleLocation(m[0], m[0] + m[2]))
755
+ loc = CompoundLocation(ms)
774
756
  else:
775
757
  a, b, c = newmatches[0]
776
- loc = _SimpleLocation(a, a + c)
758
+ loc = SimpleLocation(a, a + c)
777
759
 
778
760
  self.features.append(
779
- _SeqFeature(
761
+ SeqFeature(
780
762
  loc,
781
763
  qualifiers={"label": [read_.annotations["filename"]]},
782
764
  type="trace",
@@ -786,9 +768,8 @@ class Dseqrecord(_SeqRecord):
786
768
  return [x.annotations["filename"] for x in matching_reads]
787
769
 
788
770
  def __repr__(self):
789
- return "Dseqrecord({}{})".format(
790
- {True: "-", False: "o"}[not self.circular], len(self)
791
- )
771
+ top = {True: "-", False: "o"}[not self.circular]
772
+ return f"{self.__class__.__name__}({top}{len(self)})"
792
773
 
793
774
  def _repr_pretty_(self, p, cycle):
794
775
  p.text(
@@ -799,7 +780,7 @@ class Dseqrecord(_SeqRecord):
799
780
 
800
781
  def __add__(self, other):
801
782
  if hasattr(other, "seq") and hasattr(other.seq, "watson"):
802
- other = _copy.deepcopy(other)
783
+ other = copy.deepcopy(other)
803
784
  other_five_prime = other.seq.five_prime_end()
804
785
  if other_five_prime[0] == "5'":
805
786
  # add other.seq.ovhg
@@ -810,10 +791,10 @@ class Dseqrecord(_SeqRecord):
810
791
  for f in other.features:
811
792
  f.location = f.location + (-other.seq.ovhg)
812
793
 
813
- answer = Dseqrecord(_SeqRecord.__add__(self, other))
794
+ answer = Dseqrecord(SeqRecord.__add__(self, other))
814
795
  answer.n = min(self.n, other.n)
815
796
  else:
816
- answer = Dseqrecord(_SeqRecord.__add__(self, Dseqrecord(other)))
797
+ answer = Dseqrecord(SeqRecord.__add__(self, Dseqrecord(other)))
817
798
  answer.n = self.n
818
799
  return answer
819
800
 
@@ -827,7 +808,7 @@ class Dseqrecord(_SeqRecord):
827
808
  if self.circular:
828
809
  raise TypeError("TypeError: can't multiply circular Dseqrecord.")
829
810
  if number > 0:
830
- new = _copy.deepcopy(self)
811
+ new = copy.deepcopy(self)
831
812
  for i in range(1, number):
832
813
  new += self
833
814
  new._per_letter_annotations = self._per_letter_annotations
@@ -837,7 +818,7 @@ class Dseqrecord(_SeqRecord):
837
818
 
838
819
  def __getitem__(self, sl):
839
820
  """docstring."""
840
- answer = Dseqrecord(_copy.copy(self))
821
+ answer = Dseqrecord(copy.copy(self))
841
822
  answer.seq = self.seq.__getitem__(sl)
842
823
  # answer.seq.alphabet = self.seq.alphabet
843
824
  # breakpoint()
@@ -859,9 +840,9 @@ class Dseqrecord(_SeqRecord):
859
840
  f
860
841
  for f in answer.features
861
842
  if (
862
- _location_boundaries(f.location)[1] <= answer.seq.length
863
- and _location_boundaries(f.location)[0]
864
- < _location_boundaries(f.location)[1]
843
+ location_boundaries(f.location)[1] <= len(answer.seq)
844
+ and location_boundaries(f.location)[0]
845
+ < location_boundaries(f.location)[1]
865
846
  )
866
847
  ]
867
848
 
@@ -877,8 +858,8 @@ class Dseqrecord(_SeqRecord):
877
858
  identifier = " ".join(sf.qualifiers["label"])
878
859
  elif "note" in sf.qualifiers:
879
860
  identifier = " ".join(sf.qualifiers["note"])
880
- answer.id = _identifier_from_string(identifier)[:16]
881
- answer.name = _identifier_from_string("part_{name}".format(name=self.name))[:16]
861
+ answer.id = identifier_from_string(identifier)[:16]
862
+ answer.name = identifier_from_string("part_{name}".format(name=self.name))[:16]
882
863
  return answer
883
864
 
884
865
  def __eq__(self, other):
@@ -920,43 +901,34 @@ class Dseqrecord(_SeqRecord):
920
901
  answer.name = answer.id[:16]
921
902
  return fragments[0]
922
903
 
923
- def no_cutters(self, batch: _RestrictionBatch = None):
904
+ def no_cutters(self, batch: RestrictionBatch = None):
924
905
  """docstring."""
925
906
  return self.seq.no_cutters(batch=batch or CommOnly)
926
907
 
927
- def unique_cutters(self, batch: _RestrictionBatch = None):
908
+ def unique_cutters(self, batch: RestrictionBatch = None):
928
909
  """docstring."""
929
910
  return self.seq.unique_cutters(batch=batch or CommOnly)
930
911
 
931
- def once_cutters(self, batch: _RestrictionBatch = None):
912
+ def once_cutters(self, batch: RestrictionBatch = None):
932
913
  """docstring."""
933
914
  return self.seq.once_cutters(batch=batch or CommOnly)
934
915
 
935
- def twice_cutters(self, batch: _RestrictionBatch = None):
916
+ def twice_cutters(self, batch: RestrictionBatch = None):
936
917
  """docstring."""
937
918
  return self.seq.twice_cutters(batch=batch or CommOnly)
938
919
 
939
- def n_cutters(self, n=3, batch: _RestrictionBatch = None):
920
+ def n_cutters(self, n=3, batch: RestrictionBatch = None):
940
921
  """docstring."""
941
922
  return self.seq.n_cutters(n=n, batch=batch or CommOnly)
942
923
 
943
- def cutters(self, batch: _RestrictionBatch = None):
924
+ def cutters(self, batch: RestrictionBatch = None):
944
925
  """docstring."""
945
926
  return self.seq.cutters(batch=batch or CommOnly)
946
927
 
947
928
  def number_of_cuts(self, *enzymes):
948
929
  """The number of cuts by digestion with the Restriction enzymes
949
930
  contained in the iterable."""
950
- return sum([len(enzyme.search(self.seq)) for enzyme in _flatten(enzymes)])
951
-
952
- def cas9(self, RNA: str):
953
- """docstring."""
954
- fragments = []
955
- result = []
956
- for target in (self.seq, self.seq.rc()):
957
- fragments = [self[sl.start : sl.stop] for sl in target.cas9(RNA)]
958
- result.append(fragments)
959
- return result
931
+ return sum([len(enzyme.search(self.seq)) for enzyme in flatten(enzymes)])
960
932
 
961
933
  def reverse_complement(self):
962
934
  """Reverse complement.
@@ -1033,7 +1005,7 @@ class Dseqrecord(_SeqRecord):
1033
1005
  if not self.circular:
1034
1006
  raise TypeError("Only circular DNA can be synced!")
1035
1007
 
1036
- newseq = _copy.copy(self)
1008
+ newseq = copy.copy(self)
1037
1009
 
1038
1010
  s = str(self.seq.watson).lower()
1039
1011
  s_rc = str(self.seq.crick).lower()
@@ -1049,8 +1021,8 @@ class Dseqrecord(_SeqRecord):
1049
1021
 
1050
1022
  lim = min(limit, limit * (len(s) // limit) + 1)
1051
1023
 
1052
- c = _common_sub_strings(s + s, r, limit=lim)
1053
- d = _common_sub_strings(s_rc + s_rc, r, limit=lim)
1024
+ c = common_sub_strings(s + s, r, limit=lim)
1025
+ d = common_sub_strings(s_rc + s_rc, r, limit=lim)
1054
1026
 
1055
1027
  c = [(x[0], x[2]) for x in c if x[1] == 0]
1056
1028
  d = [(x[0], x[2]) for x in d if x[1] == 0]
@@ -1076,7 +1048,7 @@ class Dseqrecord(_SeqRecord):
1076
1048
  result = newseq
1077
1049
  else:
1078
1050
  result = newseq.shifted(start)
1079
- # _module_logger.info("synced")
1051
+
1080
1052
  return result
1081
1053
 
1082
1054
  def upper(self):
@@ -1105,7 +1077,7 @@ class Dseqrecord(_SeqRecord):
1105
1077
  --------
1106
1078
  pydna.dseqrecord.Dseqrecord.lower"""
1107
1079
 
1108
- upper = _copy.deepcopy(self)
1080
+ upper = copy.deepcopy(self)
1109
1081
  # This is because the @seq.setter methods otherwise sets the _per_letter_annotations to an empty dict
1110
1082
  prev_per_letter_annotation = upper._per_letter_annotations
1111
1083
  upper.seq = upper.seq.upper()
@@ -1139,7 +1111,7 @@ class Dseqrecord(_SeqRecord):
1139
1111
  pydna.dseqrecord.Dseqrecord.upper
1140
1112
 
1141
1113
  """
1142
- lower = _copy.deepcopy(self)
1114
+ lower = copy.deepcopy(self)
1143
1115
  prev_per_letter_annotation = lower._per_letter_annotations
1144
1116
  lower.seq = lower.seq.lower()
1145
1117
  lower._per_letter_annotations = prev_per_letter_annotation
@@ -1157,8 +1129,8 @@ class Dseqrecord(_SeqRecord):
1157
1129
  orf = self[x:y]
1158
1130
  prt = orf.translate()
1159
1131
  features.append(
1160
- _SeqFeature(
1161
- _SimpleLocation(x, y, strand=strand),
1132
+ SeqFeature(
1133
+ SimpleLocation(x, y, strand=strand),
1162
1134
  type="CDS",
1163
1135
  qualifiers={
1164
1136
  "note": f"{y - x}bp {(y - x) // 3}aa",
@@ -1196,11 +1168,11 @@ class Dseqrecord(_SeqRecord):
1196
1168
  if self.features:
1197
1169
  f = self.features[feature]
1198
1170
  locations = sorted(
1199
- self.features[feature].location.parts, key=_SimpleLocation.start.fget
1171
+ self.features[feature].location.parts, key=SimpleLocation.start.fget
1200
1172
  )
1201
1173
  strand = f.location.strand
1202
1174
  else:
1203
- locations = [_SimpleLocation(0, 0, 1)]
1175
+ locations = [SimpleLocation(0, 0, 1)]
1204
1176
  strand = 1
1205
1177
 
1206
1178
  ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
@@ -1231,7 +1203,7 @@ class Dseqrecord(_SeqRecord):
1231
1203
  result += f"{s1}\n{s2}"
1232
1204
  else:
1233
1205
  result += f"{s2}\n{s1}"
1234
- return _pretty_str(result)
1206
+ return pretty_str(result)
1235
1207
 
1236
1208
  def shifted(self, shift):
1237
1209
  """Circular Dseqrecord with a new origin <shift>.
@@ -1284,15 +1256,15 @@ class Dseqrecord(_SeqRecord):
1284
1256
  )
1285
1257
  ln = len(self)
1286
1258
  if not shift % ln:
1287
- return _copy.deepcopy(self) # shift is a multiple of ln or 0
1259
+ return copy.deepcopy(self) # shift is a multiple of ln or 0
1288
1260
  else:
1289
1261
  shift %= ln # 0<=shift<=ln
1290
1262
  newseq = (self.seq[shift:] + self.seq[:shift]).looped()
1291
- newfeatures = _copy.deepcopy(self.features)
1263
+ newfeatures = copy.deepcopy(self.features)
1292
1264
  for feature in newfeatures:
1293
- feature.location = _shift_location(feature.location, -shift, ln)
1294
- newfeatures.sort(key=_operator.attrgetter("location.start"))
1295
- answer = _copy.deepcopy(self)
1265
+ feature.location = shift_location(feature.location, -shift, ln)
1266
+ newfeatures.sort(key=operator.attrgetter("location.start"))
1267
+ answer = copy.deepcopy(self)
1296
1268
  answer.features = newfeatures
1297
1269
  answer.seq = newseq
1298
1270
  return answer
@@ -1346,7 +1318,7 @@ class Dseqrecord(_SeqRecord):
1346
1318
  if left_cut == right_cut:
1347
1319
  # Not really a cut, but to handle the general case
1348
1320
  if left_cut is None:
1349
- features = _copy.deepcopy(self.features)
1321
+ features = copy.deepcopy(self.features)
1350
1322
  else:
1351
1323
  # The features that span the origin if shifting with left_cut, but that do not cross
1352
1324
  # the cut site should be included, and if there is a feature within the cut site, it should
@@ -1369,7 +1341,7 @@ class Dseqrecord(_SeqRecord):
1369
1341
  initial_shift = left_watson if left_ovhg < 0 else left_crick
1370
1342
  features = self.shifted(initial_shift).features
1371
1343
  # for f in features:
1372
- # print(f.id, f.location, _location_boundaries(f.location))
1344
+ # print(f.id, f.location, location_boundaries(f.location))
1373
1345
  # Here, we have done what's shown below (* indicates the origin).
1374
1346
  # The features 0 and 2 have the right location for the final product:
1375
1347
  #
@@ -1383,10 +1355,10 @@ class Dseqrecord(_SeqRecord):
1383
1355
  features_need_transfer = [
1384
1356
  f
1385
1357
  for f in features
1386
- if (_location_boundaries(f.location)[1] <= abs(left_ovhg))
1358
+ if (location_boundaries(f.location)[1] <= abs(left_ovhg))
1387
1359
  ]
1388
1360
  features_need_transfer = [
1389
- _shift_feature(f, -abs(left_ovhg), len(self))
1361
+ shift_feature(f, -abs(left_ovhg), len(self))
1390
1362
  for f in features_need_transfer
1391
1363
  ]
1392
1364
 
@@ -1403,7 +1375,7 @@ class Dseqrecord(_SeqRecord):
1403
1375
  # as the original one. However, the final product is longer because of the overhang.
1404
1376
 
1405
1377
  features += [
1406
- _shift_feature(f, abs(left_ovhg), len(dseq))
1378
+ shift_feature(f, abs(left_ovhg), len(dseq))
1407
1379
  for f in features_need_transfer
1408
1380
  ]
1409
1381
  # ^ ^^^^^^^^^
@@ -1415,9 +1387,9 @@ class Dseqrecord(_SeqRecord):
1415
1387
  f
1416
1388
  for f in features
1417
1389
  if (
1418
- _location_boundaries(f.location)[1] <= len(dseq)
1419
- and _location_boundaries(f.location)[0]
1420
- <= _location_boundaries(f.location)[1]
1390
+ location_boundaries(f.location)[1] <= len(dseq)
1391
+ and location_boundaries(f.location)[0]
1392
+ <= location_boundaries(f.location)[1]
1421
1393
  )
1422
1394
  ]
1423
1395
  else:
@@ -1468,3 +1440,32 @@ class Dseqrecord(_SeqRecord):
1468
1440
  if self.source is None:
1469
1441
  return ""
1470
1442
  return self.source.history_string(self)
1443
+
1444
+ def join(self, fragments):
1445
+ """
1446
+ Join an iterable of Dseqrecords with this instance as the separator.
1447
+
1448
+ Example:
1449
+
1450
+ >>> sep = Dseqrecord("a")
1451
+ >>> joined = sep.join([Dseqrecord("A"), Dseqrecord("B"), Dseqrecord("C")])
1452
+ >>> joined
1453
+ Dseqrecord(-5)
1454
+ >>> joined.seq
1455
+ Dseq(-5)
1456
+ AaBaC
1457
+ TtVtG
1458
+
1459
+ """
1460
+ it = iter(fragments)
1461
+ try:
1462
+ result = next(it) # first element (no leading separator)
1463
+ except StopIteration:
1464
+ # Empty iterable -> return empty Dseqrecord in analogy with
1465
+ # str.join
1466
+ return Dseqrecord("")
1467
+
1468
+ # Interleave: result = first + sep + x + sep + y + ...
1469
+ for x in it:
1470
+ result = result + self + x
1471
+ return result