pydna 5.5.4__py3-none-any.whl → 5.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/dseqrecord.py CHANGED
@@ -11,30 +11,28 @@ Seq and SeqRecord classes, respectively.
11
11
 
12
12
  The Dseq and Dseqrecord classes support the notion of circular and linear DNA topology.
13
13
  """
14
- from Bio.Restriction import RestrictionBatch as _RestrictionBatch
14
+ from Bio.Restriction import RestrictionBatch
15
15
  from Bio.Restriction import CommOnly
16
- from pydna.dseq import Dseq as _Dseq
17
- from pydna._pretty import pretty_str as _pretty_str
18
- from pydna.utils import flatten as _flatten, location_boundaries as _location_boundaries
19
-
20
- # from pydna.utils import memorize as _memorize
21
- from pydna.utils import rc as _rc
22
- from pydna.utils import shift_location as _shift_location
23
- from pydna.utils import shift_feature as _shift_feature
24
- from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
25
- from Bio.SeqFeature import SeqFeature as _SeqFeature
16
+ from pydna.dseq import Dseq
17
+ from pydna._pretty import pretty_str
18
+ from pydna.utils import flatten, location_boundaries
19
+
20
+ from pydna.utils import shift_location
21
+ from pydna.utils import shift_feature
22
+ from pydna.common_sub_strings import common_sub_strings
23
+ from Bio.SeqFeature import SeqFeature
26
24
  from Bio import SeqIO
27
- from Bio.SeqFeature import CompoundLocation as _CompoundLocation
28
- from Bio.SeqFeature import SimpleLocation as _SimpleLocation
29
- from pydna.seqrecord import SeqRecord as _SeqRecord
30
- from Bio.Seq import translate as _translate
31
- from pydna.utils import identifier_from_string as _identifier_from_string
32
- import copy as _copy
33
- import operator as _operator
34
- import os as _os
35
- import re as _re
36
- import time as _time
37
- import datetime as _datetime
25
+ from Bio.SeqFeature import CompoundLocation
26
+ from Bio.SeqFeature import SimpleLocation
27
+ from pydna.seqrecord import SeqRecord
28
+ from Bio.Seq import translate
29
+ from Bio.Seq import Seq as BPSeq
30
+ import copy
31
+ import operator
32
+ import os
33
+ import re
34
+ import time
35
+ import datetime
38
36
  from typing import Union, TYPE_CHECKING
39
37
  from pydna.opencloning_models import SequenceCutSource
40
38
 
@@ -42,20 +40,15 @@ if TYPE_CHECKING: # pragma: no cover
42
40
  from pydna.opencloning_models import Source
43
41
 
44
42
 
45
- # import logging as _logging
46
-
47
- # _module_logger = _logging.getLogger("pydna." + __name__)
48
-
49
-
50
43
  try:
51
- from IPython.display import display_html as _display_html
44
+ from IPython.display import display_html
52
45
  except ImportError:
53
46
 
54
- def _display_html(item, raw=None):
47
+ def display_html(item, raw=None):
55
48
  return item
56
49
 
57
50
 
58
- class Dseqrecord(_SeqRecord):
51
+ class Dseqrecord(SeqRecord):
59
52
  """Dseqrecord is a double stranded version of the Biopython SeqRecord [#]_ class.
60
53
  The Dseqrecord object holds a Dseq object describing the sequence.
61
54
  Additionally, Dseqrecord hold meta information about the sequence in the
@@ -132,7 +125,7 @@ class Dseqrecord(_SeqRecord):
132
125
 
133
126
  """
134
127
 
135
- seq: _Dseq
128
+ seq: Dseq
136
129
  source: Union["Source", None] = None
137
130
 
138
131
  def __init__(
@@ -144,15 +137,12 @@ class Dseqrecord(_SeqRecord):
144
137
  source=None,
145
138
  **kwargs,
146
139
  ):
147
- # _module_logger.info("### Dseqrecord initialized ###")
148
- # _module_logger.info("argument circular = %s", circular)
149
- # _module_logger.info("circular = %s", circular)
150
140
 
151
141
  if isinstance(record, str):
152
- # _module_logger.info("record is a string")
142
+
153
143
  super().__init__(
154
- _Dseq.from_string(
155
- record,
144
+ Dseq.quick(
145
+ record.encode("ascii"),
156
146
  # linear=linear,
157
147
  circular=bool(circular),
158
148
  ),
@@ -166,14 +156,14 @@ class Dseqrecord(_SeqRecord):
166
156
  record = record[:]
167
157
  elif circular is True:
168
158
  record = record.looped()
169
- # _module_logger.info("record is a Dseq object")
159
+
170
160
  super().__init__(record, *args, **kwargs)
171
161
 
172
162
  # record is a Bio.Seq object ?
173
163
  elif hasattr(record, "transcribe"):
174
- # _module_logger.info("record is a Seq object")
164
+
175
165
  super().__init__(
176
- _Dseq(
166
+ Dseq(
177
167
  str(record),
178
168
  # linear=linear,
179
169
  circular=bool(circular),
@@ -184,13 +174,13 @@ class Dseqrecord(_SeqRecord):
184
174
 
185
175
  # record is a Bio.SeqRecord or Dseqrecord object ?
186
176
  elif hasattr(record, "features"):
187
- # _module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
177
+
188
178
  for key, value in list(record.__dict__.items()):
189
179
  setattr(self, key, value)
190
180
  self.letter_annotations = {}
191
181
  # record.seq is a Dseq object ?
192
182
  if hasattr(record.seq, "watson"):
193
- new_seq = _copy.copy(record.seq)
183
+ new_seq = copy.copy(record.seq)
194
184
  if circular is False:
195
185
  new_seq = new_seq[:]
196
186
  elif circular is True:
@@ -198,7 +188,7 @@ class Dseqrecord(_SeqRecord):
198
188
  self.seq = new_seq
199
189
  # record.seq is Bio.SeqRecord object ?
200
190
  else:
201
- self.seq = _Dseq(
191
+ self.seq = Dseq(
202
192
  str(record.seq),
203
193
  # linear=linear,
204
194
  circular=bool(circular),
@@ -226,16 +216,14 @@ class Dseqrecord(_SeqRecord):
226
216
  # linear=True, circular=False, n = 5E-14, **kwargs):
227
217
  obj = cls.__new__(cls) # Does not call __init__
228
218
  obj._per_letter_annotations = {}
229
- obj.seq = _Dseq.quick(
230
- record,
231
- _rc(record),
232
- ovhg=0,
219
+ obj.seq = Dseq.quick(
220
+ record.encode("ascii"),
233
221
  # linear=linear,
234
222
  circular=circular,
235
223
  )
236
- obj.id = _pretty_str("id")
237
- obj.name = _pretty_str("name")
238
- obj.description = _pretty_str("description")
224
+ obj.id = pretty_str("id")
225
+ obj.name = pretty_str("name")
226
+ obj.description = pretty_str("description")
239
227
  obj.dbxrefs = []
240
228
  obj.annotations = {"molecule_type": "DNA"}
241
229
  obj.features = []
@@ -247,7 +235,7 @@ class Dseqrecord(_SeqRecord):
247
235
  @classmethod
248
236
  def from_SeqRecord(
249
237
  cls,
250
- record: _SeqRecord,
238
+ record: SeqRecord,
251
239
  *args,
252
240
  circular=None,
253
241
  n=5e-14,
@@ -267,9 +255,7 @@ class Dseqrecord(_SeqRecord):
267
255
  obj.source = None
268
256
  if circular is None:
269
257
  circular = record.annotations.get("topology") == "circular"
270
- obj.seq = _Dseq.quick(
271
- str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
272
- )
258
+ obj.seq = Dseq.quick(record.seq._data, ovhg=0, circular=circular)
273
259
  return obj
274
260
 
275
261
  @property
@@ -339,14 +325,14 @@ class Dseqrecord(_SeqRecord):
339
325
  qualifiers = {}
340
326
  qualifiers.update(kwargs)
341
327
 
342
- location = _CompoundLocation(
328
+ location = CompoundLocation(
343
329
  (
344
- _SimpleLocation(x, self.seq.length, strand=strand),
345
- _SimpleLocation(0, y, strand=strand),
330
+ SimpleLocation(x, len(self.seq), strand=strand),
331
+ SimpleLocation(0, y, strand=strand),
346
332
  )
347
333
  )
348
334
 
349
- sf = _SeqFeature(location, type=type_, qualifiers=qualifiers)
335
+ sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
350
336
 
351
337
  if "label" not in qualifiers:
352
338
  qualifiers["label"] = [f"ft{len(location)}"]
@@ -395,35 +381,31 @@ class Dseqrecord(_SeqRecord):
395
381
  --------
396
382
  pydna.dseq.Dseq.looped
397
383
  """
398
- new = _copy.copy(self)
399
- # for key, value in list(self.__dict__.items()):
400
- # setattr(new, key, value)
401
- new._seq = self.seq.looped()
402
- five_prime = self.seq.five_prime_end()
403
- for fn, fo in zip(new.features, self.features):
404
- if five_prime[0] == "5'":
405
- pass
406
- # fn.location = fn.location + self.seq.ovhg
407
- elif five_prime[0] == "3'":
408
- fn.location = fn.location + (-self.seq.ovhg)
409
- if fn.location.start < 0:
410
- loc1 = _SimpleLocation(
411
- len(new) + fn.location.start, len(new), strand=fn.location.strand
412
- )
413
- loc2 = _SimpleLocation(0, fn.location.end, strand=fn.location.strand)
414
- fn.location = _CompoundLocation([loc1, loc2])
415
-
416
- if fn.location.end > len(new):
417
- loc1 = _SimpleLocation(
418
- fn.location.start, len(new), strand=fn.location.strand
419
- )
420
- loc2 = _SimpleLocation(
421
- 0, fn.location.end - len(new), strand=fn.location.strand
422
- )
423
- fn.location = _CompoundLocation([loc1, loc2])
424
-
425
- fn.qualifiers = fo.qualifiers
426
-
384
+ new = copy.deepcopy(self)
385
+ new.seq = self.seq.looped()
386
+
387
+ old_length = len(self) # Possibly longer, including sticky ends if any.
388
+ new_length = len(new) # Possibly shorter, with blunt ends.
389
+ if old_length != new_length: # Only False if self was blunt.
390
+ new_features = []
391
+ for fn in new.features:
392
+ if len(fn.location) > new_length:
393
+ # Edge case: if the feature is longer than the sequence, it should be
394
+ # dropped. This can happen in a sequence with overhangs, where the feature
395
+ # spans both overhangs.
396
+ #
397
+ # Example:
398
+ # feature
399
+ # <------>
400
+ # aaACGT
401
+ # TGCAtt
402
+ #
403
+ # Circular sequence ACGTtt should not have that feature, so we drop it
404
+ continue
405
+ fn.location = shift_location(fn.location, 0, new_length)
406
+ new_features.append(fn)
407
+
408
+ new.features = new_features
427
409
  return new
428
410
 
429
411
  def tolinear(self): # pragma: no cover
@@ -445,16 +427,16 @@ class Dseqrecord(_SeqRecord):
445
427
  >>>
446
428
 
447
429
  """
448
- import warnings as _warnings
430
+ import warnings
449
431
  from pydna import _PydnaDeprecationWarning
450
432
 
451
- _warnings.warn(
433
+ warnings.warn(
452
434
  "tolinear method is obsolete; "
453
435
  "please use obj[:] "
454
436
  "instead of obj.tolinear().",
455
437
  _PydnaDeprecationWarning,
456
438
  )
457
- new = _copy.copy(self)
439
+ new = copy.copy(self)
458
440
  for key, value in list(self.__dict__.items()):
459
441
  setattr(new, key, value)
460
442
  # new._seq = self.seq.tolinear()
@@ -465,15 +447,29 @@ class Dseqrecord(_SeqRecord):
465
447
 
466
448
  def terminal_transferase(self, nucleotides="a"):
467
449
  """docstring."""
468
- newseq = _copy.deepcopy(self)
450
+ newseq = copy.deepcopy(self)
469
451
  newseq.seq = self.seq.terminal_transferase(nucleotides)
470
452
  for feature in newseq.features:
471
453
  feature.location += len(nucleotides)
472
454
  return newseq
473
455
 
474
- def format(self, f="gb"):
456
+ def format(self, format: str = "gb"):
475
457
  """Returns the sequence as a string using a format supported by Biopython
476
458
  SeqIO [#]_. Default is "gb" which is short for Genbank.
459
+ Allowed Formats are for example:
460
+
461
+ * "fasta": The standard FASTA format.
462
+ * "fasta-2line": No line wrapping and exactly two lines per record.
463
+ * "genbank" (or "gb"): The GenBank flat file format.
464
+ * "embl": The EMBL flat file format.
465
+ * "imgt": The IMGT variant of the EMBL format.
466
+
467
+ The format string can be modified with the keyword "dscode" if
468
+ the underlying dscode string is desired in the output. for example:
469
+ ::
470
+
471
+ Dseqrecord("PEXIGATCQFZJ").format("fasta-2line dscode")
472
+
477
473
 
478
474
  Examples
479
475
  --------
@@ -495,6 +491,12 @@ class Dseqrecord(_SeqRecord):
495
491
  ORIGIN
496
492
  1 aaa
497
493
  //
494
+ >>> print(Dseqrecord("PEXIGATCQFZJ").format("fasta-2line"))
495
+ >id description
496
+ GATCGATCGATC
497
+ >>> print(Dseqrecord("PEXIGATCQFZJ").format("fasta-2line dscode"))
498
+ >id description
499
+ PEXIGATCQFZJ
498
500
 
499
501
 
500
502
  References
@@ -504,13 +506,19 @@ class Dseqrecord(_SeqRecord):
504
506
 
505
507
 
506
508
  """
507
-
508
- record = _copy.deepcopy(self)
509
- if f in ("genbank", "gb") and self.circular:
509
+ record = copy.deepcopy(self)
510
+ if "dscode" in format:
511
+ format = format.replace("dscode", "")
512
+ obj = BPSeq("")
513
+ obj._data = record.seq._data
514
+ record.seq = obj
515
+ format = format.strip(" -")
516
+ if format in ("genbank", "gb") and self.circular:
510
517
  record.annotations["topology"] = "circular"
511
518
  else:
512
519
  record.annotations["topology"] = "linear"
513
- return _SeqRecord.format(record, f).strip()
520
+
521
+ return SeqRecord.format(record, format).strip()
514
522
 
515
523
  def write(self, filename=None, f="gb"):
516
524
  """Writes the Dseqrecord to a file using the format f, which must
@@ -543,9 +551,9 @@ class Dseqrecord(_SeqRecord):
543
551
  # generate a name if no name was given
544
552
  # if not isinstance(filename, str): # is filename a string???
545
553
  # raise ValueError("filename has to be a string, got", type(filename))
546
- name, ext = _os.path.splitext(filename)
554
+ name, ext = os.path.splitext(filename)
547
555
  msg = f"<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>"
548
- if not _os.path.isfile(filename):
556
+ if not os.path.isfile(filename):
549
557
  with open(filename, "w", encoding="utf8") as fp:
550
558
  fp.write(self.format(f))
551
559
  else:
@@ -556,16 +564,16 @@ class Dseqrecord(_SeqRecord):
556
564
  if self.seq != old_file.seq:
557
565
  # If new sequence is different, the old file is
558
566
  # renamed with "_OLD_" suffix:
559
- oldmtime = _datetime.datetime.fromtimestamp(
560
- _os.path.getmtime(filename)
567
+ oldmtime = datetime.datetime.fromtimestamp(
568
+ os.path.getmtime(filename)
561
569
  ).isoformat()
562
- tstmp = int(_time.time() * 1_000_000)
570
+ tstmp = int(time.time() * 1_000_000)
563
571
  old_filename = f"{name}_OLD_{tstmp}{ext}"
564
- _os.rename(filename, old_filename)
572
+ os.rename(filename, old_filename)
565
573
  with open(filename, "w", encoding="utf8") as fp:
566
574
  fp.write(self.format(f))
567
- newmtime = _datetime.datetime.fromtimestamp(
568
- _os.path.getmtime(filename)
575
+ newmtime = datetime.datetime.fromtimestamp(
576
+ os.path.getmtime(filename)
569
577
  ).isoformat()
570
578
  msg = f"""
571
579
  <table style="padding:10px 10px;
@@ -611,8 +619,8 @@ class Dseqrecord(_SeqRecord):
611
619
  elif "seguid" in old_file.annotations.get("comment", ""):
612
620
  pattern = r"(ldseguid|cdseguid)-(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
613
621
  # seguid=NNNNNNNNNNNNNNNNNNNNNNNNNNN_2020-10-10T11:11:11.111111
614
- oldstamp = _re.search(pattern, old_file.description)
615
- newstamp = _re.search(pattern, self.description)
622
+ oldstamp = re.search(pattern, old_file.description)
623
+ newstamp = re.search(pattern, self.description)
616
624
  newdescription = self.description
617
625
  if oldstamp and newstamp:
618
626
  if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
@@ -621,7 +629,7 @@ class Dseqrecord(_SeqRecord):
621
629
  )
622
630
  elif oldstamp:
623
631
  newdescription += " " + oldstamp.group(0)
624
- newobj = _copy.copy(self)
632
+ newobj = copy.copy(self)
625
633
  newobj.description = newdescription
626
634
 
627
635
  with open(filename, "w", encoding="utf8") as fp:
@@ -629,7 +637,7 @@ class Dseqrecord(_SeqRecord):
629
637
  else:
630
638
  with open(filename, "w", encoding="utf8") as fp:
631
639
  fp.write(self.format(f))
632
- return _display_html(msg, raw=True)
640
+ return display_html(msg, raw=True)
633
641
 
634
642
  def find(self, other):
635
643
  # TODO allow strings, seqs, seqrecords or Dseqrecords
@@ -647,7 +655,7 @@ class Dseqrecord(_SeqRecord):
647
655
  def __str__(self):
648
656
  return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
649
657
  self.circular, len(self)
650
- ) + _SeqRecord.__str__(self)
658
+ ) + SeqRecord.__str__(self)
651
659
 
652
660
  def __contains__(self, other):
653
661
  if other.lower() in str(self.seq).lower():
@@ -658,7 +666,7 @@ class Dseqrecord(_SeqRecord):
658
666
  spc = 3 - ln % 3 if ln % 3 else 0
659
667
  s = "n" * spc + s + "nnn"
660
668
  for frame in range(3):
661
- if other.lower() in _translate(s[frame : frame + spc + ln]).lower():
669
+ if other.lower() in translate(s[frame : frame + spc + ln]).lower():
662
670
  return True
663
671
  return False
664
672
 
@@ -667,13 +675,13 @@ class Dseqrecord(_SeqRecord):
667
675
  >>> from pydna.dseqrecord import Dseqrecord
668
676
  >>> s=Dseqrecord("atgtacgatcgtatgctggttatattttag")
669
677
  >>> s.seq.translate()
670
- Seq('MYDRMLVIF*')
678
+ ProteinSeq('MYDRMLVIF*')
671
679
  >>> "RML" in s
672
680
  True
673
681
  >>> "MMM" in s
674
682
  False
675
683
  >>> s.seq.rc().translate()
676
- Seq('LKYNQHTIVH')
684
+ ProteinSeq('LKYNQHTIVH')
677
685
  >>> "QHT" in s.rc()
678
686
  True
679
687
  >>> "QHT" in s
@@ -689,7 +697,7 @@ class Dseqrecord(_SeqRecord):
689
697
  cgtatgctg
690
698
  gcatacgac
691
699
  >>> code.translate()
692
- Seq('RML')
700
+ ProteinSeq('RML')
693
701
  """
694
702
  other = str(other).lower()
695
703
  assert self.seq.watson == "".join(self.seq.watson.split())
@@ -700,7 +708,7 @@ class Dseqrecord(_SeqRecord):
700
708
  start = None
701
709
  for frame in range(3):
702
710
  try:
703
- start = _translate(s[frame : frame + ln + spc]).lower().index(other)
711
+ start = translate(s[frame : frame + ln + spc]).lower().index(other)
704
712
  break
705
713
  except ValueError:
706
714
  pass
@@ -748,7 +756,7 @@ class Dseqrecord(_SeqRecord):
748
756
  matching_reads = []
749
757
 
750
758
  for read_ in reads:
751
- matches = _common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
759
+ matches = common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
752
760
 
753
761
  if not matches:
754
762
  continue
@@ -769,14 +777,14 @@ class Dseqrecord(_SeqRecord):
769
777
  if len(newmatches) > 1:
770
778
  ms = []
771
779
  for m in newmatches:
772
- ms.append(_SimpleLocation(m[0], m[0] + m[2]))
773
- loc = _CompoundLocation(ms)
780
+ ms.append(SimpleLocation(m[0], m[0] + m[2]))
781
+ loc = CompoundLocation(ms)
774
782
  else:
775
783
  a, b, c = newmatches[0]
776
- loc = _SimpleLocation(a, a + c)
784
+ loc = SimpleLocation(a, a + c)
777
785
 
778
786
  self.features.append(
779
- _SeqFeature(
787
+ SeqFeature(
780
788
  loc,
781
789
  qualifiers={"label": [read_.annotations["filename"]]},
782
790
  type="trace",
@@ -786,9 +794,8 @@ class Dseqrecord(_SeqRecord):
786
794
  return [x.annotations["filename"] for x in matching_reads]
787
795
 
788
796
  def __repr__(self):
789
- return "Dseqrecord({}{})".format(
790
- {True: "-", False: "o"}[not self.circular], len(self)
791
- )
797
+ top = {True: "-", False: "o"}[not self.circular]
798
+ return f"{self.__class__.__name__}({top}{len(self)})"
792
799
 
793
800
  def _repr_pretty_(self, p, cycle):
794
801
  p.text(
@@ -799,7 +806,7 @@ class Dseqrecord(_SeqRecord):
799
806
 
800
807
  def __add__(self, other):
801
808
  if hasattr(other, "seq") and hasattr(other.seq, "watson"):
802
- other = _copy.deepcopy(other)
809
+ other = copy.deepcopy(other)
803
810
  other_five_prime = other.seq.five_prime_end()
804
811
  if other_five_prime[0] == "5'":
805
812
  # add other.seq.ovhg
@@ -810,10 +817,10 @@ class Dseqrecord(_SeqRecord):
810
817
  for f in other.features:
811
818
  f.location = f.location + (-other.seq.ovhg)
812
819
 
813
- answer = Dseqrecord(_SeqRecord.__add__(self, other))
820
+ answer = Dseqrecord(SeqRecord.__add__(self, other))
814
821
  answer.n = min(self.n, other.n)
815
822
  else:
816
- answer = Dseqrecord(_SeqRecord.__add__(self, Dseqrecord(other)))
823
+ answer = Dseqrecord(SeqRecord.__add__(self, Dseqrecord(other)))
817
824
  answer.n = self.n
818
825
  return answer
819
826
 
@@ -827,7 +834,7 @@ class Dseqrecord(_SeqRecord):
827
834
  if self.circular:
828
835
  raise TypeError("TypeError: can't multiply circular Dseqrecord.")
829
836
  if number > 0:
830
- new = _copy.deepcopy(self)
837
+ new = copy.deepcopy(self)
831
838
  for i in range(1, number):
832
839
  new += self
833
840
  new._per_letter_annotations = self._per_letter_annotations
@@ -837,7 +844,7 @@ class Dseqrecord(_SeqRecord):
837
844
 
838
845
  def __getitem__(self, sl):
839
846
  """docstring."""
840
- answer = Dseqrecord(_copy.copy(self))
847
+ answer = Dseqrecord(copy.copy(self))
841
848
  answer.seq = self.seq.__getitem__(sl)
842
849
  # answer.seq.alphabet = self.seq.alphabet
843
850
  # breakpoint()
@@ -859,9 +866,9 @@ class Dseqrecord(_SeqRecord):
859
866
  f
860
867
  for f in answer.features
861
868
  if (
862
- _location_boundaries(f.location)[1] <= answer.seq.length
863
- and _location_boundaries(f.location)[0]
864
- < _location_boundaries(f.location)[1]
869
+ location_boundaries(f.location)[1] <= len(answer.seq)
870
+ and location_boundaries(f.location)[0]
871
+ < location_boundaries(f.location)[1]
865
872
  )
866
873
  ]
867
874
 
@@ -870,15 +877,6 @@ class Dseqrecord(_SeqRecord):
870
877
  return self.apply_cut(cut, cut)
871
878
  else:
872
879
  answer = Dseqrecord("")
873
- identifier = "part_{id}".format(id=self.id)
874
- if answer.features:
875
- sf = max(answer.features, key=len) # default
876
- if "label" in sf.qualifiers:
877
- identifier = " ".join(sf.qualifiers["label"])
878
- elif "note" in sf.qualifiers:
879
- identifier = " ".join(sf.qualifiers["note"])
880
- answer.id = _identifier_from_string(identifier)[:16]
881
- answer.name = _identifier_from_string("part_{name}".format(name=self.name))[:16]
882
880
  return answer
883
881
 
884
882
  def __eq__(self, other):
@@ -920,43 +918,34 @@ class Dseqrecord(_SeqRecord):
920
918
  answer.name = answer.id[:16]
921
919
  return fragments[0]
922
920
 
923
- def no_cutters(self, batch: _RestrictionBatch = None):
921
+ def no_cutters(self, batch: RestrictionBatch = None):
924
922
  """docstring."""
925
923
  return self.seq.no_cutters(batch=batch or CommOnly)
926
924
 
927
- def unique_cutters(self, batch: _RestrictionBatch = None):
925
+ def unique_cutters(self, batch: RestrictionBatch = None):
928
926
  """docstring."""
929
927
  return self.seq.unique_cutters(batch=batch or CommOnly)
930
928
 
931
- def once_cutters(self, batch: _RestrictionBatch = None):
929
+ def once_cutters(self, batch: RestrictionBatch = None):
932
930
  """docstring."""
933
931
  return self.seq.once_cutters(batch=batch or CommOnly)
934
932
 
935
- def twice_cutters(self, batch: _RestrictionBatch = None):
933
+ def twice_cutters(self, batch: RestrictionBatch = None):
936
934
  """docstring."""
937
935
  return self.seq.twice_cutters(batch=batch or CommOnly)
938
936
 
939
- def n_cutters(self, n=3, batch: _RestrictionBatch = None):
937
+ def n_cutters(self, n=3, batch: RestrictionBatch = None):
940
938
  """docstring."""
941
939
  return self.seq.n_cutters(n=n, batch=batch or CommOnly)
942
940
 
943
- def cutters(self, batch: _RestrictionBatch = None):
941
+ def cutters(self, batch: RestrictionBatch = None):
944
942
  """docstring."""
945
943
  return self.seq.cutters(batch=batch or CommOnly)
946
944
 
947
945
  def number_of_cuts(self, *enzymes):
948
946
  """The number of cuts by digestion with the Restriction enzymes
949
947
  contained in the iterable."""
950
- return sum([len(enzyme.search(self.seq)) for enzyme in _flatten(enzymes)])
951
-
952
- def cas9(self, RNA: str):
953
- """docstring."""
954
- fragments = []
955
- result = []
956
- for target in (self.seq, self.seq.rc()):
957
- fragments = [self[sl.start : sl.stop] for sl in target.cas9(RNA)]
958
- result.append(fragments)
959
- return result
948
+ return sum([len(enzyme.search(self.seq)) for enzyme in flatten(enzymes)])
960
949
 
961
950
  def reverse_complement(self):
962
951
  """Reverse complement.
@@ -1033,7 +1022,7 @@ class Dseqrecord(_SeqRecord):
1033
1022
  if not self.circular:
1034
1023
  raise TypeError("Only circular DNA can be synced!")
1035
1024
 
1036
- newseq = _copy.copy(self)
1025
+ newseq = copy.copy(self)
1037
1026
 
1038
1027
  s = str(self.seq.watson).lower()
1039
1028
  s_rc = str(self.seq.crick).lower()
@@ -1049,8 +1038,8 @@ class Dseqrecord(_SeqRecord):
1049
1038
 
1050
1039
  lim = min(limit, limit * (len(s) // limit) + 1)
1051
1040
 
1052
- c = _common_sub_strings(s + s, r, limit=lim)
1053
- d = _common_sub_strings(s_rc + s_rc, r, limit=lim)
1041
+ c = common_sub_strings(s + s, r, limit=lim)
1042
+ d = common_sub_strings(s_rc + s_rc, r, limit=lim)
1054
1043
 
1055
1044
  c = [(x[0], x[2]) for x in c if x[1] == 0]
1056
1045
  d = [(x[0], x[2]) for x in d if x[1] == 0]
@@ -1076,7 +1065,7 @@ class Dseqrecord(_SeqRecord):
1076
1065
  result = newseq
1077
1066
  else:
1078
1067
  result = newseq.shifted(start)
1079
- # _module_logger.info("synced")
1068
+
1080
1069
  return result
1081
1070
 
1082
1071
  def upper(self):
@@ -1105,7 +1094,7 @@ class Dseqrecord(_SeqRecord):
1105
1094
  --------
1106
1095
  pydna.dseqrecord.Dseqrecord.lower"""
1107
1096
 
1108
- upper = _copy.deepcopy(self)
1097
+ upper = copy.deepcopy(self)
1109
1098
  # This is because the @seq.setter methods otherwise sets the _per_letter_annotations to an empty dict
1110
1099
  prev_per_letter_annotation = upper._per_letter_annotations
1111
1100
  upper.seq = upper.seq.upper()
@@ -1139,7 +1128,7 @@ class Dseqrecord(_SeqRecord):
1139
1128
  pydna.dseqrecord.Dseqrecord.upper
1140
1129
 
1141
1130
  """
1142
- lower = _copy.deepcopy(self)
1131
+ lower = copy.deepcopy(self)
1143
1132
  prev_per_letter_annotation = lower._per_letter_annotations
1144
1133
  lower.seq = lower.seq.lower()
1145
1134
  lower._per_letter_annotations = prev_per_letter_annotation
@@ -1157,8 +1146,8 @@ class Dseqrecord(_SeqRecord):
1157
1146
  orf = self[x:y]
1158
1147
  prt = orf.translate()
1159
1148
  features.append(
1160
- _SeqFeature(
1161
- _SimpleLocation(x, y, strand=strand),
1149
+ SeqFeature(
1150
+ SimpleLocation(x, y, strand=strand),
1162
1151
  type="CDS",
1163
1152
  qualifiers={
1164
1153
  "note": f"{y - x}bp {(y - x) // 3}aa",
@@ -1196,11 +1185,11 @@ class Dseqrecord(_SeqRecord):
1196
1185
  if self.features:
1197
1186
  f = self.features[feature]
1198
1187
  locations = sorted(
1199
- self.features[feature].location.parts, key=_SimpleLocation.start.fget
1188
+ self.features[feature].location.parts, key=SimpleLocation.start.fget
1200
1189
  )
1201
1190
  strand = f.location.strand
1202
1191
  else:
1203
- locations = [_SimpleLocation(0, 0, 1)]
1192
+ locations = [SimpleLocation(0, 0, 1)]
1204
1193
  strand = 1
1205
1194
 
1206
1195
  ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
@@ -1231,7 +1220,7 @@ class Dseqrecord(_SeqRecord):
1231
1220
  result += f"{s1}\n{s2}"
1232
1221
  else:
1233
1222
  result += f"{s2}\n{s1}"
1234
- return _pretty_str(result)
1223
+ return pretty_str(result)
1235
1224
 
1236
1225
  def shifted(self, shift):
1237
1226
  """Circular Dseqrecord with a new origin <shift>.
@@ -1284,15 +1273,15 @@ class Dseqrecord(_SeqRecord):
1284
1273
  )
1285
1274
  ln = len(self)
1286
1275
  if not shift % ln:
1287
- return _copy.deepcopy(self) # shift is a multiple of ln or 0
1276
+ return copy.deepcopy(self) # shift is a multiple of ln or 0
1288
1277
  else:
1289
1278
  shift %= ln # 0<=shift<=ln
1290
1279
  newseq = (self.seq[shift:] + self.seq[:shift]).looped()
1291
- newfeatures = _copy.deepcopy(self.features)
1280
+ newfeatures = copy.deepcopy(self.features)
1292
1281
  for feature in newfeatures:
1293
- feature.location = _shift_location(feature.location, -shift, ln)
1294
- newfeatures.sort(key=_operator.attrgetter("location.start"))
1295
- answer = _copy.deepcopy(self)
1282
+ feature.location = shift_location(feature.location, -shift, ln)
1283
+ newfeatures.sort(key=operator.attrgetter("location.start"))
1284
+ answer = copy.deepcopy(self)
1296
1285
  answer.features = newfeatures
1297
1286
  answer.seq = newseq
1298
1287
  return answer
@@ -1346,7 +1335,7 @@ class Dseqrecord(_SeqRecord):
1346
1335
  if left_cut == right_cut:
1347
1336
  # Not really a cut, but to handle the general case
1348
1337
  if left_cut is None:
1349
- features = _copy.deepcopy(self.features)
1338
+ features = copy.deepcopy(self.features)
1350
1339
  else:
1351
1340
  # The features that span the origin if shifting with left_cut, but that do not cross
1352
1341
  # the cut site should be included, and if there is a feature within the cut site, it should
@@ -1369,7 +1358,7 @@ class Dseqrecord(_SeqRecord):
1369
1358
  initial_shift = left_watson if left_ovhg < 0 else left_crick
1370
1359
  features = self.shifted(initial_shift).features
1371
1360
  # for f in features:
1372
- # print(f.id, f.location, _location_boundaries(f.location))
1361
+ # print(f.id, f.location, location_boundaries(f.location))
1373
1362
  # Here, we have done what's shown below (* indicates the origin).
1374
1363
  # The features 0 and 2 have the right location for the final product:
1375
1364
  #
@@ -1383,10 +1372,10 @@ class Dseqrecord(_SeqRecord):
1383
1372
  features_need_transfer = [
1384
1373
  f
1385
1374
  for f in features
1386
- if (_location_boundaries(f.location)[1] <= abs(left_ovhg))
1375
+ if (location_boundaries(f.location)[1] <= abs(left_ovhg))
1387
1376
  ]
1388
1377
  features_need_transfer = [
1389
- _shift_feature(f, -abs(left_ovhg), len(self))
1378
+ shift_feature(f, -abs(left_ovhg), len(self))
1390
1379
  for f in features_need_transfer
1391
1380
  ]
1392
1381
 
@@ -1403,7 +1392,7 @@ class Dseqrecord(_SeqRecord):
1403
1392
  # as the original one. However, the final product is longer because of the overhang.
1404
1393
 
1405
1394
  features += [
1406
- _shift_feature(f, abs(left_ovhg), len(dseq))
1395
+ shift_feature(f, abs(left_ovhg), len(dseq))
1407
1396
  for f in features_need_transfer
1408
1397
  ]
1409
1398
  # ^ ^^^^^^^^^
@@ -1415,9 +1404,9 @@ class Dseqrecord(_SeqRecord):
1415
1404
  f
1416
1405
  for f in features
1417
1406
  if (
1418
- _location_boundaries(f.location)[1] <= len(dseq)
1419
- and _location_boundaries(f.location)[0]
1420
- <= _location_boundaries(f.location)[1]
1407
+ location_boundaries(f.location)[1] <= len(dseq)
1408
+ and location_boundaries(f.location)[0]
1409
+ <= location_boundaries(f.location)[1]
1421
1410
  )
1422
1411
  ]
1423
1412
  else:
@@ -1468,3 +1457,32 @@ class Dseqrecord(_SeqRecord):
1468
1457
  if self.source is None:
1469
1458
  return ""
1470
1459
  return self.source.history_string(self)
1460
+
1461
+ def join(self, fragments):
1462
+ """
1463
+ Join an iterable of Dseqrecords with this instance as the separator.
1464
+
1465
+ Example:
1466
+
1467
+ >>> sep = Dseqrecord("a")
1468
+ >>> joined = sep.join([Dseqrecord("A"), Dseqrecord("B"), Dseqrecord("C")])
1469
+ >>> joined
1470
+ Dseqrecord(-5)
1471
+ >>> joined.seq
1472
+ Dseq(-5)
1473
+ AaBaC
1474
+ TtVtG
1475
+
1476
+ """
1477
+ it = iter(fragments)
1478
+ try:
1479
+ result = next(it) # first element (no leading separator)
1480
+ except StopIteration:
1481
+ # Empty iterable -> return empty Dseqrecord in analogy with
1482
+ # str.join
1483
+ return Dseqrecord("")
1484
+
1485
+ # Interleave: result = first + sep + x + sep + y + ...
1486
+ for x in it:
1487
+ result = result + self + x
1488
+ return result