pydna 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/dseqrecord.py CHANGED
@@ -11,46 +11,44 @@ Seq and SeqRecord classes, respectively.
11
11
 
12
12
  The Dseq and Dseqrecord classes support the notion of circular and linear DNA topology.
13
13
  """
14
- from Bio.Restriction import RestrictionBatch as _RestrictionBatch
14
+ from Bio.Restriction import RestrictionBatch
15
15
  from Bio.Restriction import CommOnly
16
- from pydna.dseq import Dseq as _Dseq
17
- from pydna._pretty import pretty_str as _pretty_str
18
- from pydna.utils import flatten as _flatten, location_boundaries as _location_boundaries
19
-
20
- # from pydna.utils import memorize as _memorize
21
- from pydna.utils import rc as _rc
22
- from pydna.utils import shift_location as _shift_location
23
- from pydna.utils import shift_feature as _shift_feature
24
- from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
25
- from Bio.SeqFeature import SeqFeature as _SeqFeature
16
+ from pydna.dseq import Dseq
17
+ from pydna._pretty import pretty_str
18
+ from pydna.utils import flatten, location_boundaries
19
+
20
+ from pydna.utils import shift_location
21
+ from pydna.utils import shift_feature
22
+ from pydna.common_sub_strings import common_sub_strings
23
+ from Bio.SeqFeature import SeqFeature
26
24
  from Bio import SeqIO
27
- from Bio.SeqFeature import CompoundLocation as _CompoundLocation
28
- from Bio.SeqFeature import SimpleLocation as _SimpleLocation
29
- from pydna.seqrecord import SeqRecord as _SeqRecord
30
- from Bio.Seq import translate as _translate
31
- from pydna.utils import identifier_from_string as _identifier_from_string
32
- import copy as _copy
33
- import operator as _operator
34
- import os as _os
35
- import re as _re
36
- import time as _time
37
- import datetime as _datetime
38
-
39
-
40
- # import logging as _logging
41
-
42
- # _module_logger = _logging.getLogger("pydna." + __name__)
25
+ from Bio.SeqFeature import CompoundLocation
26
+ from Bio.SeqFeature import SimpleLocation
27
+ from pydna.seqrecord import SeqRecord
28
+ from Bio.Seq import translate
29
+ from pydna.utils import identifier_from_string
30
+ import copy
31
+ import operator
32
+ import os
33
+ import re
34
+ import time
35
+ import datetime
36
+ from typing import Union, TYPE_CHECKING
37
+ from pydna.opencloning_models import SequenceCutSource
38
+
39
+ if TYPE_CHECKING: # pragma: no cover
40
+ from pydna.opencloning_models import Source
43
41
 
44
42
 
45
43
  try:
46
- from IPython.display import display_html as _display_html
44
+ from IPython.display import display_html
47
45
  except ImportError:
48
46
 
49
- def _display_html(item, raw=None):
47
+ def display_html(item, raw=None):
50
48
  return item
51
49
 
52
50
 
53
- class Dseqrecord(_SeqRecord):
51
+ class Dseqrecord(SeqRecord):
54
52
  """Dseqrecord is a double stranded version of the Biopython SeqRecord [#]_ class.
55
53
  The Dseqrecord object holds a Dseq object describing the sequence.
56
54
  Additionally, Dseqrecord holds meta information about the sequence in the
@@ -127,7 +125,8 @@ class Dseqrecord(_SeqRecord):
127
125
 
128
126
  """
129
127
 
130
- seq: _Dseq
128
+ seq: Dseq
129
+ source: Union["Source", None] = None
131
130
 
132
131
  def __init__(
133
132
  self,
@@ -135,17 +134,15 @@ class Dseqrecord(_SeqRecord):
135
134
  *args,
136
135
  circular=None,
137
136
  n=5e-14, # mol ( = 0.05 pmol)
137
+ source=None,
138
138
  **kwargs,
139
139
  ):
140
- # _module_logger.info("### Dseqrecord initialized ###")
141
- # _module_logger.info("argument circular = %s", circular)
142
- # _module_logger.info("circular = %s", circular)
143
140
 
144
141
  if isinstance(record, str):
145
- # _module_logger.info("record is a string")
142
+
146
143
  super().__init__(
147
- _Dseq.from_string(
148
- record,
144
+ Dseq.quick(
145
+ record.encode("ascii"),
149
146
  # linear=linear,
150
147
  circular=bool(circular),
151
148
  ),
@@ -159,14 +156,14 @@ class Dseqrecord(_SeqRecord):
159
156
  record = record[:]
160
157
  elif circular is True:
161
158
  record = record.looped()
162
- # _module_logger.info("record is a Dseq object")
159
+
163
160
  super().__init__(record, *args, **kwargs)
164
161
 
165
162
  # record is a Bio.Seq object ?
166
163
  elif hasattr(record, "transcribe"):
167
- # _module_logger.info("record is a Seq object")
164
+
168
165
  super().__init__(
169
- _Dseq(
166
+ Dseq(
170
167
  str(record),
171
168
  # linear=linear,
172
169
  circular=bool(circular),
@@ -177,13 +174,13 @@ class Dseqrecord(_SeqRecord):
177
174
 
178
175
  # record is a Bio.SeqRecord or Dseqrecord object ?
179
176
  elif hasattr(record, "features"):
180
- # _module_logger.info("record is a Bio.SeqRecord or Dseqrecord object")
177
+
181
178
  for key, value in list(record.__dict__.items()):
182
179
  setattr(self, key, value)
183
180
  self.letter_annotations = {}
184
181
  # record.seq is a Dseq object ?
185
182
  if hasattr(record.seq, "watson"):
186
- new_seq = _copy.copy(record.seq)
183
+ new_seq = copy.copy(record.seq)
187
184
  if circular is False:
188
185
  new_seq = new_seq[:]
189
186
  elif circular is True:
@@ -191,7 +188,7 @@ class Dseqrecord(_SeqRecord):
191
188
  self.seq = new_seq
192
189
  # record.seq is a Bio.Seq object ?
193
190
  else:
194
- self.seq = _Dseq(
191
+ self.seq = Dseq(
195
192
  str(record.seq),
196
193
  # linear=linear,
197
194
  circular=bool(circular),
@@ -202,6 +199,7 @@ class Dseqrecord(_SeqRecord):
202
199
  self.map_target = None
203
200
  self.n = n # amount in mol; the default 5E-14 mol is 0.05 pmol
204
201
  self.annotations.update({"molecule_type": "DNA"})
202
+ self.source = source
205
203
 
206
204
  @classmethod
207
205
  def from_string(
@@ -218,16 +216,14 @@ class Dseqrecord(_SeqRecord):
218
216
  # linear=True, circular=False, n = 5E-14, **kwargs):
219
217
  obj = cls.__new__(cls) # Does not call __init__
220
218
  obj._per_letter_annotations = {}
221
- obj.seq = _Dseq.quick(
222
- record,
223
- _rc(record),
224
- ovhg=0,
219
+ obj.seq = Dseq.quick(
220
+ record.encode("ascii"),
225
221
  # linear=linear,
226
222
  circular=circular,
227
223
  )
228
- obj.id = _pretty_str("id")
229
- obj.name = _pretty_str("name")
230
- obj.description = _pretty_str("description")
224
+ obj.id = pretty_str("id")
225
+ obj.name = pretty_str("name")
226
+ obj.description = pretty_str("description")
231
227
  obj.dbxrefs = []
232
228
  obj.annotations = {"molecule_type": "DNA"}
233
229
  obj.features = []
@@ -239,7 +235,7 @@ class Dseqrecord(_SeqRecord):
239
235
  @classmethod
240
236
  def from_SeqRecord(
241
237
  cls,
242
- record: _SeqRecord,
238
+ record: SeqRecord,
243
239
  *args,
244
240
  circular=None,
245
241
  n=5e-14,
@@ -256,11 +252,10 @@ class Dseqrecord(_SeqRecord):
256
252
  obj.features = record.features
257
253
  obj.map_target = None
258
254
  obj.n = n
255
+ obj.source = None
259
256
  if circular is None:
260
257
  circular = record.annotations.get("topology") == "circular"
261
- obj.seq = _Dseq.quick(
262
- str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
263
- )
258
+ obj.seq = Dseq.quick(record.seq._data, ovhg=0, circular=circular)
264
259
  return obj
265
260
 
266
261
  @property
@@ -330,14 +325,14 @@ class Dseqrecord(_SeqRecord):
330
325
  qualifiers = {}
331
326
  qualifiers.update(kwargs)
332
327
 
333
- location = _CompoundLocation(
328
+ location = CompoundLocation(
334
329
  (
335
- _SimpleLocation(x, self.seq.length, strand=strand),
336
- _SimpleLocation(0, y, strand=strand),
330
+ SimpleLocation(x, len(self.seq), strand=strand),
331
+ SimpleLocation(0, y, strand=strand),
337
332
  )
338
333
  )
339
334
 
340
- sf = _SeqFeature(location, type=type_, qualifiers=qualifiers)
335
+ sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
341
336
 
342
337
  if "label" not in qualifiers:
343
338
  qualifiers["label"] = [f"ft{len(location)}"]
@@ -386,35 +381,31 @@ class Dseqrecord(_SeqRecord):
386
381
  --------
387
382
  pydna.dseq.Dseq.looped
388
383
  """
389
- new = _copy.copy(self)
390
- # for key, value in list(self.__dict__.items()):
391
- # setattr(new, key, value)
392
- new._seq = self.seq.looped()
393
- five_prime = self.seq.five_prime_end()
394
- for fn, fo in zip(new.features, self.features):
395
- if five_prime[0] == "5'":
396
- pass
397
- # fn.location = fn.location + self.seq.ovhg
398
- elif five_prime[0] == "3'":
399
- fn.location = fn.location + (-self.seq.ovhg)
400
- if fn.location.start < 0:
401
- loc1 = _SimpleLocation(
402
- len(new) + fn.location.start, len(new), strand=fn.location.strand
403
- )
404
- loc2 = _SimpleLocation(0, fn.location.end, strand=fn.location.strand)
405
- fn.location = _CompoundLocation([loc1, loc2])
406
-
407
- if fn.location.end > len(new):
408
- loc1 = _SimpleLocation(
409
- fn.location.start, len(new), strand=fn.location.strand
410
- )
411
- loc2 = _SimpleLocation(
412
- 0, fn.location.end - len(new), strand=fn.location.strand
413
- )
414
- fn.location = _CompoundLocation([loc1, loc2])
415
-
416
- fn.qualifiers = fo.qualifiers
417
-
384
+ new = copy.deepcopy(self)
385
+ new.seq = self.seq.looped()
386
+
387
+ old_length = len(self) # Possibly longer, including sticky ends if any.
388
+ new_length = len(new) # Possibly shorter, with blunt ends.
389
+ if old_length != new_length: # Only False if self was blunt.
390
+ new_features = []
391
+ for fn in new.features:
392
+ if len(fn.location) > new_length:
393
+ # Edge case: if the feature is longer than the sequence, it should be
394
+ # dropped. This can happen in a sequence with overhangs, where the feature
395
+ # spans both overhangs.
396
+ #
397
+ # Example:
398
+ # feature
399
+ # <------>
400
+ # aaACGT
401
+ # TGCAtt
402
+ #
403
+ # Circular sequence ACGTtt should not have that feature, so we drop it
404
+ continue
405
+ fn.location = shift_location(fn.location, 0, new_length)
406
+ new_features.append(fn)
407
+
408
+ new.features = new_features
418
409
  return new
419
410
 
420
411
  def tolinear(self): # pragma: no cover
@@ -436,16 +427,16 @@ class Dseqrecord(_SeqRecord):
436
427
  >>>
437
428
 
438
429
  """
439
- import warnings as _warnings
430
+ import warnings
440
431
  from pydna import _PydnaDeprecationWarning
441
432
 
442
- _warnings.warn(
433
+ warnings.warn(
443
434
  "tolinear method is obsolete; "
444
435
  "please use obj[:] "
445
436
  "instead of obj.tolinear().",
446
437
  _PydnaDeprecationWarning,
447
438
  )
448
- new = _copy.copy(self)
439
+ new = copy.copy(self)
449
440
  for key, value in list(self.__dict__.items()):
450
441
  setattr(new, key, value)
451
442
  # new._seq = self.seq.tolinear()
@@ -456,7 +447,7 @@ class Dseqrecord(_SeqRecord):
456
447
 
457
448
  def terminal_transferase(self, nucleotides="a"):
458
449
  """docstring."""
459
- newseq = _copy.deepcopy(self)
450
+ newseq = copy.deepcopy(self)
460
451
  newseq.seq = self.seq.terminal_transferase(nucleotides)
461
452
  for feature in newseq.features:
462
453
  feature.location += len(nucleotides)
@@ -496,12 +487,12 @@ class Dseqrecord(_SeqRecord):
496
487
 
497
488
  """
498
489
 
499
- record = _copy.deepcopy(self)
490
+ record = copy.deepcopy(self)
500
491
  if f in ("genbank", "gb") and self.circular:
501
492
  record.annotations["topology"] = "circular"
502
493
  else:
503
494
  record.annotations["topology"] = "linear"
504
- return _SeqRecord.format(record, f).strip()
495
+ return SeqRecord.format(record, f).strip()
505
496
 
506
497
  def write(self, filename=None, f="gb"):
507
498
  """Writes the Dseqrecord to a file using the format f, which must
@@ -534,9 +525,9 @@ class Dseqrecord(_SeqRecord):
534
525
  # generate a name if no name was given
535
526
  # if not isinstance(filename, str): # is filename a string???
536
527
  # raise ValueError("filename has to be a string, got", type(filename))
537
- name, ext = _os.path.splitext(filename)
528
+ name, ext = os.path.splitext(filename)
538
529
  msg = f"<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>"
539
- if not _os.path.isfile(filename):
530
+ if not os.path.isfile(filename):
540
531
  with open(filename, "w", encoding="utf8") as fp:
541
532
  fp.write(self.format(f))
542
533
  else:
@@ -547,16 +538,16 @@ class Dseqrecord(_SeqRecord):
547
538
  if self.seq != old_file.seq:
548
539
  # If new sequence is different, the old file is
549
540
  # renamed with "_OLD_" suffix:
550
- oldmtime = _datetime.datetime.fromtimestamp(
551
- _os.path.getmtime(filename)
541
+ oldmtime = datetime.datetime.fromtimestamp(
542
+ os.path.getmtime(filename)
552
543
  ).isoformat()
553
- tstmp = int(_time.time() * 1_000_000)
544
+ tstmp = int(time.time() * 1_000_000)
554
545
  old_filename = f"{name}_OLD_{tstmp}{ext}"
555
- _os.rename(filename, old_filename)
546
+ os.rename(filename, old_filename)
556
547
  with open(filename, "w", encoding="utf8") as fp:
557
548
  fp.write(self.format(f))
558
- newmtime = _datetime.datetime.fromtimestamp(
559
- _os.path.getmtime(filename)
549
+ newmtime = datetime.datetime.fromtimestamp(
550
+ os.path.getmtime(filename)
560
551
  ).isoformat()
561
552
  msg = f"""
562
553
  <table style="padding:10px 10px;
@@ -602,8 +593,8 @@ class Dseqrecord(_SeqRecord):
602
593
  elif "seguid" in old_file.annotations.get("comment", ""):
603
594
  pattern = r"(ldseguid|cdseguid)-(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
604
595
  # seguid=NNNNNNNNNNNNNNNNNNNNNNNNNNN_2020-10-10T11:11:11.111111
605
- oldstamp = _re.search(pattern, old_file.description)
606
- newstamp = _re.search(pattern, self.description)
596
+ oldstamp = re.search(pattern, old_file.description)
597
+ newstamp = re.search(pattern, self.description)
607
598
  newdescription = self.description
608
599
  if oldstamp and newstamp:
609
600
  if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
@@ -612,7 +603,7 @@ class Dseqrecord(_SeqRecord):
612
603
  )
613
604
  elif oldstamp:
614
605
  newdescription += " " + oldstamp.group(0)
615
- newobj = _copy.copy(self)
606
+ newobj = copy.copy(self)
616
607
  newobj.description = newdescription
617
608
 
618
609
  with open(filename, "w", encoding="utf8") as fp:
@@ -620,7 +611,7 @@ class Dseqrecord(_SeqRecord):
620
611
  else:
621
612
  with open(filename, "w", encoding="utf8") as fp:
622
613
  fp.write(self.format(f))
623
- return _display_html(msg, raw=True)
614
+ return display_html(msg, raw=True)
624
615
 
625
616
  def find(self, other):
626
617
  # TODO allow strings, seqs, seqrecords or Dseqrecords
@@ -638,7 +629,7 @@ class Dseqrecord(_SeqRecord):
638
629
  def __str__(self):
639
630
  return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
640
631
  self.circular, len(self)
641
- ) + _SeqRecord.__str__(self)
632
+ ) + SeqRecord.__str__(self)
642
633
 
643
634
  def __contains__(self, other):
644
635
  if other.lower() in str(self.seq).lower():
@@ -649,7 +640,7 @@ class Dseqrecord(_SeqRecord):
649
640
  spc = 3 - ln % 3 if ln % 3 else 0
650
641
  s = "n" * spc + s + "nnn"
651
642
  for frame in range(3):
652
- if other.lower() in _translate(s[frame : frame + spc + ln]).lower():
643
+ if other.lower() in translate(s[frame : frame + spc + ln]).lower():
653
644
  return True
654
645
  return False
655
646
 
@@ -658,13 +649,13 @@ class Dseqrecord(_SeqRecord):
658
649
  >>> from pydna.dseqrecord import Dseqrecord
659
650
  >>> s=Dseqrecord("atgtacgatcgtatgctggttatattttag")
660
651
  >>> s.seq.translate()
661
- Seq('MYDRMLVIF*')
652
+ ProteinSeq('MYDRMLVIF*')
662
653
  >>> "RML" in s
663
654
  True
664
655
  >>> "MMM" in s
665
656
  False
666
657
  >>> s.seq.rc().translate()
667
- Seq('LKYNQHTIVH')
658
+ ProteinSeq('LKYNQHTIVH')
668
659
  >>> "QHT" in s.rc()
669
660
  True
670
661
  >>> "QHT" in s
@@ -680,7 +671,7 @@ class Dseqrecord(_SeqRecord):
680
671
  cgtatgctg
681
672
  gcatacgac
682
673
  >>> code.translate()
683
- Seq('RML')
674
+ ProteinSeq('RML')
684
675
  """
685
676
  other = str(other).lower()
686
677
  assert self.seq.watson == "".join(self.seq.watson.split())
@@ -691,7 +682,7 @@ class Dseqrecord(_SeqRecord):
691
682
  start = None
692
683
  for frame in range(3):
693
684
  try:
694
- start = _translate(s[frame : frame + ln + spc]).lower().index(other)
685
+ start = translate(s[frame : frame + ln + spc]).lower().index(other)
695
686
  break
696
687
  except ValueError:
697
688
  pass
@@ -739,7 +730,7 @@ class Dseqrecord(_SeqRecord):
739
730
  matching_reads = []
740
731
 
741
732
  for read_ in reads:
742
- matches = _common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
733
+ matches = common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
743
734
 
744
735
  if not matches:
745
736
  continue
@@ -760,14 +751,14 @@ class Dseqrecord(_SeqRecord):
760
751
  if len(newmatches) > 1:
761
752
  ms = []
762
753
  for m in newmatches:
763
- ms.append(_SimpleLocation(m[0], m[0] + m[2]))
764
- loc = _CompoundLocation(ms)
754
+ ms.append(SimpleLocation(m[0], m[0] + m[2]))
755
+ loc = CompoundLocation(ms)
765
756
  else:
766
757
  a, b, c = newmatches[0]
767
- loc = _SimpleLocation(a, a + c)
758
+ loc = SimpleLocation(a, a + c)
768
759
 
769
760
  self.features.append(
770
- _SeqFeature(
761
+ SeqFeature(
771
762
  loc,
772
763
  qualifiers={"label": [read_.annotations["filename"]]},
773
764
  type="trace",
@@ -777,9 +768,8 @@ class Dseqrecord(_SeqRecord):
777
768
  return [x.annotations["filename"] for x in matching_reads]
778
769
 
779
770
  def __repr__(self):
780
- return "Dseqrecord({}{})".format(
781
- {True: "-", False: "o"}[not self.circular], len(self)
782
- )
771
+ top = {True: "-", False: "o"}[not self.circular]
772
+ return f"{self.__class__.__name__}({top}{len(self)})"
783
773
 
784
774
  def _repr_pretty_(self, p, cycle):
785
775
  p.text(
@@ -790,7 +780,7 @@ class Dseqrecord(_SeqRecord):
790
780
 
791
781
  def __add__(self, other):
792
782
  if hasattr(other, "seq") and hasattr(other.seq, "watson"):
793
- other = _copy.deepcopy(other)
783
+ other = copy.deepcopy(other)
794
784
  other_five_prime = other.seq.five_prime_end()
795
785
  if other_five_prime[0] == "5'":
796
786
  # add other.seq.ovhg
@@ -801,10 +791,10 @@ class Dseqrecord(_SeqRecord):
801
791
  for f in other.features:
802
792
  f.location = f.location + (-other.seq.ovhg)
803
793
 
804
- answer = Dseqrecord(_SeqRecord.__add__(self, other))
794
+ answer = Dseqrecord(SeqRecord.__add__(self, other))
805
795
  answer.n = min(self.n, other.n)
806
796
  else:
807
- answer = Dseqrecord(_SeqRecord.__add__(self, Dseqrecord(other)))
797
+ answer = Dseqrecord(SeqRecord.__add__(self, Dseqrecord(other)))
808
798
  answer.n = self.n
809
799
  return answer
810
800
 
@@ -818,7 +808,7 @@ class Dseqrecord(_SeqRecord):
818
808
  if self.circular:
819
809
  raise TypeError("TypeError: can't multiply circular Dseqrecord.")
820
810
  if number > 0:
821
- new = _copy.deepcopy(self)
811
+ new = copy.deepcopy(self)
822
812
  for i in range(1, number):
823
813
  new += self
824
814
  new._per_letter_annotations = self._per_letter_annotations
@@ -828,7 +818,7 @@ class Dseqrecord(_SeqRecord):
828
818
 
829
819
  def __getitem__(self, sl):
830
820
  """docstring."""
831
- answer = Dseqrecord(_copy.copy(self))
821
+ answer = Dseqrecord(copy.copy(self))
832
822
  answer.seq = self.seq.__getitem__(sl)
833
823
  # answer.seq.alphabet = self.seq.alphabet
834
824
  # breakpoint()
@@ -850,9 +840,9 @@ class Dseqrecord(_SeqRecord):
850
840
  f
851
841
  for f in answer.features
852
842
  if (
853
- _location_boundaries(f.location)[1] <= answer.seq.length
854
- and _location_boundaries(f.location)[0]
855
- < _location_boundaries(f.location)[1]
843
+ location_boundaries(f.location)[1] <= len(answer.seq)
844
+ and location_boundaries(f.location)[0]
845
+ < location_boundaries(f.location)[1]
856
846
  )
857
847
  ]
858
848
 
@@ -868,14 +858,18 @@ class Dseqrecord(_SeqRecord):
868
858
  identifier = " ".join(sf.qualifiers["label"])
869
859
  elif "note" in sf.qualifiers:
870
860
  identifier = " ".join(sf.qualifiers["note"])
871
- answer.id = _identifier_from_string(identifier)[:16]
872
- answer.name = _identifier_from_string("part_{name}".format(name=self.name))[:16]
861
+ answer.id = identifier_from_string(identifier)[:16]
862
+ answer.name = identifier_from_string("part_{name}".format(name=self.name))[:16]
873
863
  return answer
874
864
 
875
865
  def __eq__(self, other):
876
866
  """docstring."""
877
867
  try:
878
- if self.seq == other.seq and str(self.__dict__) == str(other.__dict__):
868
+ this_dict = self.__dict__.copy()
869
+ other_dict = other.__dict__.copy()
870
+ del this_dict["source"]
871
+ del other_dict["source"]
872
+ if self.seq == other.seq and str(this_dict) == str(other_dict):
879
873
  return True
880
874
  except AttributeError:
881
875
  pass
@@ -907,43 +901,34 @@ class Dseqrecord(_SeqRecord):
907
901
  answer.name = answer.id[:16]
908
902
  return fragments[0]
909
903
 
910
- def no_cutters(self, batch: _RestrictionBatch = None):
904
+ def no_cutters(self, batch: RestrictionBatch = None):
911
905
  """docstring."""
912
906
  return self.seq.no_cutters(batch=batch or CommOnly)
913
907
 
914
- def unique_cutters(self, batch: _RestrictionBatch = None):
908
+ def unique_cutters(self, batch: RestrictionBatch = None):
915
909
  """docstring."""
916
910
  return self.seq.unique_cutters(batch=batch or CommOnly)
917
911
 
918
- def once_cutters(self, batch: _RestrictionBatch = None):
912
+ def once_cutters(self, batch: RestrictionBatch = None):
919
913
  """docstring."""
920
914
  return self.seq.once_cutters(batch=batch or CommOnly)
921
915
 
922
- def twice_cutters(self, batch: _RestrictionBatch = None):
916
+ def twice_cutters(self, batch: RestrictionBatch = None):
923
917
  """docstring."""
924
918
  return self.seq.twice_cutters(batch=batch or CommOnly)
925
919
 
926
- def n_cutters(self, n=3, batch: _RestrictionBatch = None):
920
+ def n_cutters(self, n=3, batch: RestrictionBatch = None):
927
921
  """docstring."""
928
922
  return self.seq.n_cutters(n=n, batch=batch or CommOnly)
929
923
 
930
- def cutters(self, batch: _RestrictionBatch = None):
924
+ def cutters(self, batch: RestrictionBatch = None):
931
925
  """docstring."""
932
926
  return self.seq.cutters(batch=batch or CommOnly)
933
927
 
934
928
  def number_of_cuts(self, *enzymes):
935
929
  """The number of cuts by digestion with the Restriction enzymes
936
930
  contained in the iterable."""
937
- return sum([len(enzyme.search(self.seq)) for enzyme in _flatten(enzymes)])
938
-
939
- def cas9(self, RNA: str):
940
- """docstring."""
941
- fragments = []
942
- result = []
943
- for target in (self.seq, self.seq.rc()):
944
- fragments = [self[sl.start : sl.stop] for sl in target.cas9(RNA)]
945
- result.append(fragments)
946
- return result
931
+ return sum([len(enzyme.search(self.seq)) for enzyme in flatten(enzymes)])
947
932
 
948
933
  def reverse_complement(self):
949
934
  """Reverse complement.
@@ -1020,7 +1005,7 @@ class Dseqrecord(_SeqRecord):
1020
1005
  if not self.circular:
1021
1006
  raise TypeError("Only circular DNA can be synced!")
1022
1007
 
1023
- newseq = _copy.copy(self)
1008
+ newseq = copy.copy(self)
1024
1009
 
1025
1010
  s = str(self.seq.watson).lower()
1026
1011
  s_rc = str(self.seq.crick).lower()
@@ -1036,8 +1021,8 @@ class Dseqrecord(_SeqRecord):
1036
1021
 
1037
1022
  lim = min(limit, limit * (len(s) // limit) + 1)
1038
1023
 
1039
- c = _common_sub_strings(s + s, r, limit=lim)
1040
- d = _common_sub_strings(s_rc + s_rc, r, limit=lim)
1024
+ c = common_sub_strings(s + s, r, limit=lim)
1025
+ d = common_sub_strings(s_rc + s_rc, r, limit=lim)
1041
1026
 
1042
1027
  c = [(x[0], x[2]) for x in c if x[1] == 0]
1043
1028
  d = [(x[0], x[2]) for x in d if x[1] == 0]
@@ -1063,7 +1048,7 @@ class Dseqrecord(_SeqRecord):
1063
1048
  result = newseq
1064
1049
  else:
1065
1050
  result = newseq.shifted(start)
1066
- # _module_logger.info("synced")
1051
+
1067
1052
  return result
1068
1053
 
1069
1054
  def upper(self):
@@ -1092,7 +1077,7 @@ class Dseqrecord(_SeqRecord):
1092
1077
  --------
1093
1078
  pydna.dseqrecord.Dseqrecord.lower"""
1094
1079
 
1095
- upper = _copy.deepcopy(self)
1080
+ upper = copy.deepcopy(self)
1096
1081
  # This is because the @seq.setter methods otherwise sets the _per_letter_annotations to an empty dict
1097
1082
  prev_per_letter_annotation = upper._per_letter_annotations
1098
1083
  upper.seq = upper.seq.upper()
@@ -1126,7 +1111,7 @@ class Dseqrecord(_SeqRecord):
1126
1111
  pydna.dseqrecord.Dseqrecord.upper
1127
1112
 
1128
1113
  """
1129
- lower = _copy.deepcopy(self)
1114
+ lower = copy.deepcopy(self)
1130
1115
  prev_per_letter_annotation = lower._per_letter_annotations
1131
1116
  lower.seq = lower.seq.lower()
1132
1117
  lower._per_letter_annotations = prev_per_letter_annotation
@@ -1144,8 +1129,8 @@ class Dseqrecord(_SeqRecord):
1144
1129
  orf = self[x:y]
1145
1130
  prt = orf.translate()
1146
1131
  features.append(
1147
- _SeqFeature(
1148
- _SimpleLocation(x, y, strand=strand),
1132
+ SeqFeature(
1133
+ SimpleLocation(x, y, strand=strand),
1149
1134
  type="CDS",
1150
1135
  qualifiers={
1151
1136
  "note": f"{y - x}bp {(y - x) // 3}aa",
@@ -1183,11 +1168,11 @@ class Dseqrecord(_SeqRecord):
1183
1168
  if self.features:
1184
1169
  f = self.features[feature]
1185
1170
  locations = sorted(
1186
- self.features[feature].location.parts, key=_SimpleLocation.start.fget
1171
+ self.features[feature].location.parts, key=SimpleLocation.start.fget
1187
1172
  )
1188
1173
  strand = f.location.strand
1189
1174
  else:
1190
- locations = [_SimpleLocation(0, 0, 1)]
1175
+ locations = [SimpleLocation(0, 0, 1)]
1191
1176
  strand = 1
1192
1177
 
1193
1178
  ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
@@ -1218,7 +1203,7 @@ class Dseqrecord(_SeqRecord):
1218
1203
  result += f"{s1}\n{s2}"
1219
1204
  else:
1220
1205
  result += f"{s2}\n{s1}"
1221
- return _pretty_str(result)
1206
+ return pretty_str(result)
1222
1207
 
1223
1208
  def shifted(self, shift):
1224
1209
  """Circular Dseqrecord with a new origin <shift>.
@@ -1271,15 +1256,15 @@ class Dseqrecord(_SeqRecord):
1271
1256
  )
1272
1257
  ln = len(self)
1273
1258
  if not shift % ln:
1274
- return _copy.deepcopy(self) # shift is a multiple of ln or 0
1259
+ return copy.deepcopy(self) # shift is a multiple of ln or 0
1275
1260
  else:
1276
1261
  shift %= ln # 0 < shift < ln (multiples of ln were handled above)
1277
1262
  newseq = (self.seq[shift:] + self.seq[:shift]).looped()
1278
- newfeatures = _copy.deepcopy(self.features)
1263
+ newfeatures = copy.deepcopy(self.features)
1279
1264
  for feature in newfeatures:
1280
- feature.location = _shift_location(feature.location, -shift, ln)
1281
- newfeatures.sort(key=_operator.attrgetter("location.start"))
1282
- answer = _copy.deepcopy(self)
1265
+ feature.location = shift_location(feature.location, -shift, ln)
1266
+ newfeatures.sort(key=operator.attrgetter("location.start"))
1267
+ answer = copy.deepcopy(self)
1283
1268
  answer.features = newfeatures
1284
1269
  answer.seq = newseq
1285
1270
  return answer
@@ -1333,7 +1318,7 @@ class Dseqrecord(_SeqRecord):
1333
1318
  if left_cut == right_cut:
1334
1319
  # Not really a cut, but to handle the general case
1335
1320
  if left_cut is None:
1336
- features = _copy.deepcopy(self.features)
1321
+ features = copy.deepcopy(self.features)
1337
1322
  else:
1338
1323
  # The features that span the origin if shifting with left_cut, but that do not cross
1339
1324
  # the cut site should be included, and if there is a feature within the cut site, it should
@@ -1356,7 +1341,7 @@ class Dseqrecord(_SeqRecord):
1356
1341
  initial_shift = left_watson if left_ovhg < 0 else left_crick
1357
1342
  features = self.shifted(initial_shift).features
1358
1343
  # for f in features:
1359
- # print(f.id, f.location, _location_boundaries(f.location))
1344
+ # print(f.id, f.location, location_boundaries(f.location))
1360
1345
  # Here, we have done what's shown below (* indicates the origin).
1361
1346
  # The features 0 and 2 have the right location for the final product:
1362
1347
  #
@@ -1370,10 +1355,10 @@ class Dseqrecord(_SeqRecord):
1370
1355
  features_need_transfer = [
1371
1356
  f
1372
1357
  for f in features
1373
- if (_location_boundaries(f.location)[1] <= abs(left_ovhg))
1358
+ if (location_boundaries(f.location)[1] <= abs(left_ovhg))
1374
1359
  ]
1375
1360
  features_need_transfer = [
1376
- _shift_feature(f, -abs(left_ovhg), len(self))
1361
+ shift_feature(f, -abs(left_ovhg), len(self))
1377
1362
  for f in features_need_transfer
1378
1363
  ]
1379
1364
 
@@ -1390,7 +1375,7 @@ class Dseqrecord(_SeqRecord):
1390
1375
  # as the original one. However, the final product is longer because of the overhang.
1391
1376
 
1392
1377
  features += [
1393
- _shift_feature(f, abs(left_ovhg), len(dseq))
1378
+ shift_feature(f, abs(left_ovhg), len(dseq))
1394
1379
  for f in features_need_transfer
1395
1380
  ]
1396
1381
  # ^ ^^^^^^^^^
@@ -1402,9 +1387,9 @@ class Dseqrecord(_SeqRecord):
1402
1387
  f
1403
1388
  for f in features
1404
1389
  if (
1405
- _location_boundaries(f.location)[1] <= len(dseq)
1406
- and _location_boundaries(f.location)[0]
1407
- <= _location_boundaries(f.location)[1]
1390
+ location_boundaries(f.location)[1] <= len(dseq)
1391
+ and location_boundaries(f.location)[0]
1392
+ <= location_boundaries(f.location)[1]
1408
1393
  )
1409
1394
  ]
1410
1395
  else:
@@ -1419,4 +1404,68 @@ class Dseqrecord(_SeqRecord):
1419
1404
  right_edge = right_watson if right_ovhg > 0 else right_crick
1420
1405
  features = self[left_edge:right_edge].features
1421
1406
 
1422
- return Dseqrecord(dseq, features=features)
1407
+ # This will need to be generalised to all types of cuts
1408
+ source = SequenceCutSource.from_parent(self, left_cut, right_cut)
1409
+ return Dseqrecord(dseq, features=features, source=source)
1410
+
1411
+ def history(self):
1412
+ """
1413
+ Returns a string representation of the cloning history of the sequence.
1414
+ Returns an empty string if the sequence has no source.
1415
+
1416
+ Check the documentation notebooks for extensive examples.
1417
+
1418
+ Returns
1419
+ -------
1420
+ str: A string representation of the cloning history of the sequence.
1421
+
1422
+ Examples
1423
+ --------
1424
+ >>> from pydna.dseqrecord import Dseqrecord
1425
+ >>> from pydna.assembly2 import gibson_assembly
1426
+ >>> fragments = [
1427
+ ... Dseqrecord("TTTTacgatAAtgctccCCCC", circular=False, name="fragment1"),
1428
+ ... Dseqrecord("CCCCtcatGGGG", circular=False, name="fragment2"),
1429
+ ... Dseqrecord("GGGGatataTTTT", circular=False, name="fragment3"),
1430
+ ... ]
1431
+ >>> product, *_ = gibson_assembly(fragments, limit=4)
1432
+ >>> product.name = "product_name"
1433
+ >>> print(product.history())
1434
+ ╙── product_name (Dseqrecord(o34))
1435
+ └─╼ GibsonAssemblySource
1436
+ ├─╼ fragment1 (Dseqrecord(-21))
1437
+ ├─╼ fragment2 (Dseqrecord(-12))
1438
+ └─╼ fragment3 (Dseqrecord(-13))
1439
+ """
1440
+ if self.source is None:
1441
+ return ""
1442
+ return self.source.history_string(self)
1443
+
1444
+ def join(self, fragments):
1445
+ """
1446
+ Join an iterable of Dseqrecords with this instance as the separator.
1447
+
1448
+ Example:
1449
+
1450
+ >>> sep = Dseqrecord("a")
1451
+ >>> joined = sep.join([Dseqrecord("A"), Dseqrecord("B"), Dseqrecord("C")])
1452
+ >>> joined
1453
+ Dseqrecord(-5)
1454
+ >>> joined.seq
1455
+ Dseq(-5)
1456
+ AaBaC
1457
+ TtVtG
1458
+
1459
+ """
1460
+ it = iter(fragments)
1461
+ try:
1462
+ result = next(it) # first element (no leading separator)
1463
+ except StopIteration:
1464
+ # Empty iterable -> return empty Dseqrecord in analogy with
1465
+ # str.join
1466
+ return Dseqrecord("")
1467
+
1468
+ # Interleave: result = first + sep + x + sep + y + ...
1469
+ for x in it:
1470
+ result = result + self + x
1471
+ return result