pydna 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +24 -193
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +650 -405
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/download.py +6 -15
- pydna/dseq.py +1794 -718
- pydna/dseqrecord.py +220 -171
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +680 -0
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +21 -18
- pydna/utils.py +97 -75
- {pydna-5.5.3.dist-info → pydna-5.5.5.dist-info}/METADATA +14 -46
- pydna-5.5.5.dist-info/RECORD +43 -0
- {pydna-5.5.3.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.3.dist-info/RECORD +0 -45
- {pydna-5.5.3.dist-info → pydna-5.5.5.dist-info/licenses}/LICENSE.txt +0 -0
pydna/dseqrecord.py
CHANGED
|
@@ -11,46 +11,44 @@ Seq and SeqRecord classes, respectively.
|
|
|
11
11
|
|
|
12
12
|
The Dseq and Dseqrecord classes support the notion of circular and linear DNA topology.
|
|
13
13
|
"""
|
|
14
|
-
from Bio.Restriction import RestrictionBatch
|
|
14
|
+
from Bio.Restriction import RestrictionBatch
|
|
15
15
|
from Bio.Restriction import CommOnly
|
|
16
|
-
from pydna.dseq import Dseq
|
|
17
|
-
from pydna._pretty import pretty_str
|
|
18
|
-
from pydna.utils import flatten
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
from pydna.utils import
|
|
22
|
-
from pydna.
|
|
23
|
-
from
|
|
24
|
-
from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
|
|
25
|
-
from Bio.SeqFeature import SeqFeature as _SeqFeature
|
|
16
|
+
from pydna.dseq import Dseq
|
|
17
|
+
from pydna._pretty import pretty_str
|
|
18
|
+
from pydna.utils import flatten, location_boundaries
|
|
19
|
+
|
|
20
|
+
from pydna.utils import shift_location
|
|
21
|
+
from pydna.utils import shift_feature
|
|
22
|
+
from pydna.common_sub_strings import common_sub_strings
|
|
23
|
+
from Bio.SeqFeature import SeqFeature
|
|
26
24
|
from Bio import SeqIO
|
|
27
|
-
from Bio.SeqFeature import CompoundLocation
|
|
28
|
-
from Bio.SeqFeature import SimpleLocation
|
|
29
|
-
from pydna.seqrecord import SeqRecord
|
|
30
|
-
from Bio.Seq import translate
|
|
31
|
-
from pydna.utils import identifier_from_string
|
|
32
|
-
import copy
|
|
33
|
-
import operator
|
|
34
|
-
import os
|
|
35
|
-
import re
|
|
36
|
-
import time
|
|
37
|
-
import datetime
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
25
|
+
from Bio.SeqFeature import CompoundLocation
|
|
26
|
+
from Bio.SeqFeature import SimpleLocation
|
|
27
|
+
from pydna.seqrecord import SeqRecord
|
|
28
|
+
from Bio.Seq import translate
|
|
29
|
+
from pydna.utils import identifier_from_string
|
|
30
|
+
import copy
|
|
31
|
+
import operator
|
|
32
|
+
import os
|
|
33
|
+
import re
|
|
34
|
+
import time
|
|
35
|
+
import datetime
|
|
36
|
+
from typing import Union, TYPE_CHECKING
|
|
37
|
+
from pydna.opencloning_models import SequenceCutSource
|
|
38
|
+
|
|
39
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
40
|
+
from pydna.opencloning_models import Source
|
|
43
41
|
|
|
44
42
|
|
|
45
43
|
try:
|
|
46
|
-
from IPython.display import display_html
|
|
44
|
+
from IPython.display import display_html
|
|
47
45
|
except ImportError:
|
|
48
46
|
|
|
49
|
-
def
|
|
47
|
+
def display_html(item, raw=None):
|
|
50
48
|
return item
|
|
51
49
|
|
|
52
50
|
|
|
53
|
-
class Dseqrecord(
|
|
51
|
+
class Dseqrecord(SeqRecord):
|
|
54
52
|
"""Dseqrecord is a double stranded version of the Biopython SeqRecord [#]_ class.
|
|
55
53
|
The Dseqrecord object holds a Dseq object describing the sequence.
|
|
56
54
|
Additionally, Dseqrecord hold meta information about the sequence in the
|
|
@@ -127,7 +125,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
127
125
|
|
|
128
126
|
"""
|
|
129
127
|
|
|
130
|
-
seq:
|
|
128
|
+
seq: Dseq
|
|
129
|
+
source: Union["Source", None] = None
|
|
131
130
|
|
|
132
131
|
def __init__(
|
|
133
132
|
self,
|
|
@@ -135,17 +134,15 @@ class Dseqrecord(_SeqRecord):
|
|
|
135
134
|
*args,
|
|
136
135
|
circular=None,
|
|
137
136
|
n=5e-14, # mol ( = 0.05 pmol)
|
|
137
|
+
source=None,
|
|
138
138
|
**kwargs,
|
|
139
139
|
):
|
|
140
|
-
# _module_logger.info("### Dseqrecord initialized ###")
|
|
141
|
-
# _module_logger.info("argument circular = %s", circular)
|
|
142
|
-
# _module_logger.info("circular = %s", circular)
|
|
143
140
|
|
|
144
141
|
if isinstance(record, str):
|
|
145
|
-
|
|
142
|
+
|
|
146
143
|
super().__init__(
|
|
147
|
-
|
|
148
|
-
record,
|
|
144
|
+
Dseq.quick(
|
|
145
|
+
record.encode("ascii"),
|
|
149
146
|
# linear=linear,
|
|
150
147
|
circular=bool(circular),
|
|
151
148
|
),
|
|
@@ -159,14 +156,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
159
156
|
record = record[:]
|
|
160
157
|
elif circular is True:
|
|
161
158
|
record = record.looped()
|
|
162
|
-
|
|
159
|
+
|
|
163
160
|
super().__init__(record, *args, **kwargs)
|
|
164
161
|
|
|
165
162
|
# record is a Bio.Seq object ?
|
|
166
163
|
elif hasattr(record, "transcribe"):
|
|
167
|
-
|
|
164
|
+
|
|
168
165
|
super().__init__(
|
|
169
|
-
|
|
166
|
+
Dseq(
|
|
170
167
|
str(record),
|
|
171
168
|
# linear=linear,
|
|
172
169
|
circular=bool(circular),
|
|
@@ -177,13 +174,13 @@ class Dseqrecord(_SeqRecord):
|
|
|
177
174
|
|
|
178
175
|
# record is a Bio.SeqRecord or Dseqrecord object ?
|
|
179
176
|
elif hasattr(record, "features"):
|
|
180
|
-
|
|
177
|
+
|
|
181
178
|
for key, value in list(record.__dict__.items()):
|
|
182
179
|
setattr(self, key, value)
|
|
183
180
|
self.letter_annotations = {}
|
|
184
181
|
# record.seq is a Dseq object ?
|
|
185
182
|
if hasattr(record.seq, "watson"):
|
|
186
|
-
new_seq =
|
|
183
|
+
new_seq = copy.copy(record.seq)
|
|
187
184
|
if circular is False:
|
|
188
185
|
new_seq = new_seq[:]
|
|
189
186
|
elif circular is True:
|
|
@@ -191,7 +188,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
191
188
|
self.seq = new_seq
|
|
192
189
|
# record.seq is Bio.SeqRecord object ?
|
|
193
190
|
else:
|
|
194
|
-
self.seq =
|
|
191
|
+
self.seq = Dseq(
|
|
195
192
|
str(record.seq),
|
|
196
193
|
# linear=linear,
|
|
197
194
|
circular=bool(circular),
|
|
@@ -202,6 +199,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
202
199
|
self.map_target = None
|
|
203
200
|
self.n = n # amount, set to 5E-14 which is 5 pmols
|
|
204
201
|
self.annotations.update({"molecule_type": "DNA"})
|
|
202
|
+
self.source = source
|
|
205
203
|
|
|
206
204
|
@classmethod
|
|
207
205
|
def from_string(
|
|
@@ -218,16 +216,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
218
216
|
# linear=True, circular=False, n = 5E-14, **kwargs):
|
|
219
217
|
obj = cls.__new__(cls) # Does not call __init__
|
|
220
218
|
obj._per_letter_annotations = {}
|
|
221
|
-
obj.seq =
|
|
222
|
-
record,
|
|
223
|
-
_rc(record),
|
|
224
|
-
ovhg=0,
|
|
219
|
+
obj.seq = Dseq.quick(
|
|
220
|
+
record.encode("ascii"),
|
|
225
221
|
# linear=linear,
|
|
226
222
|
circular=circular,
|
|
227
223
|
)
|
|
228
|
-
obj.id =
|
|
229
|
-
obj.name =
|
|
230
|
-
obj.description =
|
|
224
|
+
obj.id = pretty_str("id")
|
|
225
|
+
obj.name = pretty_str("name")
|
|
226
|
+
obj.description = pretty_str("description")
|
|
231
227
|
obj.dbxrefs = []
|
|
232
228
|
obj.annotations = {"molecule_type": "DNA"}
|
|
233
229
|
obj.features = []
|
|
@@ -239,7 +235,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
239
235
|
@classmethod
|
|
240
236
|
def from_SeqRecord(
|
|
241
237
|
cls,
|
|
242
|
-
record:
|
|
238
|
+
record: SeqRecord,
|
|
243
239
|
*args,
|
|
244
240
|
circular=None,
|
|
245
241
|
n=5e-14,
|
|
@@ -256,11 +252,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
256
252
|
obj.features = record.features
|
|
257
253
|
obj.map_target = None
|
|
258
254
|
obj.n = n
|
|
255
|
+
obj.source = None
|
|
259
256
|
if circular is None:
|
|
260
257
|
circular = record.annotations.get("topology") == "circular"
|
|
261
|
-
obj.seq =
|
|
262
|
-
str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
|
|
263
|
-
)
|
|
258
|
+
obj.seq = Dseq.quick(record.seq._data, ovhg=0, circular=circular)
|
|
264
259
|
return obj
|
|
265
260
|
|
|
266
261
|
@property
|
|
@@ -330,14 +325,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
330
325
|
qualifiers = {}
|
|
331
326
|
qualifiers.update(kwargs)
|
|
332
327
|
|
|
333
|
-
location =
|
|
328
|
+
location = CompoundLocation(
|
|
334
329
|
(
|
|
335
|
-
|
|
336
|
-
|
|
330
|
+
SimpleLocation(x, len(self.seq), strand=strand),
|
|
331
|
+
SimpleLocation(0, y, strand=strand),
|
|
337
332
|
)
|
|
338
333
|
)
|
|
339
334
|
|
|
340
|
-
sf =
|
|
335
|
+
sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
|
|
341
336
|
|
|
342
337
|
if "label" not in qualifiers:
|
|
343
338
|
qualifiers["label"] = [f"ft{len(location)}"]
|
|
@@ -386,35 +381,31 @@ class Dseqrecord(_SeqRecord):
|
|
|
386
381
|
--------
|
|
387
382
|
pydna.dseq.Dseq.looped
|
|
388
383
|
"""
|
|
389
|
-
new =
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
)
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
fn.location = _CompoundLocation([loc1, loc2])
|
|
415
|
-
|
|
416
|
-
fn.qualifiers = fo.qualifiers
|
|
417
|
-
|
|
384
|
+
new = copy.deepcopy(self)
|
|
385
|
+
new.seq = self.seq.looped()
|
|
386
|
+
|
|
387
|
+
old_length = len(self) # Possibly longer, including sticky ends if any.
|
|
388
|
+
new_length = len(new) # Possibly shorter, with blunt ends.
|
|
389
|
+
if old_length != new_length: # Only False if self was blunt.
|
|
390
|
+
new_features = []
|
|
391
|
+
for fn in new.features:
|
|
392
|
+
if len(fn.location) > new_length:
|
|
393
|
+
# Edge case: if the feature is longer than the sequence, it should be
|
|
394
|
+
# dropped. This can happen in a sequence with overhangs, where the feature
|
|
395
|
+
# spans both overhangs.
|
|
396
|
+
#
|
|
397
|
+
# Example:
|
|
398
|
+
# feature
|
|
399
|
+
# <------>
|
|
400
|
+
# aaACGT
|
|
401
|
+
# TGCAtt
|
|
402
|
+
#
|
|
403
|
+
# Circular sequence ACGTtt should not have that feature, so we drop it
|
|
404
|
+
continue
|
|
405
|
+
fn.location = shift_location(fn.location, 0, new_length)
|
|
406
|
+
new_features.append(fn)
|
|
407
|
+
|
|
408
|
+
new.features = new_features
|
|
418
409
|
return new
|
|
419
410
|
|
|
420
411
|
def tolinear(self): # pragma: no cover
|
|
@@ -436,16 +427,16 @@ class Dseqrecord(_SeqRecord):
|
|
|
436
427
|
>>>
|
|
437
428
|
|
|
438
429
|
"""
|
|
439
|
-
import warnings
|
|
430
|
+
import warnings
|
|
440
431
|
from pydna import _PydnaDeprecationWarning
|
|
441
432
|
|
|
442
|
-
|
|
433
|
+
warnings.warn(
|
|
443
434
|
"tolinear method is obsolete; "
|
|
444
435
|
"please use obj[:] "
|
|
445
436
|
"instead of obj.tolinear().",
|
|
446
437
|
_PydnaDeprecationWarning,
|
|
447
438
|
)
|
|
448
|
-
new =
|
|
439
|
+
new = copy.copy(self)
|
|
449
440
|
for key, value in list(self.__dict__.items()):
|
|
450
441
|
setattr(new, key, value)
|
|
451
442
|
# new._seq = self.seq.tolinear()
|
|
@@ -456,7 +447,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
456
447
|
|
|
457
448
|
def terminal_transferase(self, nucleotides="a"):
|
|
458
449
|
"""docstring."""
|
|
459
|
-
newseq =
|
|
450
|
+
newseq = copy.deepcopy(self)
|
|
460
451
|
newseq.seq = self.seq.terminal_transferase(nucleotides)
|
|
461
452
|
for feature in newseq.features:
|
|
462
453
|
feature.location += len(nucleotides)
|
|
@@ -496,12 +487,12 @@ class Dseqrecord(_SeqRecord):
|
|
|
496
487
|
|
|
497
488
|
"""
|
|
498
489
|
|
|
499
|
-
record =
|
|
490
|
+
record = copy.deepcopy(self)
|
|
500
491
|
if f in ("genbank", "gb") and self.circular:
|
|
501
492
|
record.annotations["topology"] = "circular"
|
|
502
493
|
else:
|
|
503
494
|
record.annotations["topology"] = "linear"
|
|
504
|
-
return
|
|
495
|
+
return SeqRecord.format(record, f).strip()
|
|
505
496
|
|
|
506
497
|
def write(self, filename=None, f="gb"):
|
|
507
498
|
"""Writes the Dseqrecord to a file using the format f, which must
|
|
@@ -534,9 +525,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
534
525
|
# generate a name if no name was given
|
|
535
526
|
# if not isinstance(filename, str): # is filename a string???
|
|
536
527
|
# raise ValueError("filename has to be a string, got", type(filename))
|
|
537
|
-
name, ext =
|
|
528
|
+
name, ext = os.path.splitext(filename)
|
|
538
529
|
msg = f"<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>"
|
|
539
|
-
if not
|
|
530
|
+
if not os.path.isfile(filename):
|
|
540
531
|
with open(filename, "w", encoding="utf8") as fp:
|
|
541
532
|
fp.write(self.format(f))
|
|
542
533
|
else:
|
|
@@ -547,16 +538,16 @@ class Dseqrecord(_SeqRecord):
|
|
|
547
538
|
if self.seq != old_file.seq:
|
|
548
539
|
# If new sequence is different, the old file is
|
|
549
540
|
# renamed with "_OLD_" suffix:
|
|
550
|
-
oldmtime =
|
|
551
|
-
|
|
541
|
+
oldmtime = datetime.datetime.fromtimestamp(
|
|
542
|
+
os.path.getmtime(filename)
|
|
552
543
|
).isoformat()
|
|
553
|
-
tstmp = int(
|
|
544
|
+
tstmp = int(time.time() * 1_000_000)
|
|
554
545
|
old_filename = f"{name}_OLD_{tstmp}{ext}"
|
|
555
|
-
|
|
546
|
+
os.rename(filename, old_filename)
|
|
556
547
|
with open(filename, "w", encoding="utf8") as fp:
|
|
557
548
|
fp.write(self.format(f))
|
|
558
|
-
newmtime =
|
|
559
|
-
|
|
549
|
+
newmtime = datetime.datetime.fromtimestamp(
|
|
550
|
+
os.path.getmtime(filename)
|
|
560
551
|
).isoformat()
|
|
561
552
|
msg = f"""
|
|
562
553
|
<table style="padding:10px 10px;
|
|
@@ -602,8 +593,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
602
593
|
elif "seguid" in old_file.annotations.get("comment", ""):
|
|
603
594
|
pattern = r"(ldseguid|cdseguid)-(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
|
|
604
595
|
# seguid=NNNNNNNNNNNNNNNNNNNNNNNNNNN_2020-10-10T11:11:11.111111
|
|
605
|
-
oldstamp =
|
|
606
|
-
newstamp =
|
|
596
|
+
oldstamp = re.search(pattern, old_file.description)
|
|
597
|
+
newstamp = re.search(pattern, self.description)
|
|
607
598
|
newdescription = self.description
|
|
608
599
|
if oldstamp and newstamp:
|
|
609
600
|
if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
|
|
@@ -612,7 +603,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
612
603
|
)
|
|
613
604
|
elif oldstamp:
|
|
614
605
|
newdescription += " " + oldstamp.group(0)
|
|
615
|
-
newobj =
|
|
606
|
+
newobj = copy.copy(self)
|
|
616
607
|
newobj.description = newdescription
|
|
617
608
|
|
|
618
609
|
with open(filename, "w", encoding="utf8") as fp:
|
|
@@ -620,7 +611,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
620
611
|
else:
|
|
621
612
|
with open(filename, "w", encoding="utf8") as fp:
|
|
622
613
|
fp.write(self.format(f))
|
|
623
|
-
return
|
|
614
|
+
return display_html(msg, raw=True)
|
|
624
615
|
|
|
625
616
|
def find(self, other):
|
|
626
617
|
# TODO allow strings, seqs, seqrecords or Dseqrecords
|
|
@@ -638,7 +629,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
638
629
|
def __str__(self):
|
|
639
630
|
return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
|
|
640
631
|
self.circular, len(self)
|
|
641
|
-
) +
|
|
632
|
+
) + SeqRecord.__str__(self)
|
|
642
633
|
|
|
643
634
|
def __contains__(self, other):
|
|
644
635
|
if other.lower() in str(self.seq).lower():
|
|
@@ -649,7 +640,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
649
640
|
spc = 3 - ln % 3 if ln % 3 else 0
|
|
650
641
|
s = "n" * spc + s + "nnn"
|
|
651
642
|
for frame in range(3):
|
|
652
|
-
if other.lower() in
|
|
643
|
+
if other.lower() in translate(s[frame : frame + spc + ln]).lower():
|
|
653
644
|
return True
|
|
654
645
|
return False
|
|
655
646
|
|
|
@@ -658,13 +649,13 @@ class Dseqrecord(_SeqRecord):
|
|
|
658
649
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
659
650
|
>>> s=Dseqrecord("atgtacgatcgtatgctggttatattttag")
|
|
660
651
|
>>> s.seq.translate()
|
|
661
|
-
|
|
652
|
+
ProteinSeq('MYDRMLVIF*')
|
|
662
653
|
>>> "RML" in s
|
|
663
654
|
True
|
|
664
655
|
>>> "MMM" in s
|
|
665
656
|
False
|
|
666
657
|
>>> s.seq.rc().translate()
|
|
667
|
-
|
|
658
|
+
ProteinSeq('LKYNQHTIVH')
|
|
668
659
|
>>> "QHT" in s.rc()
|
|
669
660
|
True
|
|
670
661
|
>>> "QHT" in s
|
|
@@ -680,7 +671,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
680
671
|
cgtatgctg
|
|
681
672
|
gcatacgac
|
|
682
673
|
>>> code.translate()
|
|
683
|
-
|
|
674
|
+
ProteinSeq('RML')
|
|
684
675
|
"""
|
|
685
676
|
other = str(other).lower()
|
|
686
677
|
assert self.seq.watson == "".join(self.seq.watson.split())
|
|
@@ -691,7 +682,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
691
682
|
start = None
|
|
692
683
|
for frame in range(3):
|
|
693
684
|
try:
|
|
694
|
-
start =
|
|
685
|
+
start = translate(s[frame : frame + ln + spc]).lower().index(other)
|
|
695
686
|
break
|
|
696
687
|
except ValueError:
|
|
697
688
|
pass
|
|
@@ -739,7 +730,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
739
730
|
matching_reads = []
|
|
740
731
|
|
|
741
732
|
for read_ in reads:
|
|
742
|
-
matches =
|
|
733
|
+
matches = common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
|
|
743
734
|
|
|
744
735
|
if not matches:
|
|
745
736
|
continue
|
|
@@ -760,14 +751,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
760
751
|
if len(newmatches) > 1:
|
|
761
752
|
ms = []
|
|
762
753
|
for m in newmatches:
|
|
763
|
-
ms.append(
|
|
764
|
-
loc =
|
|
754
|
+
ms.append(SimpleLocation(m[0], m[0] + m[2]))
|
|
755
|
+
loc = CompoundLocation(ms)
|
|
765
756
|
else:
|
|
766
757
|
a, b, c = newmatches[0]
|
|
767
|
-
loc =
|
|
758
|
+
loc = SimpleLocation(a, a + c)
|
|
768
759
|
|
|
769
760
|
self.features.append(
|
|
770
|
-
|
|
761
|
+
SeqFeature(
|
|
771
762
|
loc,
|
|
772
763
|
qualifiers={"label": [read_.annotations["filename"]]},
|
|
773
764
|
type="trace",
|
|
@@ -777,9 +768,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
777
768
|
return [x.annotations["filename"] for x in matching_reads]
|
|
778
769
|
|
|
779
770
|
def __repr__(self):
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
)
|
|
771
|
+
top = {True: "-", False: "o"}[not self.circular]
|
|
772
|
+
return f"{self.__class__.__name__}({top}{len(self)})"
|
|
783
773
|
|
|
784
774
|
def _repr_pretty_(self, p, cycle):
|
|
785
775
|
p.text(
|
|
@@ -790,7 +780,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
790
780
|
|
|
791
781
|
def __add__(self, other):
|
|
792
782
|
if hasattr(other, "seq") and hasattr(other.seq, "watson"):
|
|
793
|
-
other =
|
|
783
|
+
other = copy.deepcopy(other)
|
|
794
784
|
other_five_prime = other.seq.five_prime_end()
|
|
795
785
|
if other_five_prime[0] == "5'":
|
|
796
786
|
# add other.seq.ovhg
|
|
@@ -801,10 +791,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
801
791
|
for f in other.features:
|
|
802
792
|
f.location = f.location + (-other.seq.ovhg)
|
|
803
793
|
|
|
804
|
-
answer = Dseqrecord(
|
|
794
|
+
answer = Dseqrecord(SeqRecord.__add__(self, other))
|
|
805
795
|
answer.n = min(self.n, other.n)
|
|
806
796
|
else:
|
|
807
|
-
answer = Dseqrecord(
|
|
797
|
+
answer = Dseqrecord(SeqRecord.__add__(self, Dseqrecord(other)))
|
|
808
798
|
answer.n = self.n
|
|
809
799
|
return answer
|
|
810
800
|
|
|
@@ -818,7 +808,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
818
808
|
if self.circular:
|
|
819
809
|
raise TypeError("TypeError: can't multiply circular Dseqrecord.")
|
|
820
810
|
if number > 0:
|
|
821
|
-
new =
|
|
811
|
+
new = copy.deepcopy(self)
|
|
822
812
|
for i in range(1, number):
|
|
823
813
|
new += self
|
|
824
814
|
new._per_letter_annotations = self._per_letter_annotations
|
|
@@ -828,7 +818,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
828
818
|
|
|
829
819
|
def __getitem__(self, sl):
|
|
830
820
|
"""docstring."""
|
|
831
|
-
answer = Dseqrecord(
|
|
821
|
+
answer = Dseqrecord(copy.copy(self))
|
|
832
822
|
answer.seq = self.seq.__getitem__(sl)
|
|
833
823
|
# answer.seq.alphabet = self.seq.alphabet
|
|
834
824
|
# breakpoint()
|
|
@@ -850,9 +840,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
850
840
|
f
|
|
851
841
|
for f in answer.features
|
|
852
842
|
if (
|
|
853
|
-
|
|
854
|
-
and
|
|
855
|
-
<
|
|
843
|
+
location_boundaries(f.location)[1] <= len(answer.seq)
|
|
844
|
+
and location_boundaries(f.location)[0]
|
|
845
|
+
< location_boundaries(f.location)[1]
|
|
856
846
|
)
|
|
857
847
|
]
|
|
858
848
|
|
|
@@ -868,14 +858,18 @@ class Dseqrecord(_SeqRecord):
|
|
|
868
858
|
identifier = " ".join(sf.qualifiers["label"])
|
|
869
859
|
elif "note" in sf.qualifiers:
|
|
870
860
|
identifier = " ".join(sf.qualifiers["note"])
|
|
871
|
-
answer.id =
|
|
872
|
-
answer.name =
|
|
861
|
+
answer.id = identifier_from_string(identifier)[:16]
|
|
862
|
+
answer.name = identifier_from_string("part_{name}".format(name=self.name))[:16]
|
|
873
863
|
return answer
|
|
874
864
|
|
|
875
865
|
def __eq__(self, other):
|
|
876
866
|
"""docstring."""
|
|
877
867
|
try:
|
|
878
|
-
|
|
868
|
+
this_dict = self.__dict__.copy()
|
|
869
|
+
other_dict = other.__dict__.copy()
|
|
870
|
+
del this_dict["source"]
|
|
871
|
+
del other_dict["source"]
|
|
872
|
+
if self.seq == other.seq and str(this_dict) == str(other_dict):
|
|
879
873
|
return True
|
|
880
874
|
except AttributeError:
|
|
881
875
|
pass
|
|
@@ -907,43 +901,34 @@ class Dseqrecord(_SeqRecord):
|
|
|
907
901
|
answer.name = answer.id[:16]
|
|
908
902
|
return fragments[0]
|
|
909
903
|
|
|
910
|
-
def no_cutters(self, batch:
|
|
904
|
+
def no_cutters(self, batch: RestrictionBatch = None):
|
|
911
905
|
"""docstring."""
|
|
912
906
|
return self.seq.no_cutters(batch=batch or CommOnly)
|
|
913
907
|
|
|
914
|
-
def unique_cutters(self, batch:
|
|
908
|
+
def unique_cutters(self, batch: RestrictionBatch = None):
|
|
915
909
|
"""docstring."""
|
|
916
910
|
return self.seq.unique_cutters(batch=batch or CommOnly)
|
|
917
911
|
|
|
918
|
-
def once_cutters(self, batch:
|
|
912
|
+
def once_cutters(self, batch: RestrictionBatch = None):
|
|
919
913
|
"""docstring."""
|
|
920
914
|
return self.seq.once_cutters(batch=batch or CommOnly)
|
|
921
915
|
|
|
922
|
-
def twice_cutters(self, batch:
|
|
916
|
+
def twice_cutters(self, batch: RestrictionBatch = None):
|
|
923
917
|
"""docstring."""
|
|
924
918
|
return self.seq.twice_cutters(batch=batch or CommOnly)
|
|
925
919
|
|
|
926
|
-
def n_cutters(self, n=3, batch:
|
|
920
|
+
def n_cutters(self, n=3, batch: RestrictionBatch = None):
|
|
927
921
|
"""docstring."""
|
|
928
922
|
return self.seq.n_cutters(n=n, batch=batch or CommOnly)
|
|
929
923
|
|
|
930
|
-
def cutters(self, batch:
|
|
924
|
+
def cutters(self, batch: RestrictionBatch = None):
|
|
931
925
|
"""docstring."""
|
|
932
926
|
return self.seq.cutters(batch=batch or CommOnly)
|
|
933
927
|
|
|
934
928
|
def number_of_cuts(self, *enzymes):
|
|
935
929
|
"""The number of cuts by digestion with the Restriction enzymes
|
|
936
930
|
contained in the iterable."""
|
|
937
|
-
return sum([len(enzyme.search(self.seq)) for enzyme in
|
|
938
|
-
|
|
939
|
-
def cas9(self, RNA: str):
|
|
940
|
-
"""docstring."""
|
|
941
|
-
fragments = []
|
|
942
|
-
result = []
|
|
943
|
-
for target in (self.seq, self.seq.rc()):
|
|
944
|
-
fragments = [self[sl.start : sl.stop] for sl in target.cas9(RNA)]
|
|
945
|
-
result.append(fragments)
|
|
946
|
-
return result
|
|
931
|
+
return sum([len(enzyme.search(self.seq)) for enzyme in flatten(enzymes)])
|
|
947
932
|
|
|
948
933
|
def reverse_complement(self):
|
|
949
934
|
"""Reverse complement.
|
|
@@ -1020,7 +1005,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1020
1005
|
if not self.circular:
|
|
1021
1006
|
raise TypeError("Only circular DNA can be synced!")
|
|
1022
1007
|
|
|
1023
|
-
newseq =
|
|
1008
|
+
newseq = copy.copy(self)
|
|
1024
1009
|
|
|
1025
1010
|
s = str(self.seq.watson).lower()
|
|
1026
1011
|
s_rc = str(self.seq.crick).lower()
|
|
@@ -1036,8 +1021,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
1036
1021
|
|
|
1037
1022
|
lim = min(limit, limit * (len(s) // limit) + 1)
|
|
1038
1023
|
|
|
1039
|
-
c =
|
|
1040
|
-
d =
|
|
1024
|
+
c = common_sub_strings(s + s, r, limit=lim)
|
|
1025
|
+
d = common_sub_strings(s_rc + s_rc, r, limit=lim)
|
|
1041
1026
|
|
|
1042
1027
|
c = [(x[0], x[2]) for x in c if x[1] == 0]
|
|
1043
1028
|
d = [(x[0], x[2]) for x in d if x[1] == 0]
|
|
@@ -1063,7 +1048,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1063
1048
|
result = newseq
|
|
1064
1049
|
else:
|
|
1065
1050
|
result = newseq.shifted(start)
|
|
1066
|
-
|
|
1051
|
+
|
|
1067
1052
|
return result
|
|
1068
1053
|
|
|
1069
1054
|
def upper(self):
|
|
@@ -1092,7 +1077,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1092
1077
|
--------
|
|
1093
1078
|
pydna.dseqrecord.Dseqrecord.lower"""
|
|
1094
1079
|
|
|
1095
|
-
upper =
|
|
1080
|
+
upper = copy.deepcopy(self)
|
|
1096
1081
|
# This is because the @seq.setter methods otherwise sets the _per_letter_annotations to an empty dict
|
|
1097
1082
|
prev_per_letter_annotation = upper._per_letter_annotations
|
|
1098
1083
|
upper.seq = upper.seq.upper()
|
|
@@ -1126,7 +1111,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1126
1111
|
pydna.dseqrecord.Dseqrecord.upper
|
|
1127
1112
|
|
|
1128
1113
|
"""
|
|
1129
|
-
lower =
|
|
1114
|
+
lower = copy.deepcopy(self)
|
|
1130
1115
|
prev_per_letter_annotation = lower._per_letter_annotations
|
|
1131
1116
|
lower.seq = lower.seq.lower()
|
|
1132
1117
|
lower._per_letter_annotations = prev_per_letter_annotation
|
|
@@ -1144,8 +1129,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
1144
1129
|
orf = self[x:y]
|
|
1145
1130
|
prt = orf.translate()
|
|
1146
1131
|
features.append(
|
|
1147
|
-
|
|
1148
|
-
|
|
1132
|
+
SeqFeature(
|
|
1133
|
+
SimpleLocation(x, y, strand=strand),
|
|
1149
1134
|
type="CDS",
|
|
1150
1135
|
qualifiers={
|
|
1151
1136
|
"note": f"{y - x}bp {(y - x) // 3}aa",
|
|
@@ -1183,11 +1168,11 @@ class Dseqrecord(_SeqRecord):
|
|
|
1183
1168
|
if self.features:
|
|
1184
1169
|
f = self.features[feature]
|
|
1185
1170
|
locations = sorted(
|
|
1186
|
-
self.features[feature].location.parts, key=
|
|
1171
|
+
self.features[feature].location.parts, key=SimpleLocation.start.fget
|
|
1187
1172
|
)
|
|
1188
1173
|
strand = f.location.strand
|
|
1189
1174
|
else:
|
|
1190
|
-
locations = [
|
|
1175
|
+
locations = [SimpleLocation(0, 0, 1)]
|
|
1191
1176
|
strand = 1
|
|
1192
1177
|
|
|
1193
1178
|
ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
|
|
@@ -1218,7 +1203,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1218
1203
|
result += f"{s1}\n{s2}"
|
|
1219
1204
|
else:
|
|
1220
1205
|
result += f"{s2}\n{s1}"
|
|
1221
|
-
return
|
|
1206
|
+
return pretty_str(result)
|
|
1222
1207
|
|
|
1223
1208
|
def shifted(self, shift):
|
|
1224
1209
|
"""Circular Dseqrecord with a new origin <shift>.
|
|
@@ -1271,15 +1256,15 @@ class Dseqrecord(_SeqRecord):
|
|
|
1271
1256
|
)
|
|
1272
1257
|
ln = len(self)
|
|
1273
1258
|
if not shift % ln:
|
|
1274
|
-
return
|
|
1259
|
+
return copy.deepcopy(self) # shift is a multiple of ln or 0
|
|
1275
1260
|
else:
|
|
1276
1261
|
shift %= ln # 0<=shift<=ln
|
|
1277
1262
|
newseq = (self.seq[shift:] + self.seq[:shift]).looped()
|
|
1278
|
-
newfeatures =
|
|
1263
|
+
newfeatures = copy.deepcopy(self.features)
|
|
1279
1264
|
for feature in newfeatures:
|
|
1280
|
-
feature.location =
|
|
1281
|
-
newfeatures.sort(key=
|
|
1282
|
-
answer =
|
|
1265
|
+
feature.location = shift_location(feature.location, -shift, ln)
|
|
1266
|
+
newfeatures.sort(key=operator.attrgetter("location.start"))
|
|
1267
|
+
answer = copy.deepcopy(self)
|
|
1283
1268
|
answer.features = newfeatures
|
|
1284
1269
|
answer.seq = newseq
|
|
1285
1270
|
return answer
|
|
@@ -1333,7 +1318,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1333
1318
|
if left_cut == right_cut:
|
|
1334
1319
|
# Not really a cut, but to handle the general case
|
|
1335
1320
|
if left_cut is None:
|
|
1336
|
-
features =
|
|
1321
|
+
features = copy.deepcopy(self.features)
|
|
1337
1322
|
else:
|
|
1338
1323
|
# The features that span the origin if shifting with left_cut, but that do not cross
|
|
1339
1324
|
# the cut site should be included, and if there is a feature within the cut site, it should
|
|
@@ -1356,7 +1341,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1356
1341
|
initial_shift = left_watson if left_ovhg < 0 else left_crick
|
|
1357
1342
|
features = self.shifted(initial_shift).features
|
|
1358
1343
|
# for f in features:
|
|
1359
|
-
# print(f.id, f.location,
|
|
1344
|
+
# print(f.id, f.location, location_boundaries(f.location))
|
|
1360
1345
|
# Here, we have done what's shown below (* indicates the origin).
|
|
1361
1346
|
# The features 0 and 2 have the right location for the final product:
|
|
1362
1347
|
#
|
|
@@ -1370,10 +1355,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
1370
1355
|
features_need_transfer = [
|
|
1371
1356
|
f
|
|
1372
1357
|
for f in features
|
|
1373
|
-
if (
|
|
1358
|
+
if (location_boundaries(f.location)[1] <= abs(left_ovhg))
|
|
1374
1359
|
]
|
|
1375
1360
|
features_need_transfer = [
|
|
1376
|
-
|
|
1361
|
+
shift_feature(f, -abs(left_ovhg), len(self))
|
|
1377
1362
|
for f in features_need_transfer
|
|
1378
1363
|
]
|
|
1379
1364
|
|
|
@@ -1390,7 +1375,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1390
1375
|
# as the original one. However, the final product is longer because of the overhang.
|
|
1391
1376
|
|
|
1392
1377
|
features += [
|
|
1393
|
-
|
|
1378
|
+
shift_feature(f, abs(left_ovhg), len(dseq))
|
|
1394
1379
|
for f in features_need_transfer
|
|
1395
1380
|
]
|
|
1396
1381
|
# ^ ^^^^^^^^^
|
|
@@ -1402,9 +1387,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
1402
1387
|
f
|
|
1403
1388
|
for f in features
|
|
1404
1389
|
if (
|
|
1405
|
-
|
|
1406
|
-
and
|
|
1407
|
-
<=
|
|
1390
|
+
location_boundaries(f.location)[1] <= len(dseq)
|
|
1391
|
+
and location_boundaries(f.location)[0]
|
|
1392
|
+
<= location_boundaries(f.location)[1]
|
|
1408
1393
|
)
|
|
1409
1394
|
]
|
|
1410
1395
|
else:
|
|
@@ -1419,4 +1404,68 @@ class Dseqrecord(_SeqRecord):
|
|
|
1419
1404
|
right_edge = right_watson if right_ovhg > 0 else right_crick
|
|
1420
1405
|
features = self[left_edge:right_edge].features
|
|
1421
1406
|
|
|
1422
|
-
|
|
1407
|
+
# This will need to be generalised to all types of cuts
|
|
1408
|
+
source = SequenceCutSource.from_parent(self, left_cut, right_cut)
|
|
1409
|
+
return Dseqrecord(dseq, features=features, source=source)
|
|
1410
|
+
|
|
1411
|
+
def history(self):
|
|
1412
|
+
"""
|
|
1413
|
+
Returns a string representation of the cloning history of the sequence.
|
|
1414
|
+
Returns an empty string if the sequence has no source.
|
|
1415
|
+
|
|
1416
|
+
Check the documentation notebooks for extensive examples.
|
|
1417
|
+
|
|
1418
|
+
Returns
|
|
1419
|
+
-------
|
|
1420
|
+
str: A string representation of the cloning history of the sequence.
|
|
1421
|
+
|
|
1422
|
+
Examples
|
|
1423
|
+
--------
|
|
1424
|
+
>>> from pydna.dseqrecord import Dseqrecord
|
|
1425
|
+
>>> from pydna.assembly2 import gibson_assembly
|
|
1426
|
+
>>> fragments = [
|
|
1427
|
+
... Dseqrecord("TTTTacgatAAtgctccCCCC", circular=False, name="fragment1"),
|
|
1428
|
+
... Dseqrecord("CCCCtcatGGGG", circular=False, name="fragment2"),
|
|
1429
|
+
... Dseqrecord("GGGGatataTTTT", circular=False, name="fragment3"),
|
|
1430
|
+
... ]
|
|
1431
|
+
>>> product, *_ = gibson_assembly(fragments, limit=4)
|
|
1432
|
+
>>> product.name = "product_name"
|
|
1433
|
+
>>> print(product.history())
|
|
1434
|
+
╙── product_name (Dseqrecord(o34))
|
|
1435
|
+
└─╼ GibsonAssemblySource
|
|
1436
|
+
├─╼ fragment1 (Dseqrecord(-21))
|
|
1437
|
+
├─╼ fragment2 (Dseqrecord(-12))
|
|
1438
|
+
└─╼ fragment3 (Dseqrecord(-13))
|
|
1439
|
+
"""
|
|
1440
|
+
if self.source is None:
|
|
1441
|
+
return ""
|
|
1442
|
+
return self.source.history_string(self)
|
|
1443
|
+
|
|
1444
|
+
def join(self, fragments):
|
|
1445
|
+
"""
|
|
1446
|
+
Join an iterable of Dseqrecords with this instance as the separator.
|
|
1447
|
+
|
|
1448
|
+
Example:
|
|
1449
|
+
|
|
1450
|
+
>>> sep = Dseqrecord("a")
|
|
1451
|
+
>>> joined = sep.join([Dseqrecord("A"), Dseqrecord("B"), Dseqrecord("C")])
|
|
1452
|
+
>>> joined
|
|
1453
|
+
Dseqrecord(-5)
|
|
1454
|
+
>>> joined.seq
|
|
1455
|
+
Dseq(-5)
|
|
1456
|
+
AaBaC
|
|
1457
|
+
TtVtG
|
|
1458
|
+
|
|
1459
|
+
"""
|
|
1460
|
+
it = iter(fragments)
|
|
1461
|
+
try:
|
|
1462
|
+
result = next(it) # first element (no leading separator)
|
|
1463
|
+
except StopIteration:
|
|
1464
|
+
# Empty iterable -> return empty Dseqrecord in analogy with
|
|
1465
|
+
# str.join
|
|
1466
|
+
return Dseqrecord("")
|
|
1467
|
+
|
|
1468
|
+
# Interleave: result = first + sep + x + sep + y + ...
|
|
1469
|
+
for x in it:
|
|
1470
|
+
result = result + self + x
|
|
1471
|
+
return result
|