pydna 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +24 -193
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +283 -294
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/download.py +6 -15
- pydna/dseq.py +1794 -718
- pydna/dseqrecord.py +170 -169
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +187 -60
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +19 -19
- pydna/utils.py +97 -75
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/METADATA +8 -8
- pydna-5.5.5.dist-info/RECORD +43 -0
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.4.dist-info/RECORD +0 -46
- {pydna-5.5.4.dist-info → pydna-5.5.5.dist-info}/licenses/LICENSE.txt +0 -0
pydna/dseqrecord.py
CHANGED
|
@@ -11,30 +11,28 @@ Seq and SeqRecord classes, respectively.
|
|
|
11
11
|
|
|
12
12
|
The Dseq and Dseqrecord classes support the notion of circular and linear DNA topology.
|
|
13
13
|
"""
|
|
14
|
-
from Bio.Restriction import RestrictionBatch
|
|
14
|
+
from Bio.Restriction import RestrictionBatch
|
|
15
15
|
from Bio.Restriction import CommOnly
|
|
16
|
-
from pydna.dseq import Dseq
|
|
17
|
-
from pydna._pretty import pretty_str
|
|
18
|
-
from pydna.utils import flatten
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
from pydna.utils import
|
|
22
|
-
from pydna.
|
|
23
|
-
from
|
|
24
|
-
from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
|
|
25
|
-
from Bio.SeqFeature import SeqFeature as _SeqFeature
|
|
16
|
+
from pydna.dseq import Dseq
|
|
17
|
+
from pydna._pretty import pretty_str
|
|
18
|
+
from pydna.utils import flatten, location_boundaries
|
|
19
|
+
|
|
20
|
+
from pydna.utils import shift_location
|
|
21
|
+
from pydna.utils import shift_feature
|
|
22
|
+
from pydna.common_sub_strings import common_sub_strings
|
|
23
|
+
from Bio.SeqFeature import SeqFeature
|
|
26
24
|
from Bio import SeqIO
|
|
27
|
-
from Bio.SeqFeature import CompoundLocation
|
|
28
|
-
from Bio.SeqFeature import SimpleLocation
|
|
29
|
-
from pydna.seqrecord import SeqRecord
|
|
30
|
-
from Bio.Seq import translate
|
|
31
|
-
from pydna.utils import identifier_from_string
|
|
32
|
-
import copy
|
|
33
|
-
import operator
|
|
34
|
-
import os
|
|
35
|
-
import re
|
|
36
|
-
import time
|
|
37
|
-
import datetime
|
|
25
|
+
from Bio.SeqFeature import CompoundLocation
|
|
26
|
+
from Bio.SeqFeature import SimpleLocation
|
|
27
|
+
from pydna.seqrecord import SeqRecord
|
|
28
|
+
from Bio.Seq import translate
|
|
29
|
+
from pydna.utils import identifier_from_string
|
|
30
|
+
import copy
|
|
31
|
+
import operator
|
|
32
|
+
import os
|
|
33
|
+
import re
|
|
34
|
+
import time
|
|
35
|
+
import datetime
|
|
38
36
|
from typing import Union, TYPE_CHECKING
|
|
39
37
|
from pydna.opencloning_models import SequenceCutSource
|
|
40
38
|
|
|
@@ -42,20 +40,15 @@ if TYPE_CHECKING: # pragma: no cover
|
|
|
42
40
|
from pydna.opencloning_models import Source
|
|
43
41
|
|
|
44
42
|
|
|
45
|
-
# import logging as _logging
|
|
46
|
-
|
|
47
|
-
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
48
|
-
|
|
49
|
-
|
|
50
43
|
try:
|
|
51
|
-
from IPython.display import display_html
|
|
44
|
+
from IPython.display import display_html
|
|
52
45
|
except ImportError:
|
|
53
46
|
|
|
54
|
-
def
|
|
47
|
+
def display_html(item, raw=None):
|
|
55
48
|
return item
|
|
56
49
|
|
|
57
50
|
|
|
58
|
-
class Dseqrecord(
|
|
51
|
+
class Dseqrecord(SeqRecord):
|
|
59
52
|
"""Dseqrecord is a double stranded version of the Biopython SeqRecord [#]_ class.
|
|
60
53
|
The Dseqrecord object holds a Dseq object describing the sequence.
|
|
61
54
|
Additionally, Dseqrecord hold meta information about the sequence in the
|
|
@@ -132,7 +125,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
132
125
|
|
|
133
126
|
"""
|
|
134
127
|
|
|
135
|
-
seq:
|
|
128
|
+
seq: Dseq
|
|
136
129
|
source: Union["Source", None] = None
|
|
137
130
|
|
|
138
131
|
def __init__(
|
|
@@ -144,15 +137,12 @@ class Dseqrecord(_SeqRecord):
|
|
|
144
137
|
source=None,
|
|
145
138
|
**kwargs,
|
|
146
139
|
):
|
|
147
|
-
# _module_logger.info("### Dseqrecord initialized ###")
|
|
148
|
-
# _module_logger.info("argument circular = %s", circular)
|
|
149
|
-
# _module_logger.info("circular = %s", circular)
|
|
150
140
|
|
|
151
141
|
if isinstance(record, str):
|
|
152
|
-
|
|
142
|
+
|
|
153
143
|
super().__init__(
|
|
154
|
-
|
|
155
|
-
record,
|
|
144
|
+
Dseq.quick(
|
|
145
|
+
record.encode("ascii"),
|
|
156
146
|
# linear=linear,
|
|
157
147
|
circular=bool(circular),
|
|
158
148
|
),
|
|
@@ -166,14 +156,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
166
156
|
record = record[:]
|
|
167
157
|
elif circular is True:
|
|
168
158
|
record = record.looped()
|
|
169
|
-
|
|
159
|
+
|
|
170
160
|
super().__init__(record, *args, **kwargs)
|
|
171
161
|
|
|
172
162
|
# record is a Bio.Seq object ?
|
|
173
163
|
elif hasattr(record, "transcribe"):
|
|
174
|
-
|
|
164
|
+
|
|
175
165
|
super().__init__(
|
|
176
|
-
|
|
166
|
+
Dseq(
|
|
177
167
|
str(record),
|
|
178
168
|
# linear=linear,
|
|
179
169
|
circular=bool(circular),
|
|
@@ -184,13 +174,13 @@ class Dseqrecord(_SeqRecord):
|
|
|
184
174
|
|
|
185
175
|
# record is a Bio.SeqRecord or Dseqrecord object ?
|
|
186
176
|
elif hasattr(record, "features"):
|
|
187
|
-
|
|
177
|
+
|
|
188
178
|
for key, value in list(record.__dict__.items()):
|
|
189
179
|
setattr(self, key, value)
|
|
190
180
|
self.letter_annotations = {}
|
|
191
181
|
# record.seq is a Dseq object ?
|
|
192
182
|
if hasattr(record.seq, "watson"):
|
|
193
|
-
new_seq =
|
|
183
|
+
new_seq = copy.copy(record.seq)
|
|
194
184
|
if circular is False:
|
|
195
185
|
new_seq = new_seq[:]
|
|
196
186
|
elif circular is True:
|
|
@@ -198,7 +188,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
198
188
|
self.seq = new_seq
|
|
199
189
|
# record.seq is Bio.SeqRecord object ?
|
|
200
190
|
else:
|
|
201
|
-
self.seq =
|
|
191
|
+
self.seq = Dseq(
|
|
202
192
|
str(record.seq),
|
|
203
193
|
# linear=linear,
|
|
204
194
|
circular=bool(circular),
|
|
@@ -226,16 +216,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
226
216
|
# linear=True, circular=False, n = 5E-14, **kwargs):
|
|
227
217
|
obj = cls.__new__(cls) # Does not call __init__
|
|
228
218
|
obj._per_letter_annotations = {}
|
|
229
|
-
obj.seq =
|
|
230
|
-
record,
|
|
231
|
-
_rc(record),
|
|
232
|
-
ovhg=0,
|
|
219
|
+
obj.seq = Dseq.quick(
|
|
220
|
+
record.encode("ascii"),
|
|
233
221
|
# linear=linear,
|
|
234
222
|
circular=circular,
|
|
235
223
|
)
|
|
236
|
-
obj.id =
|
|
237
|
-
obj.name =
|
|
238
|
-
obj.description =
|
|
224
|
+
obj.id = pretty_str("id")
|
|
225
|
+
obj.name = pretty_str("name")
|
|
226
|
+
obj.description = pretty_str("description")
|
|
239
227
|
obj.dbxrefs = []
|
|
240
228
|
obj.annotations = {"molecule_type": "DNA"}
|
|
241
229
|
obj.features = []
|
|
@@ -247,7 +235,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
247
235
|
@classmethod
|
|
248
236
|
def from_SeqRecord(
|
|
249
237
|
cls,
|
|
250
|
-
record:
|
|
238
|
+
record: SeqRecord,
|
|
251
239
|
*args,
|
|
252
240
|
circular=None,
|
|
253
241
|
n=5e-14,
|
|
@@ -267,9 +255,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
267
255
|
obj.source = None
|
|
268
256
|
if circular is None:
|
|
269
257
|
circular = record.annotations.get("topology") == "circular"
|
|
270
|
-
obj.seq =
|
|
271
|
-
str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
|
|
272
|
-
)
|
|
258
|
+
obj.seq = Dseq.quick(record.seq._data, ovhg=0, circular=circular)
|
|
273
259
|
return obj
|
|
274
260
|
|
|
275
261
|
@property
|
|
@@ -339,14 +325,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
339
325
|
qualifiers = {}
|
|
340
326
|
qualifiers.update(kwargs)
|
|
341
327
|
|
|
342
|
-
location =
|
|
328
|
+
location = CompoundLocation(
|
|
343
329
|
(
|
|
344
|
-
|
|
345
|
-
|
|
330
|
+
SimpleLocation(x, len(self.seq), strand=strand),
|
|
331
|
+
SimpleLocation(0, y, strand=strand),
|
|
346
332
|
)
|
|
347
333
|
)
|
|
348
334
|
|
|
349
|
-
sf =
|
|
335
|
+
sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
|
|
350
336
|
|
|
351
337
|
if "label" not in qualifiers:
|
|
352
338
|
qualifiers["label"] = [f"ft{len(location)}"]
|
|
@@ -395,35 +381,31 @@ class Dseqrecord(_SeqRecord):
|
|
|
395
381
|
--------
|
|
396
382
|
pydna.dseq.Dseq.looped
|
|
397
383
|
"""
|
|
398
|
-
new =
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
)
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
fn.location = _CompoundLocation([loc1, loc2])
|
|
424
|
-
|
|
425
|
-
fn.qualifiers = fo.qualifiers
|
|
426
|
-
|
|
384
|
+
new = copy.deepcopy(self)
|
|
385
|
+
new.seq = self.seq.looped()
|
|
386
|
+
|
|
387
|
+
old_length = len(self) # Possibly longer, including sticky ends if any.
|
|
388
|
+
new_length = len(new) # Possibly shorter, with blunt ends.
|
|
389
|
+
if old_length != new_length: # Only False if self was blunt.
|
|
390
|
+
new_features = []
|
|
391
|
+
for fn in new.features:
|
|
392
|
+
if len(fn.location) > new_length:
|
|
393
|
+
# Edge case: if the feature is longer than the sequence, it should be
|
|
394
|
+
# dropped. This can happen in a sequence with overhangs, where the feature
|
|
395
|
+
# spans both overhangs.
|
|
396
|
+
#
|
|
397
|
+
# Example:
|
|
398
|
+
# feature
|
|
399
|
+
# <------>
|
|
400
|
+
# aaACGT
|
|
401
|
+
# TGCAtt
|
|
402
|
+
#
|
|
403
|
+
# Circular sequence ACGTtt should not have that feature, so we drop it
|
|
404
|
+
continue
|
|
405
|
+
fn.location = shift_location(fn.location, 0, new_length)
|
|
406
|
+
new_features.append(fn)
|
|
407
|
+
|
|
408
|
+
new.features = new_features
|
|
427
409
|
return new
|
|
428
410
|
|
|
429
411
|
def tolinear(self): # pragma: no cover
|
|
@@ -445,16 +427,16 @@ class Dseqrecord(_SeqRecord):
|
|
|
445
427
|
>>>
|
|
446
428
|
|
|
447
429
|
"""
|
|
448
|
-
import warnings
|
|
430
|
+
import warnings
|
|
449
431
|
from pydna import _PydnaDeprecationWarning
|
|
450
432
|
|
|
451
|
-
|
|
433
|
+
warnings.warn(
|
|
452
434
|
"tolinear method is obsolete; "
|
|
453
435
|
"please use obj[:] "
|
|
454
436
|
"instead of obj.tolinear().",
|
|
455
437
|
_PydnaDeprecationWarning,
|
|
456
438
|
)
|
|
457
|
-
new =
|
|
439
|
+
new = copy.copy(self)
|
|
458
440
|
for key, value in list(self.__dict__.items()):
|
|
459
441
|
setattr(new, key, value)
|
|
460
442
|
# new._seq = self.seq.tolinear()
|
|
@@ -465,7 +447,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
465
447
|
|
|
466
448
|
def terminal_transferase(self, nucleotides="a"):
|
|
467
449
|
"""docstring."""
|
|
468
|
-
newseq =
|
|
450
|
+
newseq = copy.deepcopy(self)
|
|
469
451
|
newseq.seq = self.seq.terminal_transferase(nucleotides)
|
|
470
452
|
for feature in newseq.features:
|
|
471
453
|
feature.location += len(nucleotides)
|
|
@@ -505,12 +487,12 @@ class Dseqrecord(_SeqRecord):
|
|
|
505
487
|
|
|
506
488
|
"""
|
|
507
489
|
|
|
508
|
-
record =
|
|
490
|
+
record = copy.deepcopy(self)
|
|
509
491
|
if f in ("genbank", "gb") and self.circular:
|
|
510
492
|
record.annotations["topology"] = "circular"
|
|
511
493
|
else:
|
|
512
494
|
record.annotations["topology"] = "linear"
|
|
513
|
-
return
|
|
495
|
+
return SeqRecord.format(record, f).strip()
|
|
514
496
|
|
|
515
497
|
def write(self, filename=None, f="gb"):
|
|
516
498
|
"""Writes the Dseqrecord to a file using the format f, which must
|
|
@@ -543,9 +525,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
543
525
|
# generate a name if no name was given
|
|
544
526
|
# if not isinstance(filename, str): # is filename a string???
|
|
545
527
|
# raise ValueError("filename has to be a string, got", type(filename))
|
|
546
|
-
name, ext =
|
|
528
|
+
name, ext = os.path.splitext(filename)
|
|
547
529
|
msg = f"<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>"
|
|
548
|
-
if not
|
|
530
|
+
if not os.path.isfile(filename):
|
|
549
531
|
with open(filename, "w", encoding="utf8") as fp:
|
|
550
532
|
fp.write(self.format(f))
|
|
551
533
|
else:
|
|
@@ -556,16 +538,16 @@ class Dseqrecord(_SeqRecord):
|
|
|
556
538
|
if self.seq != old_file.seq:
|
|
557
539
|
# If new sequence is different, the old file is
|
|
558
540
|
# renamed with "_OLD_" suffix:
|
|
559
|
-
oldmtime =
|
|
560
|
-
|
|
541
|
+
oldmtime = datetime.datetime.fromtimestamp(
|
|
542
|
+
os.path.getmtime(filename)
|
|
561
543
|
).isoformat()
|
|
562
|
-
tstmp = int(
|
|
544
|
+
tstmp = int(time.time() * 1_000_000)
|
|
563
545
|
old_filename = f"{name}_OLD_{tstmp}{ext}"
|
|
564
|
-
|
|
546
|
+
os.rename(filename, old_filename)
|
|
565
547
|
with open(filename, "w", encoding="utf8") as fp:
|
|
566
548
|
fp.write(self.format(f))
|
|
567
|
-
newmtime =
|
|
568
|
-
|
|
549
|
+
newmtime = datetime.datetime.fromtimestamp(
|
|
550
|
+
os.path.getmtime(filename)
|
|
569
551
|
).isoformat()
|
|
570
552
|
msg = f"""
|
|
571
553
|
<table style="padding:10px 10px;
|
|
@@ -611,8 +593,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
611
593
|
elif "seguid" in old_file.annotations.get("comment", ""):
|
|
612
594
|
pattern = r"(ldseguid|cdseguid)-(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
|
|
613
595
|
# seguid=NNNNNNNNNNNNNNNNNNNNNNNNNNN_2020-10-10T11:11:11.111111
|
|
614
|
-
oldstamp =
|
|
615
|
-
newstamp =
|
|
596
|
+
oldstamp = re.search(pattern, old_file.description)
|
|
597
|
+
newstamp = re.search(pattern, self.description)
|
|
616
598
|
newdescription = self.description
|
|
617
599
|
if oldstamp and newstamp:
|
|
618
600
|
if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
|
|
@@ -621,7 +603,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
621
603
|
)
|
|
622
604
|
elif oldstamp:
|
|
623
605
|
newdescription += " " + oldstamp.group(0)
|
|
624
|
-
newobj =
|
|
606
|
+
newobj = copy.copy(self)
|
|
625
607
|
newobj.description = newdescription
|
|
626
608
|
|
|
627
609
|
with open(filename, "w", encoding="utf8") as fp:
|
|
@@ -629,7 +611,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
629
611
|
else:
|
|
630
612
|
with open(filename, "w", encoding="utf8") as fp:
|
|
631
613
|
fp.write(self.format(f))
|
|
632
|
-
return
|
|
614
|
+
return display_html(msg, raw=True)
|
|
633
615
|
|
|
634
616
|
def find(self, other):
|
|
635
617
|
# TODO allow strings, seqs, seqrecords or Dseqrecords
|
|
@@ -647,7 +629,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
647
629
|
def __str__(self):
|
|
648
630
|
return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
|
|
649
631
|
self.circular, len(self)
|
|
650
|
-
) +
|
|
632
|
+
) + SeqRecord.__str__(self)
|
|
651
633
|
|
|
652
634
|
def __contains__(self, other):
|
|
653
635
|
if other.lower() in str(self.seq).lower():
|
|
@@ -658,7 +640,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
658
640
|
spc = 3 - ln % 3 if ln % 3 else 0
|
|
659
641
|
s = "n" * spc + s + "nnn"
|
|
660
642
|
for frame in range(3):
|
|
661
|
-
if other.lower() in
|
|
643
|
+
if other.lower() in translate(s[frame : frame + spc + ln]).lower():
|
|
662
644
|
return True
|
|
663
645
|
return False
|
|
664
646
|
|
|
@@ -667,13 +649,13 @@ class Dseqrecord(_SeqRecord):
|
|
|
667
649
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
668
650
|
>>> s=Dseqrecord("atgtacgatcgtatgctggttatattttag")
|
|
669
651
|
>>> s.seq.translate()
|
|
670
|
-
|
|
652
|
+
ProteinSeq('MYDRMLVIF*')
|
|
671
653
|
>>> "RML" in s
|
|
672
654
|
True
|
|
673
655
|
>>> "MMM" in s
|
|
674
656
|
False
|
|
675
657
|
>>> s.seq.rc().translate()
|
|
676
|
-
|
|
658
|
+
ProteinSeq('LKYNQHTIVH')
|
|
677
659
|
>>> "QHT" in s.rc()
|
|
678
660
|
True
|
|
679
661
|
>>> "QHT" in s
|
|
@@ -689,7 +671,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
689
671
|
cgtatgctg
|
|
690
672
|
gcatacgac
|
|
691
673
|
>>> code.translate()
|
|
692
|
-
|
|
674
|
+
ProteinSeq('RML')
|
|
693
675
|
"""
|
|
694
676
|
other = str(other).lower()
|
|
695
677
|
assert self.seq.watson == "".join(self.seq.watson.split())
|
|
@@ -700,7 +682,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
700
682
|
start = None
|
|
701
683
|
for frame in range(3):
|
|
702
684
|
try:
|
|
703
|
-
start =
|
|
685
|
+
start = translate(s[frame : frame + ln + spc]).lower().index(other)
|
|
704
686
|
break
|
|
705
687
|
except ValueError:
|
|
706
688
|
pass
|
|
@@ -748,7 +730,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
748
730
|
matching_reads = []
|
|
749
731
|
|
|
750
732
|
for read_ in reads:
|
|
751
|
-
matches =
|
|
733
|
+
matches = common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
|
|
752
734
|
|
|
753
735
|
if not matches:
|
|
754
736
|
continue
|
|
@@ -769,14 +751,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
769
751
|
if len(newmatches) > 1:
|
|
770
752
|
ms = []
|
|
771
753
|
for m in newmatches:
|
|
772
|
-
ms.append(
|
|
773
|
-
loc =
|
|
754
|
+
ms.append(SimpleLocation(m[0], m[0] + m[2]))
|
|
755
|
+
loc = CompoundLocation(ms)
|
|
774
756
|
else:
|
|
775
757
|
a, b, c = newmatches[0]
|
|
776
|
-
loc =
|
|
758
|
+
loc = SimpleLocation(a, a + c)
|
|
777
759
|
|
|
778
760
|
self.features.append(
|
|
779
|
-
|
|
761
|
+
SeqFeature(
|
|
780
762
|
loc,
|
|
781
763
|
qualifiers={"label": [read_.annotations["filename"]]},
|
|
782
764
|
type="trace",
|
|
@@ -786,9 +768,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
786
768
|
return [x.annotations["filename"] for x in matching_reads]
|
|
787
769
|
|
|
788
770
|
def __repr__(self):
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
)
|
|
771
|
+
top = {True: "-", False: "o"}[not self.circular]
|
|
772
|
+
return f"{self.__class__.__name__}({top}{len(self)})"
|
|
792
773
|
|
|
793
774
|
def _repr_pretty_(self, p, cycle):
|
|
794
775
|
p.text(
|
|
@@ -799,7 +780,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
799
780
|
|
|
800
781
|
def __add__(self, other):
|
|
801
782
|
if hasattr(other, "seq") and hasattr(other.seq, "watson"):
|
|
802
|
-
other =
|
|
783
|
+
other = copy.deepcopy(other)
|
|
803
784
|
other_five_prime = other.seq.five_prime_end()
|
|
804
785
|
if other_five_prime[0] == "5'":
|
|
805
786
|
# add other.seq.ovhg
|
|
@@ -810,10 +791,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
810
791
|
for f in other.features:
|
|
811
792
|
f.location = f.location + (-other.seq.ovhg)
|
|
812
793
|
|
|
813
|
-
answer = Dseqrecord(
|
|
794
|
+
answer = Dseqrecord(SeqRecord.__add__(self, other))
|
|
814
795
|
answer.n = min(self.n, other.n)
|
|
815
796
|
else:
|
|
816
|
-
answer = Dseqrecord(
|
|
797
|
+
answer = Dseqrecord(SeqRecord.__add__(self, Dseqrecord(other)))
|
|
817
798
|
answer.n = self.n
|
|
818
799
|
return answer
|
|
819
800
|
|
|
@@ -827,7 +808,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
827
808
|
if self.circular:
|
|
828
809
|
raise TypeError("TypeError: can't multiply circular Dseqrecord.")
|
|
829
810
|
if number > 0:
|
|
830
|
-
new =
|
|
811
|
+
new = copy.deepcopy(self)
|
|
831
812
|
for i in range(1, number):
|
|
832
813
|
new += self
|
|
833
814
|
new._per_letter_annotations = self._per_letter_annotations
|
|
@@ -837,7 +818,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
837
818
|
|
|
838
819
|
def __getitem__(self, sl):
|
|
839
820
|
"""docstring."""
|
|
840
|
-
answer = Dseqrecord(
|
|
821
|
+
answer = Dseqrecord(copy.copy(self))
|
|
841
822
|
answer.seq = self.seq.__getitem__(sl)
|
|
842
823
|
# answer.seq.alphabet = self.seq.alphabet
|
|
843
824
|
# breakpoint()
|
|
@@ -859,9 +840,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
859
840
|
f
|
|
860
841
|
for f in answer.features
|
|
861
842
|
if (
|
|
862
|
-
|
|
863
|
-
and
|
|
864
|
-
<
|
|
843
|
+
location_boundaries(f.location)[1] <= len(answer.seq)
|
|
844
|
+
and location_boundaries(f.location)[0]
|
|
845
|
+
< location_boundaries(f.location)[1]
|
|
865
846
|
)
|
|
866
847
|
]
|
|
867
848
|
|
|
@@ -877,8 +858,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
877
858
|
identifier = " ".join(sf.qualifiers["label"])
|
|
878
859
|
elif "note" in sf.qualifiers:
|
|
879
860
|
identifier = " ".join(sf.qualifiers["note"])
|
|
880
|
-
answer.id =
|
|
881
|
-
answer.name =
|
|
861
|
+
answer.id = identifier_from_string(identifier)[:16]
|
|
862
|
+
answer.name = identifier_from_string("part_{name}".format(name=self.name))[:16]
|
|
882
863
|
return answer
|
|
883
864
|
|
|
884
865
|
def __eq__(self, other):
|
|
@@ -920,43 +901,34 @@ class Dseqrecord(_SeqRecord):
|
|
|
920
901
|
answer.name = answer.id[:16]
|
|
921
902
|
return fragments[0]
|
|
922
903
|
|
|
923
|
-
def no_cutters(self, batch:
|
|
904
|
+
def no_cutters(self, batch: RestrictionBatch = None):
|
|
924
905
|
"""docstring."""
|
|
925
906
|
return self.seq.no_cutters(batch=batch or CommOnly)
|
|
926
907
|
|
|
927
|
-
def unique_cutters(self, batch:
|
|
908
|
+
def unique_cutters(self, batch: RestrictionBatch = None):
|
|
928
909
|
"""docstring."""
|
|
929
910
|
return self.seq.unique_cutters(batch=batch or CommOnly)
|
|
930
911
|
|
|
931
|
-
def once_cutters(self, batch:
|
|
912
|
+
def once_cutters(self, batch: RestrictionBatch = None):
|
|
932
913
|
"""docstring."""
|
|
933
914
|
return self.seq.once_cutters(batch=batch or CommOnly)
|
|
934
915
|
|
|
935
|
-
def twice_cutters(self, batch:
|
|
916
|
+
def twice_cutters(self, batch: RestrictionBatch = None):
|
|
936
917
|
"""docstring."""
|
|
937
918
|
return self.seq.twice_cutters(batch=batch or CommOnly)
|
|
938
919
|
|
|
939
|
-
def n_cutters(self, n=3, batch:
|
|
920
|
+
def n_cutters(self, n=3, batch: RestrictionBatch = None):
|
|
940
921
|
"""docstring."""
|
|
941
922
|
return self.seq.n_cutters(n=n, batch=batch or CommOnly)
|
|
942
923
|
|
|
943
|
-
def cutters(self, batch:
|
|
924
|
+
def cutters(self, batch: RestrictionBatch = None):
|
|
944
925
|
"""docstring."""
|
|
945
926
|
return self.seq.cutters(batch=batch or CommOnly)
|
|
946
927
|
|
|
947
928
|
def number_of_cuts(self, *enzymes):
|
|
948
929
|
"""The number of cuts by digestion with the Restriction enzymes
|
|
949
930
|
contained in the iterable."""
|
|
950
|
-
return sum([len(enzyme.search(self.seq)) for enzyme in
|
|
951
|
-
|
|
952
|
-
def cas9(self, RNA: str):
|
|
953
|
-
"""docstring."""
|
|
954
|
-
fragments = []
|
|
955
|
-
result = []
|
|
956
|
-
for target in (self.seq, self.seq.rc()):
|
|
957
|
-
fragments = [self[sl.start : sl.stop] for sl in target.cas9(RNA)]
|
|
958
|
-
result.append(fragments)
|
|
959
|
-
return result
|
|
931
|
+
return sum([len(enzyme.search(self.seq)) for enzyme in flatten(enzymes)])
|
|
960
932
|
|
|
961
933
|
def reverse_complement(self):
|
|
962
934
|
"""Reverse complement.
|
|
@@ -1033,7 +1005,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1033
1005
|
if not self.circular:
|
|
1034
1006
|
raise TypeError("Only circular DNA can be synced!")
|
|
1035
1007
|
|
|
1036
|
-
newseq =
|
|
1008
|
+
newseq = copy.copy(self)
|
|
1037
1009
|
|
|
1038
1010
|
s = str(self.seq.watson).lower()
|
|
1039
1011
|
s_rc = str(self.seq.crick).lower()
|
|
@@ -1049,8 +1021,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
1049
1021
|
|
|
1050
1022
|
lim = min(limit, limit * (len(s) // limit) + 1)
|
|
1051
1023
|
|
|
1052
|
-
c =
|
|
1053
|
-
d =
|
|
1024
|
+
c = common_sub_strings(s + s, r, limit=lim)
|
|
1025
|
+
d = common_sub_strings(s_rc + s_rc, r, limit=lim)
|
|
1054
1026
|
|
|
1055
1027
|
c = [(x[0], x[2]) for x in c if x[1] == 0]
|
|
1056
1028
|
d = [(x[0], x[2]) for x in d if x[1] == 0]
|
|
@@ -1076,7 +1048,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1076
1048
|
result = newseq
|
|
1077
1049
|
else:
|
|
1078
1050
|
result = newseq.shifted(start)
|
|
1079
|
-
|
|
1051
|
+
|
|
1080
1052
|
return result
|
|
1081
1053
|
|
|
1082
1054
|
def upper(self):
|
|
@@ -1105,7 +1077,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1105
1077
|
--------
|
|
1106
1078
|
pydna.dseqrecord.Dseqrecord.lower"""
|
|
1107
1079
|
|
|
1108
|
-
upper =
|
|
1080
|
+
upper = copy.deepcopy(self)
|
|
1109
1081
|
# This is because the @seq.setter methods otherwise sets the _per_letter_annotations to an empty dict
|
|
1110
1082
|
prev_per_letter_annotation = upper._per_letter_annotations
|
|
1111
1083
|
upper.seq = upper.seq.upper()
|
|
@@ -1139,7 +1111,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1139
1111
|
pydna.dseqrecord.Dseqrecord.upper
|
|
1140
1112
|
|
|
1141
1113
|
"""
|
|
1142
|
-
lower =
|
|
1114
|
+
lower = copy.deepcopy(self)
|
|
1143
1115
|
prev_per_letter_annotation = lower._per_letter_annotations
|
|
1144
1116
|
lower.seq = lower.seq.lower()
|
|
1145
1117
|
lower._per_letter_annotations = prev_per_letter_annotation
|
|
@@ -1157,8 +1129,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
1157
1129
|
orf = self[x:y]
|
|
1158
1130
|
prt = orf.translate()
|
|
1159
1131
|
features.append(
|
|
1160
|
-
|
|
1161
|
-
|
|
1132
|
+
SeqFeature(
|
|
1133
|
+
SimpleLocation(x, y, strand=strand),
|
|
1162
1134
|
type="CDS",
|
|
1163
1135
|
qualifiers={
|
|
1164
1136
|
"note": f"{y - x}bp {(y - x) // 3}aa",
|
|
@@ -1196,11 +1168,11 @@ class Dseqrecord(_SeqRecord):
|
|
|
1196
1168
|
if self.features:
|
|
1197
1169
|
f = self.features[feature]
|
|
1198
1170
|
locations = sorted(
|
|
1199
|
-
self.features[feature].location.parts, key=
|
|
1171
|
+
self.features[feature].location.parts, key=SimpleLocation.start.fget
|
|
1200
1172
|
)
|
|
1201
1173
|
strand = f.location.strand
|
|
1202
1174
|
else:
|
|
1203
|
-
locations = [
|
|
1175
|
+
locations = [SimpleLocation(0, 0, 1)]
|
|
1204
1176
|
strand = 1
|
|
1205
1177
|
|
|
1206
1178
|
ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
|
|
@@ -1231,7 +1203,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1231
1203
|
result += f"{s1}\n{s2}"
|
|
1232
1204
|
else:
|
|
1233
1205
|
result += f"{s2}\n{s1}"
|
|
1234
|
-
return
|
|
1206
|
+
return pretty_str(result)
|
|
1235
1207
|
|
|
1236
1208
|
def shifted(self, shift):
|
|
1237
1209
|
"""Circular Dseqrecord with a new origin <shift>.
|
|
@@ -1284,15 +1256,15 @@ class Dseqrecord(_SeqRecord):
|
|
|
1284
1256
|
)
|
|
1285
1257
|
ln = len(self)
|
|
1286
1258
|
if not shift % ln:
|
|
1287
|
-
return
|
|
1259
|
+
return copy.deepcopy(self) # shift is a multiple of ln or 0
|
|
1288
1260
|
else:
|
|
1289
1261
|
shift %= ln # 0<=shift<=ln
|
|
1290
1262
|
newseq = (self.seq[shift:] + self.seq[:shift]).looped()
|
|
1291
|
-
newfeatures =
|
|
1263
|
+
newfeatures = copy.deepcopy(self.features)
|
|
1292
1264
|
for feature in newfeatures:
|
|
1293
|
-
feature.location =
|
|
1294
|
-
newfeatures.sort(key=
|
|
1295
|
-
answer =
|
|
1265
|
+
feature.location = shift_location(feature.location, -shift, ln)
|
|
1266
|
+
newfeatures.sort(key=operator.attrgetter("location.start"))
|
|
1267
|
+
answer = copy.deepcopy(self)
|
|
1296
1268
|
answer.features = newfeatures
|
|
1297
1269
|
answer.seq = newseq
|
|
1298
1270
|
return answer
|
|
@@ -1346,7 +1318,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1346
1318
|
if left_cut == right_cut:
|
|
1347
1319
|
# Not really a cut, but to handle the general case
|
|
1348
1320
|
if left_cut is None:
|
|
1349
|
-
features =
|
|
1321
|
+
features = copy.deepcopy(self.features)
|
|
1350
1322
|
else:
|
|
1351
1323
|
# The features that span the origin if shifting with left_cut, but that do not cross
|
|
1352
1324
|
# the cut site should be included, and if there is a feature within the cut site, it should
|
|
@@ -1369,7 +1341,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1369
1341
|
initial_shift = left_watson if left_ovhg < 0 else left_crick
|
|
1370
1342
|
features = self.shifted(initial_shift).features
|
|
1371
1343
|
# for f in features:
|
|
1372
|
-
# print(f.id, f.location,
|
|
1344
|
+
# print(f.id, f.location, location_boundaries(f.location))
|
|
1373
1345
|
# Here, we have done what's shown below (* indicates the origin).
|
|
1374
1346
|
# The features 0 and 2 have the right location for the final product:
|
|
1375
1347
|
#
|
|
@@ -1383,10 +1355,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
1383
1355
|
features_need_transfer = [
|
|
1384
1356
|
f
|
|
1385
1357
|
for f in features
|
|
1386
|
-
if (
|
|
1358
|
+
if (location_boundaries(f.location)[1] <= abs(left_ovhg))
|
|
1387
1359
|
]
|
|
1388
1360
|
features_need_transfer = [
|
|
1389
|
-
|
|
1361
|
+
shift_feature(f, -abs(left_ovhg), len(self))
|
|
1390
1362
|
for f in features_need_transfer
|
|
1391
1363
|
]
|
|
1392
1364
|
|
|
@@ -1403,7 +1375,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1403
1375
|
# as the original one. However, the final product is longer because of the overhang.
|
|
1404
1376
|
|
|
1405
1377
|
features += [
|
|
1406
|
-
|
|
1378
|
+
shift_feature(f, abs(left_ovhg), len(dseq))
|
|
1407
1379
|
for f in features_need_transfer
|
|
1408
1380
|
]
|
|
1409
1381
|
# ^ ^^^^^^^^^
|
|
@@ -1415,9 +1387,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
1415
1387
|
f
|
|
1416
1388
|
for f in features
|
|
1417
1389
|
if (
|
|
1418
|
-
|
|
1419
|
-
and
|
|
1420
|
-
<=
|
|
1390
|
+
location_boundaries(f.location)[1] <= len(dseq)
|
|
1391
|
+
and location_boundaries(f.location)[0]
|
|
1392
|
+
<= location_boundaries(f.location)[1]
|
|
1421
1393
|
)
|
|
1422
1394
|
]
|
|
1423
1395
|
else:
|
|
@@ -1468,3 +1440,32 @@ class Dseqrecord(_SeqRecord):
|
|
|
1468
1440
|
if self.source is None:
|
|
1469
1441
|
return ""
|
|
1470
1442
|
return self.source.history_string(self)
|
|
1443
|
+
|
|
1444
|
+
def join(self, fragments):
|
|
1445
|
+
"""
|
|
1446
|
+
Join an iterable of Dseqrecords with this instance as the separator.
|
|
1447
|
+
|
|
1448
|
+
Example:
|
|
1449
|
+
|
|
1450
|
+
>>> sep = Dseqrecord("a")
|
|
1451
|
+
>>> joined = sep.join([Dseqrecord("A"), Dseqrecord("B"), Dseqrecord("C")])
|
|
1452
|
+
>>> joined
|
|
1453
|
+
Dseqrecord(-5)
|
|
1454
|
+
>>> joined.seq
|
|
1455
|
+
Dseq(-5)
|
|
1456
|
+
AaBaC
|
|
1457
|
+
TtVtG
|
|
1458
|
+
|
|
1459
|
+
"""
|
|
1460
|
+
it = iter(fragments)
|
|
1461
|
+
try:
|
|
1462
|
+
result = next(it) # first element (no leading separator)
|
|
1463
|
+
except StopIteration:
|
|
1464
|
+
# Empty iterable -> return empty Dseqrecord in analogy with
|
|
1465
|
+
# str.join
|
|
1466
|
+
return Dseqrecord("")
|
|
1467
|
+
|
|
1468
|
+
# Interleave: result = first + sep + x + sep + y + ...
|
|
1469
|
+
for x in it:
|
|
1470
|
+
result = result + self + x
|
|
1471
|
+
return result
|