pydna 5.5.4__py3-none-any.whl → 5.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +30 -195
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/all.py +1 -12
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +375 -310
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/dseq.py +1788 -718
- pydna/dseqrecord.py +197 -179
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +187 -60
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +19 -19
- pydna/utils.py +97 -75
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/METADATA +8 -8
- pydna-5.5.6.dist-info/RECORD +42 -0
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/download.py +0 -32
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.4.dist-info/RECORD +0 -46
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/licenses/LICENSE.txt +0 -0
pydna/dseqrecord.py
CHANGED
|
@@ -11,30 +11,28 @@ Seq and SeqRecord classes, respectively.
|
|
|
11
11
|
|
|
12
12
|
The Dseq and Dseqrecord classes support the notion of circular and linear DNA topology.
|
|
13
13
|
"""
|
|
14
|
-
from Bio.Restriction import RestrictionBatch
|
|
14
|
+
from Bio.Restriction import RestrictionBatch
|
|
15
15
|
from Bio.Restriction import CommOnly
|
|
16
|
-
from pydna.dseq import Dseq
|
|
17
|
-
from pydna._pretty import pretty_str
|
|
18
|
-
from pydna.utils import flatten
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
from pydna.utils import
|
|
22
|
-
from pydna.
|
|
23
|
-
from
|
|
24
|
-
from pydna.common_sub_strings import common_sub_strings as _common_sub_strings
|
|
25
|
-
from Bio.SeqFeature import SeqFeature as _SeqFeature
|
|
16
|
+
from pydna.dseq import Dseq
|
|
17
|
+
from pydna._pretty import pretty_str
|
|
18
|
+
from pydna.utils import flatten, location_boundaries
|
|
19
|
+
|
|
20
|
+
from pydna.utils import shift_location
|
|
21
|
+
from pydna.utils import shift_feature
|
|
22
|
+
from pydna.common_sub_strings import common_sub_strings
|
|
23
|
+
from Bio.SeqFeature import SeqFeature
|
|
26
24
|
from Bio import SeqIO
|
|
27
|
-
from Bio.SeqFeature import CompoundLocation
|
|
28
|
-
from Bio.SeqFeature import SimpleLocation
|
|
29
|
-
from pydna.seqrecord import SeqRecord
|
|
30
|
-
from Bio.Seq import translate
|
|
31
|
-
from
|
|
32
|
-
import copy
|
|
33
|
-
import operator
|
|
34
|
-
import os
|
|
35
|
-
import re
|
|
36
|
-
import time
|
|
37
|
-
import datetime
|
|
25
|
+
from Bio.SeqFeature import CompoundLocation
|
|
26
|
+
from Bio.SeqFeature import SimpleLocation
|
|
27
|
+
from pydna.seqrecord import SeqRecord
|
|
28
|
+
from Bio.Seq import translate
|
|
29
|
+
from Bio.Seq import Seq as BPSeq
|
|
30
|
+
import copy
|
|
31
|
+
import operator
|
|
32
|
+
import os
|
|
33
|
+
import re
|
|
34
|
+
import time
|
|
35
|
+
import datetime
|
|
38
36
|
from typing import Union, TYPE_CHECKING
|
|
39
37
|
from pydna.opencloning_models import SequenceCutSource
|
|
40
38
|
|
|
@@ -42,20 +40,15 @@ if TYPE_CHECKING: # pragma: no cover
|
|
|
42
40
|
from pydna.opencloning_models import Source
|
|
43
41
|
|
|
44
42
|
|
|
45
|
-
# import logging as _logging
|
|
46
|
-
|
|
47
|
-
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
48
|
-
|
|
49
|
-
|
|
50
43
|
try:
|
|
51
|
-
from IPython.display import display_html
|
|
44
|
+
from IPython.display import display_html
|
|
52
45
|
except ImportError:
|
|
53
46
|
|
|
54
|
-
def
|
|
47
|
+
def display_html(item, raw=None):
|
|
55
48
|
return item
|
|
56
49
|
|
|
57
50
|
|
|
58
|
-
class Dseqrecord(
|
|
51
|
+
class Dseqrecord(SeqRecord):
|
|
59
52
|
"""Dseqrecord is a double stranded version of the Biopython SeqRecord [#]_ class.
|
|
60
53
|
The Dseqrecord object holds a Dseq object describing the sequence.
|
|
61
54
|
Additionally, Dseqrecord hold meta information about the sequence in the
|
|
@@ -132,7 +125,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
132
125
|
|
|
133
126
|
"""
|
|
134
127
|
|
|
135
|
-
seq:
|
|
128
|
+
seq: Dseq
|
|
136
129
|
source: Union["Source", None] = None
|
|
137
130
|
|
|
138
131
|
def __init__(
|
|
@@ -144,15 +137,12 @@ class Dseqrecord(_SeqRecord):
|
|
|
144
137
|
source=None,
|
|
145
138
|
**kwargs,
|
|
146
139
|
):
|
|
147
|
-
# _module_logger.info("### Dseqrecord initialized ###")
|
|
148
|
-
# _module_logger.info("argument circular = %s", circular)
|
|
149
|
-
# _module_logger.info("circular = %s", circular)
|
|
150
140
|
|
|
151
141
|
if isinstance(record, str):
|
|
152
|
-
|
|
142
|
+
|
|
153
143
|
super().__init__(
|
|
154
|
-
|
|
155
|
-
record,
|
|
144
|
+
Dseq.quick(
|
|
145
|
+
record.encode("ascii"),
|
|
156
146
|
# linear=linear,
|
|
157
147
|
circular=bool(circular),
|
|
158
148
|
),
|
|
@@ -166,14 +156,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
166
156
|
record = record[:]
|
|
167
157
|
elif circular is True:
|
|
168
158
|
record = record.looped()
|
|
169
|
-
|
|
159
|
+
|
|
170
160
|
super().__init__(record, *args, **kwargs)
|
|
171
161
|
|
|
172
162
|
# record is a Bio.Seq object ?
|
|
173
163
|
elif hasattr(record, "transcribe"):
|
|
174
|
-
|
|
164
|
+
|
|
175
165
|
super().__init__(
|
|
176
|
-
|
|
166
|
+
Dseq(
|
|
177
167
|
str(record),
|
|
178
168
|
# linear=linear,
|
|
179
169
|
circular=bool(circular),
|
|
@@ -184,13 +174,13 @@ class Dseqrecord(_SeqRecord):
|
|
|
184
174
|
|
|
185
175
|
# record is a Bio.SeqRecord or Dseqrecord object ?
|
|
186
176
|
elif hasattr(record, "features"):
|
|
187
|
-
|
|
177
|
+
|
|
188
178
|
for key, value in list(record.__dict__.items()):
|
|
189
179
|
setattr(self, key, value)
|
|
190
180
|
self.letter_annotations = {}
|
|
191
181
|
# record.seq is a Dseq object ?
|
|
192
182
|
if hasattr(record.seq, "watson"):
|
|
193
|
-
new_seq =
|
|
183
|
+
new_seq = copy.copy(record.seq)
|
|
194
184
|
if circular is False:
|
|
195
185
|
new_seq = new_seq[:]
|
|
196
186
|
elif circular is True:
|
|
@@ -198,7 +188,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
198
188
|
self.seq = new_seq
|
|
199
189
|
# record.seq is Bio.SeqRecord object ?
|
|
200
190
|
else:
|
|
201
|
-
self.seq =
|
|
191
|
+
self.seq = Dseq(
|
|
202
192
|
str(record.seq),
|
|
203
193
|
# linear=linear,
|
|
204
194
|
circular=bool(circular),
|
|
@@ -226,16 +216,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
226
216
|
# linear=True, circular=False, n = 5E-14, **kwargs):
|
|
227
217
|
obj = cls.__new__(cls) # Does not call __init__
|
|
228
218
|
obj._per_letter_annotations = {}
|
|
229
|
-
obj.seq =
|
|
230
|
-
record,
|
|
231
|
-
_rc(record),
|
|
232
|
-
ovhg=0,
|
|
219
|
+
obj.seq = Dseq.quick(
|
|
220
|
+
record.encode("ascii"),
|
|
233
221
|
# linear=linear,
|
|
234
222
|
circular=circular,
|
|
235
223
|
)
|
|
236
|
-
obj.id =
|
|
237
|
-
obj.name =
|
|
238
|
-
obj.description =
|
|
224
|
+
obj.id = pretty_str("id")
|
|
225
|
+
obj.name = pretty_str("name")
|
|
226
|
+
obj.description = pretty_str("description")
|
|
239
227
|
obj.dbxrefs = []
|
|
240
228
|
obj.annotations = {"molecule_type": "DNA"}
|
|
241
229
|
obj.features = []
|
|
@@ -247,7 +235,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
247
235
|
@classmethod
|
|
248
236
|
def from_SeqRecord(
|
|
249
237
|
cls,
|
|
250
|
-
record:
|
|
238
|
+
record: SeqRecord,
|
|
251
239
|
*args,
|
|
252
240
|
circular=None,
|
|
253
241
|
n=5e-14,
|
|
@@ -267,9 +255,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
267
255
|
obj.source = None
|
|
268
256
|
if circular is None:
|
|
269
257
|
circular = record.annotations.get("topology") == "circular"
|
|
270
|
-
obj.seq =
|
|
271
|
-
str(record.seq), _rc(str(record.seq)), ovhg=0, circular=circular
|
|
272
|
-
)
|
|
258
|
+
obj.seq = Dseq.quick(record.seq._data, ovhg=0, circular=circular)
|
|
273
259
|
return obj
|
|
274
260
|
|
|
275
261
|
@property
|
|
@@ -339,14 +325,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
339
325
|
qualifiers = {}
|
|
340
326
|
qualifiers.update(kwargs)
|
|
341
327
|
|
|
342
|
-
location =
|
|
328
|
+
location = CompoundLocation(
|
|
343
329
|
(
|
|
344
|
-
|
|
345
|
-
|
|
330
|
+
SimpleLocation(x, len(self.seq), strand=strand),
|
|
331
|
+
SimpleLocation(0, y, strand=strand),
|
|
346
332
|
)
|
|
347
333
|
)
|
|
348
334
|
|
|
349
|
-
sf =
|
|
335
|
+
sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
|
|
350
336
|
|
|
351
337
|
if "label" not in qualifiers:
|
|
352
338
|
qualifiers["label"] = [f"ft{len(location)}"]
|
|
@@ -395,35 +381,31 @@ class Dseqrecord(_SeqRecord):
|
|
|
395
381
|
--------
|
|
396
382
|
pydna.dseq.Dseq.looped
|
|
397
383
|
"""
|
|
398
|
-
new =
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
)
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
fn.location = _CompoundLocation([loc1, loc2])
|
|
424
|
-
|
|
425
|
-
fn.qualifiers = fo.qualifiers
|
|
426
|
-
|
|
384
|
+
new = copy.deepcopy(self)
|
|
385
|
+
new.seq = self.seq.looped()
|
|
386
|
+
|
|
387
|
+
old_length = len(self) # Possibly longer, including sticky ends if any.
|
|
388
|
+
new_length = len(new) # Possibly shorter, with blunt ends.
|
|
389
|
+
if old_length != new_length: # Only False if self was blunt.
|
|
390
|
+
new_features = []
|
|
391
|
+
for fn in new.features:
|
|
392
|
+
if len(fn.location) > new_length:
|
|
393
|
+
# Edge case: if the feature is longer than the sequence, it should be
|
|
394
|
+
# dropped. This can happen in a sequence with overhangs, where the feature
|
|
395
|
+
# spans both overhangs.
|
|
396
|
+
#
|
|
397
|
+
# Example:
|
|
398
|
+
# feature
|
|
399
|
+
# <------>
|
|
400
|
+
# aaACGT
|
|
401
|
+
# TGCAtt
|
|
402
|
+
#
|
|
403
|
+
# Circular sequence ACGTtt should not have that feature, so we drop it
|
|
404
|
+
continue
|
|
405
|
+
fn.location = shift_location(fn.location, 0, new_length)
|
|
406
|
+
new_features.append(fn)
|
|
407
|
+
|
|
408
|
+
new.features = new_features
|
|
427
409
|
return new
|
|
428
410
|
|
|
429
411
|
def tolinear(self): # pragma: no cover
|
|
@@ -445,16 +427,16 @@ class Dseqrecord(_SeqRecord):
|
|
|
445
427
|
>>>
|
|
446
428
|
|
|
447
429
|
"""
|
|
448
|
-
import warnings
|
|
430
|
+
import warnings
|
|
449
431
|
from pydna import _PydnaDeprecationWarning
|
|
450
432
|
|
|
451
|
-
|
|
433
|
+
warnings.warn(
|
|
452
434
|
"tolinear method is obsolete; "
|
|
453
435
|
"please use obj[:] "
|
|
454
436
|
"instead of obj.tolinear().",
|
|
455
437
|
_PydnaDeprecationWarning,
|
|
456
438
|
)
|
|
457
|
-
new =
|
|
439
|
+
new = copy.copy(self)
|
|
458
440
|
for key, value in list(self.__dict__.items()):
|
|
459
441
|
setattr(new, key, value)
|
|
460
442
|
# new._seq = self.seq.tolinear()
|
|
@@ -465,15 +447,29 @@ class Dseqrecord(_SeqRecord):
|
|
|
465
447
|
|
|
466
448
|
def terminal_transferase(self, nucleotides="a"):
|
|
467
449
|
"""docstring."""
|
|
468
|
-
newseq =
|
|
450
|
+
newseq = copy.deepcopy(self)
|
|
469
451
|
newseq.seq = self.seq.terminal_transferase(nucleotides)
|
|
470
452
|
for feature in newseq.features:
|
|
471
453
|
feature.location += len(nucleotides)
|
|
472
454
|
return newseq
|
|
473
455
|
|
|
474
|
-
def format(self,
|
|
456
|
+
def format(self, format: str = "gb"):
|
|
475
457
|
"""Returns the sequence as a string using a format supported by Biopython
|
|
476
458
|
SeqIO [#]_. Default is "gb" which is short for Genbank.
|
|
459
|
+
Allowed Formats are for example:
|
|
460
|
+
|
|
461
|
+
* "fasta": The standard FASTA format.
|
|
462
|
+
* "fasta-2line": No line wrapping and exactly two lines per record.
|
|
463
|
+
* "genbank" (or "gb"): The GenBank flat file format.
|
|
464
|
+
* "embl": The EMBL flat file format.
|
|
465
|
+
* "imgt": The IMGT variant of the EMBL format.
|
|
466
|
+
|
|
467
|
+
The format string can be modified with the keyword "dscode" if
|
|
468
|
+
the underlying dscode string is desired in the output. for example:
|
|
469
|
+
::
|
|
470
|
+
|
|
471
|
+
Dseqrecord("PEXIGATCQFZJ").format("fasta-2line dscode")
|
|
472
|
+
|
|
477
473
|
|
|
478
474
|
Examples
|
|
479
475
|
--------
|
|
@@ -495,6 +491,12 @@ class Dseqrecord(_SeqRecord):
|
|
|
495
491
|
ORIGIN
|
|
496
492
|
1 aaa
|
|
497
493
|
//
|
|
494
|
+
>>> print(Dseqrecord("PEXIGATCQFZJ").format("fasta-2line"))
|
|
495
|
+
>id description
|
|
496
|
+
GATCGATCGATC
|
|
497
|
+
>>> print(Dseqrecord("PEXIGATCQFZJ").format("fasta-2line dscode"))
|
|
498
|
+
>id description
|
|
499
|
+
PEXIGATCQFZJ
|
|
498
500
|
|
|
499
501
|
|
|
500
502
|
References
|
|
@@ -504,13 +506,19 @@ class Dseqrecord(_SeqRecord):
|
|
|
504
506
|
|
|
505
507
|
|
|
506
508
|
"""
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
509
|
+
record = copy.deepcopy(self)
|
|
510
|
+
if "dscode" in format:
|
|
511
|
+
format = format.replace("dscode", "")
|
|
512
|
+
obj = BPSeq("")
|
|
513
|
+
obj._data = record.seq._data
|
|
514
|
+
record.seq = obj
|
|
515
|
+
format = format.strip(" -")
|
|
516
|
+
if format in ("genbank", "gb") and self.circular:
|
|
510
517
|
record.annotations["topology"] = "circular"
|
|
511
518
|
else:
|
|
512
519
|
record.annotations["topology"] = "linear"
|
|
513
|
-
|
|
520
|
+
|
|
521
|
+
return SeqRecord.format(record, format).strip()
|
|
514
522
|
|
|
515
523
|
def write(self, filename=None, f="gb"):
|
|
516
524
|
"""Writes the Dseqrecord to a file using the format f, which must
|
|
@@ -543,9 +551,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
543
551
|
# generate a name if no name was given
|
|
544
552
|
# if not isinstance(filename, str): # is filename a string???
|
|
545
553
|
# raise ValueError("filename has to be a string, got", type(filename))
|
|
546
|
-
name, ext =
|
|
554
|
+
name, ext = os.path.splitext(filename)
|
|
547
555
|
msg = f"<font face=monospace><a href='{filename}' target='_blank'>{filename}</a></font><br>"
|
|
548
|
-
if not
|
|
556
|
+
if not os.path.isfile(filename):
|
|
549
557
|
with open(filename, "w", encoding="utf8") as fp:
|
|
550
558
|
fp.write(self.format(f))
|
|
551
559
|
else:
|
|
@@ -556,16 +564,16 @@ class Dseqrecord(_SeqRecord):
|
|
|
556
564
|
if self.seq != old_file.seq:
|
|
557
565
|
# If new sequence is different, the old file is
|
|
558
566
|
# renamed with "_OLD_" suffix:
|
|
559
|
-
oldmtime =
|
|
560
|
-
|
|
567
|
+
oldmtime = datetime.datetime.fromtimestamp(
|
|
568
|
+
os.path.getmtime(filename)
|
|
561
569
|
).isoformat()
|
|
562
|
-
tstmp = int(
|
|
570
|
+
tstmp = int(time.time() * 1_000_000)
|
|
563
571
|
old_filename = f"{name}_OLD_{tstmp}{ext}"
|
|
564
|
-
|
|
572
|
+
os.rename(filename, old_filename)
|
|
565
573
|
with open(filename, "w", encoding="utf8") as fp:
|
|
566
574
|
fp.write(self.format(f))
|
|
567
|
-
newmtime =
|
|
568
|
-
|
|
575
|
+
newmtime = datetime.datetime.fromtimestamp(
|
|
576
|
+
os.path.getmtime(filename)
|
|
569
577
|
).isoformat()
|
|
570
578
|
msg = f"""
|
|
571
579
|
<table style="padding:10px 10px;
|
|
@@ -611,8 +619,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
611
619
|
elif "seguid" in old_file.annotations.get("comment", ""):
|
|
612
620
|
pattern = r"(ldseguid|cdseguid)-(\S{27})(_[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}){0,1}"
|
|
613
621
|
# seguid=NNNNNNNNNNNNNNNNNNNNNNNNNNN_2020-10-10T11:11:11.111111
|
|
614
|
-
oldstamp =
|
|
615
|
-
newstamp =
|
|
622
|
+
oldstamp = re.search(pattern, old_file.description)
|
|
623
|
+
newstamp = re.search(pattern, self.description)
|
|
616
624
|
newdescription = self.description
|
|
617
625
|
if oldstamp and newstamp:
|
|
618
626
|
if oldstamp.group(0)[:35] == newstamp.group(0)[:35]:
|
|
@@ -621,7 +629,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
621
629
|
)
|
|
622
630
|
elif oldstamp:
|
|
623
631
|
newdescription += " " + oldstamp.group(0)
|
|
624
|
-
newobj =
|
|
632
|
+
newobj = copy.copy(self)
|
|
625
633
|
newobj.description = newdescription
|
|
626
634
|
|
|
627
635
|
with open(filename, "w", encoding="utf8") as fp:
|
|
@@ -629,7 +637,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
629
637
|
else:
|
|
630
638
|
with open(filename, "w", encoding="utf8") as fp:
|
|
631
639
|
fp.write(self.format(f))
|
|
632
|
-
return
|
|
640
|
+
return display_html(msg, raw=True)
|
|
633
641
|
|
|
634
642
|
def find(self, other):
|
|
635
643
|
# TODO allow strings, seqs, seqrecords or Dseqrecords
|
|
@@ -647,7 +655,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
647
655
|
def __str__(self):
|
|
648
656
|
return ("Dseqrecord\n" "circular: {}\n" "size: {}\n").format(
|
|
649
657
|
self.circular, len(self)
|
|
650
|
-
) +
|
|
658
|
+
) + SeqRecord.__str__(self)
|
|
651
659
|
|
|
652
660
|
def __contains__(self, other):
|
|
653
661
|
if other.lower() in str(self.seq).lower():
|
|
@@ -658,7 +666,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
658
666
|
spc = 3 - ln % 3 if ln % 3 else 0
|
|
659
667
|
s = "n" * spc + s + "nnn"
|
|
660
668
|
for frame in range(3):
|
|
661
|
-
if other.lower() in
|
|
669
|
+
if other.lower() in translate(s[frame : frame + spc + ln]).lower():
|
|
662
670
|
return True
|
|
663
671
|
return False
|
|
664
672
|
|
|
@@ -667,13 +675,13 @@ class Dseqrecord(_SeqRecord):
|
|
|
667
675
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
668
676
|
>>> s=Dseqrecord("atgtacgatcgtatgctggttatattttag")
|
|
669
677
|
>>> s.seq.translate()
|
|
670
|
-
|
|
678
|
+
ProteinSeq('MYDRMLVIF*')
|
|
671
679
|
>>> "RML" in s
|
|
672
680
|
True
|
|
673
681
|
>>> "MMM" in s
|
|
674
682
|
False
|
|
675
683
|
>>> s.seq.rc().translate()
|
|
676
|
-
|
|
684
|
+
ProteinSeq('LKYNQHTIVH')
|
|
677
685
|
>>> "QHT" in s.rc()
|
|
678
686
|
True
|
|
679
687
|
>>> "QHT" in s
|
|
@@ -689,7 +697,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
689
697
|
cgtatgctg
|
|
690
698
|
gcatacgac
|
|
691
699
|
>>> code.translate()
|
|
692
|
-
|
|
700
|
+
ProteinSeq('RML')
|
|
693
701
|
"""
|
|
694
702
|
other = str(other).lower()
|
|
695
703
|
assert self.seq.watson == "".join(self.seq.watson.split())
|
|
@@ -700,7 +708,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
700
708
|
start = None
|
|
701
709
|
for frame in range(3):
|
|
702
710
|
try:
|
|
703
|
-
start =
|
|
711
|
+
start = translate(s[frame : frame + ln + spc]).lower().index(other)
|
|
704
712
|
break
|
|
705
713
|
except ValueError:
|
|
706
714
|
pass
|
|
@@ -748,7 +756,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
748
756
|
matching_reads = []
|
|
749
757
|
|
|
750
758
|
for read_ in reads:
|
|
751
|
-
matches =
|
|
759
|
+
matches = common_sub_strings(str(self.seq).lower(), str(read_.seq), limit)
|
|
752
760
|
|
|
753
761
|
if not matches:
|
|
754
762
|
continue
|
|
@@ -769,14 +777,14 @@ class Dseqrecord(_SeqRecord):
|
|
|
769
777
|
if len(newmatches) > 1:
|
|
770
778
|
ms = []
|
|
771
779
|
for m in newmatches:
|
|
772
|
-
ms.append(
|
|
773
|
-
loc =
|
|
780
|
+
ms.append(SimpleLocation(m[0], m[0] + m[2]))
|
|
781
|
+
loc = CompoundLocation(ms)
|
|
774
782
|
else:
|
|
775
783
|
a, b, c = newmatches[0]
|
|
776
|
-
loc =
|
|
784
|
+
loc = SimpleLocation(a, a + c)
|
|
777
785
|
|
|
778
786
|
self.features.append(
|
|
779
|
-
|
|
787
|
+
SeqFeature(
|
|
780
788
|
loc,
|
|
781
789
|
qualifiers={"label": [read_.annotations["filename"]]},
|
|
782
790
|
type="trace",
|
|
@@ -786,9 +794,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
786
794
|
return [x.annotations["filename"] for x in matching_reads]
|
|
787
795
|
|
|
788
796
|
def __repr__(self):
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
)
|
|
797
|
+
top = {True: "-", False: "o"}[not self.circular]
|
|
798
|
+
return f"{self.__class__.__name__}({top}{len(self)})"
|
|
792
799
|
|
|
793
800
|
def _repr_pretty_(self, p, cycle):
|
|
794
801
|
p.text(
|
|
@@ -799,7 +806,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
799
806
|
|
|
800
807
|
def __add__(self, other):
|
|
801
808
|
if hasattr(other, "seq") and hasattr(other.seq, "watson"):
|
|
802
|
-
other =
|
|
809
|
+
other = copy.deepcopy(other)
|
|
803
810
|
other_five_prime = other.seq.five_prime_end()
|
|
804
811
|
if other_five_prime[0] == "5'":
|
|
805
812
|
# add other.seq.ovhg
|
|
@@ -810,10 +817,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
810
817
|
for f in other.features:
|
|
811
818
|
f.location = f.location + (-other.seq.ovhg)
|
|
812
819
|
|
|
813
|
-
answer = Dseqrecord(
|
|
820
|
+
answer = Dseqrecord(SeqRecord.__add__(self, other))
|
|
814
821
|
answer.n = min(self.n, other.n)
|
|
815
822
|
else:
|
|
816
|
-
answer = Dseqrecord(
|
|
823
|
+
answer = Dseqrecord(SeqRecord.__add__(self, Dseqrecord(other)))
|
|
817
824
|
answer.n = self.n
|
|
818
825
|
return answer
|
|
819
826
|
|
|
@@ -827,7 +834,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
827
834
|
if self.circular:
|
|
828
835
|
raise TypeError("TypeError: can't multiply circular Dseqrecord.")
|
|
829
836
|
if number > 0:
|
|
830
|
-
new =
|
|
837
|
+
new = copy.deepcopy(self)
|
|
831
838
|
for i in range(1, number):
|
|
832
839
|
new += self
|
|
833
840
|
new._per_letter_annotations = self._per_letter_annotations
|
|
@@ -837,7 +844,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
837
844
|
|
|
838
845
|
def __getitem__(self, sl):
|
|
839
846
|
"""docstring."""
|
|
840
|
-
answer = Dseqrecord(
|
|
847
|
+
answer = Dseqrecord(copy.copy(self))
|
|
841
848
|
answer.seq = self.seq.__getitem__(sl)
|
|
842
849
|
# answer.seq.alphabet = self.seq.alphabet
|
|
843
850
|
# breakpoint()
|
|
@@ -859,9 +866,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
859
866
|
f
|
|
860
867
|
for f in answer.features
|
|
861
868
|
if (
|
|
862
|
-
|
|
863
|
-
and
|
|
864
|
-
<
|
|
869
|
+
location_boundaries(f.location)[1] <= len(answer.seq)
|
|
870
|
+
and location_boundaries(f.location)[0]
|
|
871
|
+
< location_boundaries(f.location)[1]
|
|
865
872
|
)
|
|
866
873
|
]
|
|
867
874
|
|
|
@@ -870,15 +877,6 @@ class Dseqrecord(_SeqRecord):
|
|
|
870
877
|
return self.apply_cut(cut, cut)
|
|
871
878
|
else:
|
|
872
879
|
answer = Dseqrecord("")
|
|
873
|
-
identifier = "part_{id}".format(id=self.id)
|
|
874
|
-
if answer.features:
|
|
875
|
-
sf = max(answer.features, key=len) # default
|
|
876
|
-
if "label" in sf.qualifiers:
|
|
877
|
-
identifier = " ".join(sf.qualifiers["label"])
|
|
878
|
-
elif "note" in sf.qualifiers:
|
|
879
|
-
identifier = " ".join(sf.qualifiers["note"])
|
|
880
|
-
answer.id = _identifier_from_string(identifier)[:16]
|
|
881
|
-
answer.name = _identifier_from_string("part_{name}".format(name=self.name))[:16]
|
|
882
880
|
return answer
|
|
883
881
|
|
|
884
882
|
def __eq__(self, other):
|
|
@@ -920,43 +918,34 @@ class Dseqrecord(_SeqRecord):
|
|
|
920
918
|
answer.name = answer.id[:16]
|
|
921
919
|
return fragments[0]
|
|
922
920
|
|
|
923
|
-
def no_cutters(self, batch:
|
|
921
|
+
def no_cutters(self, batch: RestrictionBatch = None):
|
|
924
922
|
"""docstring."""
|
|
925
923
|
return self.seq.no_cutters(batch=batch or CommOnly)
|
|
926
924
|
|
|
927
|
-
def unique_cutters(self, batch:
|
|
925
|
+
def unique_cutters(self, batch: RestrictionBatch = None):
|
|
928
926
|
"""docstring."""
|
|
929
927
|
return self.seq.unique_cutters(batch=batch or CommOnly)
|
|
930
928
|
|
|
931
|
-
def once_cutters(self, batch:
|
|
929
|
+
def once_cutters(self, batch: RestrictionBatch = None):
|
|
932
930
|
"""docstring."""
|
|
933
931
|
return self.seq.once_cutters(batch=batch or CommOnly)
|
|
934
932
|
|
|
935
|
-
def twice_cutters(self, batch:
|
|
933
|
+
def twice_cutters(self, batch: RestrictionBatch = None):
|
|
936
934
|
"""docstring."""
|
|
937
935
|
return self.seq.twice_cutters(batch=batch or CommOnly)
|
|
938
936
|
|
|
939
|
-
def n_cutters(self, n=3, batch:
|
|
937
|
+
def n_cutters(self, n=3, batch: RestrictionBatch = None):
|
|
940
938
|
"""docstring."""
|
|
941
939
|
return self.seq.n_cutters(n=n, batch=batch or CommOnly)
|
|
942
940
|
|
|
943
|
-
def cutters(self, batch:
|
|
941
|
+
def cutters(self, batch: RestrictionBatch = None):
|
|
944
942
|
"""docstring."""
|
|
945
943
|
return self.seq.cutters(batch=batch or CommOnly)
|
|
946
944
|
|
|
947
945
|
def number_of_cuts(self, *enzymes):
|
|
948
946
|
"""The number of cuts by digestion with the Restriction enzymes
|
|
949
947
|
contained in the iterable."""
|
|
950
|
-
return sum([len(enzyme.search(self.seq)) for enzyme in
|
|
951
|
-
|
|
952
|
-
def cas9(self, RNA: str):
|
|
953
|
-
"""docstring."""
|
|
954
|
-
fragments = []
|
|
955
|
-
result = []
|
|
956
|
-
for target in (self.seq, self.seq.rc()):
|
|
957
|
-
fragments = [self[sl.start : sl.stop] for sl in target.cas9(RNA)]
|
|
958
|
-
result.append(fragments)
|
|
959
|
-
return result
|
|
948
|
+
return sum([len(enzyme.search(self.seq)) for enzyme in flatten(enzymes)])
|
|
960
949
|
|
|
961
950
|
def reverse_complement(self):
|
|
962
951
|
"""Reverse complement.
|
|
@@ -1033,7 +1022,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1033
1022
|
if not self.circular:
|
|
1034
1023
|
raise TypeError("Only circular DNA can be synced!")
|
|
1035
1024
|
|
|
1036
|
-
newseq =
|
|
1025
|
+
newseq = copy.copy(self)
|
|
1037
1026
|
|
|
1038
1027
|
s = str(self.seq.watson).lower()
|
|
1039
1028
|
s_rc = str(self.seq.crick).lower()
|
|
@@ -1049,8 +1038,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
1049
1038
|
|
|
1050
1039
|
lim = min(limit, limit * (len(s) // limit) + 1)
|
|
1051
1040
|
|
|
1052
|
-
c =
|
|
1053
|
-
d =
|
|
1041
|
+
c = common_sub_strings(s + s, r, limit=lim)
|
|
1042
|
+
d = common_sub_strings(s_rc + s_rc, r, limit=lim)
|
|
1054
1043
|
|
|
1055
1044
|
c = [(x[0], x[2]) for x in c if x[1] == 0]
|
|
1056
1045
|
d = [(x[0], x[2]) for x in d if x[1] == 0]
|
|
@@ -1076,7 +1065,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1076
1065
|
result = newseq
|
|
1077
1066
|
else:
|
|
1078
1067
|
result = newseq.shifted(start)
|
|
1079
|
-
|
|
1068
|
+
|
|
1080
1069
|
return result
|
|
1081
1070
|
|
|
1082
1071
|
def upper(self):
|
|
@@ -1105,7 +1094,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1105
1094
|
--------
|
|
1106
1095
|
pydna.dseqrecord.Dseqrecord.lower"""
|
|
1107
1096
|
|
|
1108
|
-
upper =
|
|
1097
|
+
upper = copy.deepcopy(self)
|
|
1109
1098
|
# This is because the @seq.setter methods otherwise sets the _per_letter_annotations to an empty dict
|
|
1110
1099
|
prev_per_letter_annotation = upper._per_letter_annotations
|
|
1111
1100
|
upper.seq = upper.seq.upper()
|
|
@@ -1139,7 +1128,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1139
1128
|
pydna.dseqrecord.Dseqrecord.upper
|
|
1140
1129
|
|
|
1141
1130
|
"""
|
|
1142
|
-
lower =
|
|
1131
|
+
lower = copy.deepcopy(self)
|
|
1143
1132
|
prev_per_letter_annotation = lower._per_letter_annotations
|
|
1144
1133
|
lower.seq = lower.seq.lower()
|
|
1145
1134
|
lower._per_letter_annotations = prev_per_letter_annotation
|
|
@@ -1157,8 +1146,8 @@ class Dseqrecord(_SeqRecord):
|
|
|
1157
1146
|
orf = self[x:y]
|
|
1158
1147
|
prt = orf.translate()
|
|
1159
1148
|
features.append(
|
|
1160
|
-
|
|
1161
|
-
|
|
1149
|
+
SeqFeature(
|
|
1150
|
+
SimpleLocation(x, y, strand=strand),
|
|
1162
1151
|
type="CDS",
|
|
1163
1152
|
qualifiers={
|
|
1164
1153
|
"note": f"{y - x}bp {(y - x) // 3}aa",
|
|
@@ -1196,11 +1185,11 @@ class Dseqrecord(_SeqRecord):
|
|
|
1196
1185
|
if self.features:
|
|
1197
1186
|
f = self.features[feature]
|
|
1198
1187
|
locations = sorted(
|
|
1199
|
-
self.features[feature].location.parts, key=
|
|
1188
|
+
self.features[feature].location.parts, key=SimpleLocation.start.fget
|
|
1200
1189
|
)
|
|
1201
1190
|
strand = f.location.strand
|
|
1202
1191
|
else:
|
|
1203
|
-
locations = [
|
|
1192
|
+
locations = [SimpleLocation(0, 0, 1)]
|
|
1204
1193
|
strand = 1
|
|
1205
1194
|
|
|
1206
1195
|
ovhg = self.seq.ovhg + len(self.seq.watson) - len(self.seq.crick)
|
|
@@ -1231,7 +1220,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1231
1220
|
result += f"{s1}\n{s2}"
|
|
1232
1221
|
else:
|
|
1233
1222
|
result += f"{s2}\n{s1}"
|
|
1234
|
-
return
|
|
1223
|
+
return pretty_str(result)
|
|
1235
1224
|
|
|
1236
1225
|
def shifted(self, shift):
|
|
1237
1226
|
"""Circular Dseqrecord with a new origin <shift>.
|
|
@@ -1284,15 +1273,15 @@ class Dseqrecord(_SeqRecord):
|
|
|
1284
1273
|
)
|
|
1285
1274
|
ln = len(self)
|
|
1286
1275
|
if not shift % ln:
|
|
1287
|
-
return
|
|
1276
|
+
return copy.deepcopy(self) # shift is a multiple of ln or 0
|
|
1288
1277
|
else:
|
|
1289
1278
|
shift %= ln # 0<=shift<=ln
|
|
1290
1279
|
newseq = (self.seq[shift:] + self.seq[:shift]).looped()
|
|
1291
|
-
newfeatures =
|
|
1280
|
+
newfeatures = copy.deepcopy(self.features)
|
|
1292
1281
|
for feature in newfeatures:
|
|
1293
|
-
feature.location =
|
|
1294
|
-
newfeatures.sort(key=
|
|
1295
|
-
answer =
|
|
1282
|
+
feature.location = shift_location(feature.location, -shift, ln)
|
|
1283
|
+
newfeatures.sort(key=operator.attrgetter("location.start"))
|
|
1284
|
+
answer = copy.deepcopy(self)
|
|
1296
1285
|
answer.features = newfeatures
|
|
1297
1286
|
answer.seq = newseq
|
|
1298
1287
|
return answer
|
|
@@ -1346,7 +1335,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1346
1335
|
if left_cut == right_cut:
|
|
1347
1336
|
# Not really a cut, but to handle the general case
|
|
1348
1337
|
if left_cut is None:
|
|
1349
|
-
features =
|
|
1338
|
+
features = copy.deepcopy(self.features)
|
|
1350
1339
|
else:
|
|
1351
1340
|
# The features that span the origin if shifting with left_cut, but that do not cross
|
|
1352
1341
|
# the cut site should be included, and if there is a feature within the cut site, it should
|
|
@@ -1369,7 +1358,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1369
1358
|
initial_shift = left_watson if left_ovhg < 0 else left_crick
|
|
1370
1359
|
features = self.shifted(initial_shift).features
|
|
1371
1360
|
# for f in features:
|
|
1372
|
-
# print(f.id, f.location,
|
|
1361
|
+
# print(f.id, f.location, location_boundaries(f.location))
|
|
1373
1362
|
# Here, we have done what's shown below (* indicates the origin).
|
|
1374
1363
|
# The features 0 and 2 have the right location for the final product:
|
|
1375
1364
|
#
|
|
@@ -1383,10 +1372,10 @@ class Dseqrecord(_SeqRecord):
|
|
|
1383
1372
|
features_need_transfer = [
|
|
1384
1373
|
f
|
|
1385
1374
|
for f in features
|
|
1386
|
-
if (
|
|
1375
|
+
if (location_boundaries(f.location)[1] <= abs(left_ovhg))
|
|
1387
1376
|
]
|
|
1388
1377
|
features_need_transfer = [
|
|
1389
|
-
|
|
1378
|
+
shift_feature(f, -abs(left_ovhg), len(self))
|
|
1390
1379
|
for f in features_need_transfer
|
|
1391
1380
|
]
|
|
1392
1381
|
|
|
@@ -1403,7 +1392,7 @@ class Dseqrecord(_SeqRecord):
|
|
|
1403
1392
|
# as the original one. However, the final product is longer because of the overhang.
|
|
1404
1393
|
|
|
1405
1394
|
features += [
|
|
1406
|
-
|
|
1395
|
+
shift_feature(f, abs(left_ovhg), len(dseq))
|
|
1407
1396
|
for f in features_need_transfer
|
|
1408
1397
|
]
|
|
1409
1398
|
# ^ ^^^^^^^^^
|
|
@@ -1415,9 +1404,9 @@ class Dseqrecord(_SeqRecord):
|
|
|
1415
1404
|
f
|
|
1416
1405
|
for f in features
|
|
1417
1406
|
if (
|
|
1418
|
-
|
|
1419
|
-
and
|
|
1420
|
-
<=
|
|
1407
|
+
location_boundaries(f.location)[1] <= len(dseq)
|
|
1408
|
+
and location_boundaries(f.location)[0]
|
|
1409
|
+
<= location_boundaries(f.location)[1]
|
|
1421
1410
|
)
|
|
1422
1411
|
]
|
|
1423
1412
|
else:
|
|
@@ -1468,3 +1457,32 @@ class Dseqrecord(_SeqRecord):
|
|
|
1468
1457
|
if self.source is None:
|
|
1469
1458
|
return ""
|
|
1470
1459
|
return self.source.history_string(self)
|
|
1460
|
+
|
|
1461
|
+
def join(self, fragments):
|
|
1462
|
+
"""
|
|
1463
|
+
Join an iterable of Dseqrecords with this instance as the separator.
|
|
1464
|
+
|
|
1465
|
+
Example:
|
|
1466
|
+
|
|
1467
|
+
>>> sep = Dseqrecord("a")
|
|
1468
|
+
>>> joined = sep.join([Dseqrecord("A"), Dseqrecord("B"), Dseqrecord("C")])
|
|
1469
|
+
>>> joined
|
|
1470
|
+
Dseqrecord(-5)
|
|
1471
|
+
>>> joined.seq
|
|
1472
|
+
Dseq(-5)
|
|
1473
|
+
AaBaC
|
|
1474
|
+
TtVtG
|
|
1475
|
+
|
|
1476
|
+
"""
|
|
1477
|
+
it = iter(fragments)
|
|
1478
|
+
try:
|
|
1479
|
+
result = next(it) # first element (no leading separator)
|
|
1480
|
+
except StopIteration:
|
|
1481
|
+
# Empty iterable -> return empty Dseqrecord in analogy with
|
|
1482
|
+
# str.join
|
|
1483
|
+
return Dseqrecord("")
|
|
1484
|
+
|
|
1485
|
+
# Interleave: result = first + sep + x + sep + y + ...
|
|
1486
|
+
for x in it:
|
|
1487
|
+
result = result + self + x
|
|
1488
|
+
return result
|