pydna 5.5.5__tar.gz → 5.5.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pydna-5.5.5 → pydna-5.5.7}/PKG-INFO +2 -2
- {pydna-5.5.5 → pydna-5.5.7}/pyproject.toml +2 -2
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/__init__.py +7 -3
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/all.py +1 -12
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/assembly2.py +97 -21
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/dseq.py +21 -26
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/dseqrecord.py +31 -14
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/seqrecord.py +3 -3
- pydna-5.5.5/src/pydna/download.py +0 -23
- {pydna-5.5.5 → pydna-5.5.7}/LICENSE.txt +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/README.md +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/_pretty.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/_thermodynamic_data.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/alphabet.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/amplicon.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/amplify.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/assembly.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/codon.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/common_sub_strings.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/contig.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/cre_lox.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/crispr.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/design.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/fakeseq.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/fusionpcr.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/gateway.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/gel.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/genbank.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/genbankfixer.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/ladders.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/oligonucleotide_hybridization.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/opencloning_models.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/parsers.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/primer.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/primer_screen.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/readers.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/seq.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/sequence_picker.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/sequence_regex.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/threading_timer_decorator_exit.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/tm.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/types.py +0 -0
- {pydna-5.5.5 → pydna-5.5.7}/src/pydna/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pydna
|
|
3
|
-
Version: 5.5.5
|
|
3
|
+
Version: 5.5.7
|
|
4
4
|
Summary: Representing double stranded DNA and functions for simulating cloning and homologous recombination between DNA molecules.
|
|
5
5
|
License: BSD
|
|
6
6
|
License-File: LICENSE.txt
|
|
@@ -39,7 +39,7 @@ Requires-Dist: pydivsufsort (>=0.0.14)
|
|
|
39
39
|
Requires-Dist: pyfiglet (==0.8.post1)
|
|
40
40
|
Requires-Dist: pyparsing (>=2.4.7) ; extra == "download"
|
|
41
41
|
Requires-Dist: pyperclip (>=1.8.2) ; extra == "clipboard"
|
|
42
|
-
Requires-Dist: regex (>=2024.11.6,<
|
|
42
|
+
Requires-Dist: regex (>=2024.11.6,<2027.0.0)
|
|
43
43
|
Requires-Dist: requests (>=2.26.0) ; extra == "download"
|
|
44
44
|
Requires-Dist: scipy (>=1.11.3) ; (python_version >= "3.12") and (extra == "gel")
|
|
45
45
|
Requires-Dist: scipy (>=1.9.3) ; (python_version < "3.12") and (extra == "gel")
|
|
@@ -35,7 +35,7 @@ license = "BSD"
|
|
|
35
35
|
name = "pydna"
|
|
36
36
|
readme = "README.md"
|
|
37
37
|
repository = "https://github.com/pydna-group/pydna/tree/master"
|
|
38
|
-
version = "5.5.5"
|
|
38
|
+
version = "5.5.7"
|
|
39
39
|
[tool.poetry.dependencies]
|
|
40
40
|
appdirs = ">=1.4.4"
|
|
41
41
|
biopython = "1.85"
|
|
@@ -60,7 +60,7 @@ scipy = [
|
|
|
60
60
|
{ version = ">=1.9.3", python = "<3.12", optional = true },
|
|
61
61
|
]
|
|
62
62
|
seguid = ">=0.0.5"
|
|
63
|
-
regex = "
|
|
63
|
+
regex = ">=2024.11.6,<2027.0.0"
|
|
64
64
|
opencloning-linkml = "^0.4.9"
|
|
65
65
|
[tool.poetry.extras]
|
|
66
66
|
clipboard = ["pyperclip"]
|
|
@@ -49,23 +49,27 @@ functions with a lowercase letter:
|
|
|
49
49
|
├── amplify
|
|
50
50
|
│ ├── Anneal
|
|
51
51
|
│ └── pcr
|
|
52
|
+
│
|
|
52
53
|
├── assembly
|
|
53
54
|
│ └── Assembly
|
|
55
|
+
│
|
|
54
56
|
├── design
|
|
55
57
|
│ ├── assembly_fragments
|
|
56
58
|
│ └── primer_design
|
|
57
|
-
|
|
58
|
-
│ └── download_text
|
|
59
|
+
│
|
|
59
60
|
├── dseqrecord
|
|
60
61
|
│ └── Dseqrecord
|
|
61
62
|
├── gel
|
|
62
63
|
│ └── Gel
|
|
64
|
+
│
|
|
63
65
|
├── genbank
|
|
64
66
|
│ ├── genbank
|
|
65
67
|
│ └── Genbank
|
|
68
|
+
│
|
|
66
69
|
├── parsers
|
|
67
70
|
│ ├── parse
|
|
68
71
|
│ └── parse_primers
|
|
72
|
+
│
|
|
69
73
|
└── readers
|
|
70
74
|
├── read
|
|
71
75
|
└── read_primers
|
|
@@ -143,7 +147,7 @@ __license__ = "BSD"
|
|
|
143
147
|
__maintainer__ = "Björn Johansson"
|
|
144
148
|
__email__ = "bjorn_johansson@bio.uminho.pt"
|
|
145
149
|
__status__ = "Development" # "Production" #"Prototype"
|
|
146
|
-
__version__ = "5.5.5"
|
|
150
|
+
__version__ = "5.5.7"
|
|
147
151
|
|
|
148
152
|
|
|
149
153
|
class _PydnaWarning(Warning):
|
|
@@ -18,7 +18,7 @@ ttt
|
|
|
18
18
|
Dseqrecord(-3)
|
|
19
19
|
>>> from pydna.all import __all__
|
|
20
20
|
>>> __all__
|
|
21
|
-
['Anneal', 'pcr', 'Assembly', 'genbank', 'Genbank', '
|
|
21
|
+
['Anneal', 'pcr', 'Assembly', 'genbank', 'Genbank', 'Dseqrecord',
|
|
22
22
|
'Dseq', 'read', 'read_primer', 'parse', 'parse_primers', 'primer_design', 'assembly_fragments', 'eq', 'gbtext_clean']
|
|
23
23
|
>>>
|
|
24
24
|
"""
|
|
@@ -30,20 +30,16 @@ __all__ = [
|
|
|
30
30
|
"Assembly",
|
|
31
31
|
"genbank",
|
|
32
32
|
"Genbank",
|
|
33
|
-
"download_text",
|
|
34
33
|
"Dseqrecord",
|
|
35
34
|
"Dseq",
|
|
36
35
|
"read",
|
|
37
36
|
"read_primer",
|
|
38
37
|
"parse",
|
|
39
38
|
"parse_primers",
|
|
40
|
-
# "ape",
|
|
41
39
|
"primer_design",
|
|
42
40
|
"assembly_fragments",
|
|
43
|
-
# "circular_assembly_fragments",
|
|
44
41
|
"eq",
|
|
45
42
|
"gbtext_clean",
|
|
46
|
-
# "PrimerList",
|
|
47
43
|
]
|
|
48
44
|
|
|
49
45
|
|
|
@@ -52,20 +48,13 @@ from pydna.amplify import pcr
|
|
|
52
48
|
from pydna.assembly import Assembly
|
|
53
49
|
from pydna.genbank import genbank
|
|
54
50
|
from pydna.genbank import Genbank
|
|
55
|
-
from pydna.download import download_text
|
|
56
51
|
from pydna.dseqrecord import Dseqrecord
|
|
57
52
|
from pydna.dseq import Dseq
|
|
58
53
|
from pydna.readers import read
|
|
59
54
|
from pydna.readers import read_primer
|
|
60
55
|
from pydna.parsers import parse
|
|
61
56
|
from pydna.parsers import parse_primers
|
|
62
|
-
|
|
63
|
-
# from pydna.editor import ape
|
|
64
57
|
from pydna.design import primer_design
|
|
65
58
|
from pydna.design import assembly_fragments
|
|
66
|
-
|
|
67
|
-
# from pydna.design import circular_assembly_fragments
|
|
68
59
|
from pydna.utils import eq
|
|
69
60
|
from pydna.genbankfixer import gbtext_clean
|
|
70
|
-
|
|
71
|
-
# from pydna.myprimers import PrimerList
|
|
@@ -357,7 +357,18 @@ def common_sub_strings(
|
|
|
357
357
|
return [r for r in results if r not in shifted_matches]
|
|
358
358
|
|
|
359
359
|
|
|
360
|
-
def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
360
|
+
def _get_trim_end_info(
|
|
361
|
+
end_info: tuple[str, str], trim_ends: str, is_five_prime: bool
|
|
362
|
+
) -> int | None:
|
|
363
|
+
"""Utility function to get the trim information for terminal_overlap."""
|
|
364
|
+
if end_info[0] == trim_ends:
|
|
365
|
+
return len(end_info[1]) if is_five_prime else len(end_info[1]) * -1
|
|
366
|
+
return 0 if is_five_prime else None
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def terminal_overlap(
|
|
370
|
+
seqx: Dseqrecord, seqy: Dseqrecord, limit=25, trim_ends: None | str = None
|
|
371
|
+
):
|
|
361
372
|
"""
|
|
362
373
|
Assembly algorithm to find terminal overlaps (e.g. for Gibson assembly).
|
|
363
374
|
The order matters, we want alignments like:
|
|
@@ -382,6 +393,9 @@ def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
|
382
393
|
The second sequence
|
|
383
394
|
limit : int
|
|
384
395
|
Minimum length of the overlap
|
|
396
|
+
trim_ends : str
|
|
397
|
+
The ends to trim, either '5' or '3'
|
|
398
|
+
If None, no trimming is done
|
|
385
399
|
|
|
386
400
|
Returns
|
|
387
401
|
-------
|
|
@@ -389,32 +403,64 @@ def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
|
389
403
|
A list of overlaps between the two sequences
|
|
390
404
|
|
|
391
405
|
>>> from pydna.dseqrecord import Dseqrecord
|
|
392
|
-
>>> from pydna.assembly2 import gibson_overlap
|
|
406
|
+
>>> from pydna.assembly2 import terminal_overlap
|
|
393
407
|
>>> x = Dseqrecord("ttactaAAAAAA")
|
|
394
408
|
>>> y = Dseqrecord("AAAAAAcgcacg")
|
|
395
|
-
>>>
|
|
409
|
+
>>> terminal_overlap(x, y, limit=5)
|
|
396
410
|
[(6, 0, 6), (7, 0, 5)]
|
|
397
|
-
>>>
|
|
411
|
+
>>> terminal_overlap(y, x, limit=5)
|
|
412
|
+
[]
|
|
413
|
+
|
|
414
|
+
Trimming the ends:
|
|
415
|
+
>>> from pydna.dseq import Dseq
|
|
416
|
+
>>> from pydna.dseqrecord import Dseqrecord
|
|
417
|
+
>>> from pydna.assembly2 import terminal_overlap
|
|
418
|
+
>>> x = Dseqrecord(Dseq.from_full_sequence_and_overhangs("aaaACGT", 0, 3))
|
|
419
|
+
>>> y = Dseqrecord(Dseq.from_full_sequence_and_overhangs("ACGTccc", 3, 0))
|
|
420
|
+
>>> terminal_overlap(x, y, limit=4)
|
|
421
|
+
[(3, 0, 4)]
|
|
422
|
+
>>> terminal_overlap(x, y, limit=4, trim_ends="5'")
|
|
423
|
+
[(3, 0, 4)]
|
|
424
|
+
>>> terminal_overlap(x, y, limit=4, trim_ends="3'")
|
|
398
425
|
[]
|
|
399
426
|
"""
|
|
400
427
|
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
428
|
+
if trim_ends is not None and trim_ends not in ["5'", "3'"]:
|
|
429
|
+
raise ValueError("trim_ends must be '5' or '3'")
|
|
430
|
+
|
|
431
|
+
if trim_ends is None:
|
|
432
|
+
trim_x_left, trim_x_right, trim_y_left, trim_y_right = (0, None, 0, None)
|
|
433
|
+
stringx = str(seqx.seq).upper()
|
|
434
|
+
stringy = str(seqy.seq).upper()
|
|
435
|
+
else:
|
|
436
|
+
trim_x_right = _get_trim_end_info(
|
|
437
|
+
seqx.seq.three_prime_end(), trim_ends, is_five_prime=False
|
|
438
|
+
)
|
|
439
|
+
trim_y_left = _get_trim_end_info(
|
|
440
|
+
seqy.seq.five_prime_end(), trim_ends, is_five_prime=True
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
# I actually don't think these two are needed, since only the terminal
|
|
444
|
+
# join between x_right and y_left is tested, but maybe there is some edge-case
|
|
445
|
+
# that I am missing, so keeping them just in case.
|
|
446
|
+
trim_x_left = _get_trim_end_info(
|
|
447
|
+
seqx.seq.five_prime_end(), trim_ends, is_five_prime=True
|
|
448
|
+
)
|
|
449
|
+
trim_y_right = _get_trim_end_info(
|
|
450
|
+
seqy.seq.three_prime_end(), trim_ends, is_five_prime=False
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
stringx = str(seqx.seq[trim_x_left:trim_x_right]).upper()
|
|
454
|
+
stringy = str(seqy.seq[trim_y_left:trim_y_right]).upper()
|
|
455
|
+
|
|
412
456
|
# We have to convert to list because we need to modify the matches
|
|
413
457
|
matches = [
|
|
414
458
|
list(m)
|
|
415
459
|
for m in common_sub_strings_str(stringx, stringy, limit)
|
|
416
460
|
if (m[1] == 0 and m[0] + m[2] == len(stringx))
|
|
417
461
|
]
|
|
462
|
+
|
|
463
|
+
# Shift the matches if the left end has been trimmed
|
|
418
464
|
for match in matches:
|
|
419
465
|
match[0] += trim_x_left
|
|
420
466
|
match[1] += trim_y_left
|
|
@@ -423,6 +469,31 @@ def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
|
423
469
|
return [tuple(m) for m in matches]
|
|
424
470
|
|
|
425
471
|
|
|
472
|
+
def gibson_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
473
|
+
"""
|
|
474
|
+
Assembly algorithm to find terminal overlaps for Gibson assembly.
|
|
475
|
+
It is a wrapper around terminal_overlap with trim_ends="5'".
|
|
476
|
+
"""
|
|
477
|
+
|
|
478
|
+
return terminal_overlap(seqx, seqy, limit, trim_ends="5'")
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def in_fusion_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
482
|
+
"""
|
|
483
|
+
Assembly algorithm to find terminal overlaps for in-fusion assembly.
|
|
484
|
+
It is a wrapper around terminal_overlap with trim_ends="3'".
|
|
485
|
+
"""
|
|
486
|
+
return terminal_overlap(seqx, seqy, limit, trim_ends="3'")
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def pcr_fusion_overlap(seqx: Dseqrecord, seqy: Dseqrecord, limit=25):
|
|
490
|
+
"""
|
|
491
|
+
Assembly algorithm to find terminal overlaps for PCR fusion assembly.
|
|
492
|
+
It is a wrapper around terminal_overlap with trim_ends=None.
|
|
493
|
+
"""
|
|
494
|
+
return terminal_overlap(seqx, seqy, limit, trim_ends=None)
|
|
495
|
+
|
|
496
|
+
|
|
426
497
|
def sticky_end_sub_strings(seqx: Dseqrecord, seqy: Dseqrecord, limit: bool = False):
|
|
427
498
|
"""
|
|
428
499
|
Assembly algorithm for ligation of sticky ends.
|
|
@@ -796,7 +867,7 @@ def assemble(
|
|
|
796
867
|
f_u = fragments[u - 1] if u > 0 else fragments[-u - 1].reverse_complement()
|
|
797
868
|
f_v = fragments[v - 1] if v > 0 else fragments[-v - 1].reverse_complement()
|
|
798
869
|
seq_u = str(loc_u.extract(f_u).seq)
|
|
799
|
-
seq_v = str(loc_v.extract(f_v).seq.
|
|
870
|
+
seq_v = str(loc_v.extract(f_v).seq.reverse_complement())
|
|
800
871
|
# Test if seq_u and seq_v anneal
|
|
801
872
|
if not anneal_strands(seq_u, seq_v):
|
|
802
873
|
raise ValueError("Mismatch in assembly")
|
|
@@ -1578,8 +1649,9 @@ class Assembly:
|
|
|
1578
1649
|
fragment2[f2_1_start:f2_2_end]
|
|
1579
1650
|
)
|
|
1580
1651
|
|
|
1581
|
-
|
|
1582
|
-
|
|
1652
|
+
# Safeguard
|
|
1653
|
+
if overlap_diff == 0: # pragma: no cover
|
|
1654
|
+
raise AssertionError("Overlap is 0")
|
|
1583
1655
|
|
|
1584
1656
|
if overlap_diff > 0:
|
|
1585
1657
|
new_loc_f1_1 = create_location(
|
|
@@ -1874,7 +1946,7 @@ class PCRAssembly(Assembly):
|
|
|
1874
1946
|
results = super().assemble_linear(only_adjacent_edges, max_assemblies)
|
|
1875
1947
|
for result in results:
|
|
1876
1948
|
rp = self.fragments[2]
|
|
1877
|
-
result.seq = result.seq[: -len(rp)] + Dseq(str(rp.seq.
|
|
1949
|
+
result.seq = result.seq[: -len(rp)] + Dseq(str(rp.seq.reverse_complement()))
|
|
1878
1950
|
return results
|
|
1879
1951
|
|
|
1880
1952
|
|
|
@@ -2077,7 +2149,9 @@ def in_fusion_assembly(
|
|
|
2077
2149
|
List of assembled DNA molecules
|
|
2078
2150
|
"""
|
|
2079
2151
|
|
|
2080
|
-
products =
|
|
2152
|
+
products = common_function_assembly_products(
|
|
2153
|
+
frags, limit, in_fusion_overlap, circular_only
|
|
2154
|
+
)
|
|
2081
2155
|
return _recast_sources(products, InFusionSource)
|
|
2082
2156
|
|
|
2083
2157
|
|
|
@@ -2101,7 +2175,9 @@ def fusion_pcr_assembly(
|
|
|
2101
2175
|
list[Dseqrecord]
|
|
2102
2176
|
List of assembled DNA molecules
|
|
2103
2177
|
"""
|
|
2104
|
-
products =
|
|
2178
|
+
products = common_function_assembly_products(
|
|
2179
|
+
frags, limit, pcr_fusion_overlap, circular_only
|
|
2180
|
+
)
|
|
2105
2181
|
return _recast_sources(products, OverlapExtensionPCRLigationSource)
|
|
2106
2182
|
|
|
2107
2183
|
|
|
@@ -785,7 +785,7 @@ class Dseq(Seq):
|
|
|
785
785
|
>>> ssobj
|
|
786
786
|
Dseq(-7)
|
|
787
787
|
GATTACA
|
|
788
|
-
|
|
788
|
+
|||||||
|
|
789
789
|
>>> round(ssobj.mw(), 1)
|
|
790
790
|
2184.4
|
|
791
791
|
>>> ds_lin_obj2 = Dseq("GATZFCA")
|
|
@@ -908,7 +908,8 @@ class Dseq(Seq):
|
|
|
908
908
|
w, c = representation_tuple(
|
|
909
909
|
self._data.decode("ascii"), length_limit_for_repr=length_limit_for_repr
|
|
910
910
|
)
|
|
911
|
-
|
|
911
|
+
w = w or "|" * len(c)
|
|
912
|
+
c = c or "|" * len(w)
|
|
912
913
|
return pretty_str(header + "\n" + w + "\n" + c)
|
|
913
914
|
|
|
914
915
|
def reverse_complement(self) -> "Dseq":
|
|
@@ -1574,7 +1575,7 @@ class Dseq(Seq):
|
|
|
1574
1575
|
ctag
|
|
1575
1576
|
>>> ds.nibble_five_prime_left(4)
|
|
1576
1577
|
Dseq(-4)
|
|
1577
|
-
|
|
1578
|
+
||||
|
|
1578
1579
|
ctag
|
|
1579
1580
|
>>> ds = Dseq.from_representation(
|
|
1580
1581
|
... '''
|
|
@@ -1601,9 +1602,8 @@ class Dseq(Seq):
|
|
|
1601
1602
|
DESCRIPTION.
|
|
1602
1603
|
|
|
1603
1604
|
"""
|
|
1604
|
-
recessed = copy.deepcopy(self)
|
|
1605
1605
|
n += max(0, self.ovhg or 0)
|
|
1606
|
-
|
|
1606
|
+
return Dseq(
|
|
1607
1607
|
self._data[:n]
|
|
1608
1608
|
.translate(dscode_to_crick_table)
|
|
1609
1609
|
.translate(complement_table_for_dscode)
|
|
@@ -1611,7 +1611,6 @@ class Dseq(Seq):
|
|
|
1611
1611
|
.lstrip()
|
|
1612
1612
|
+ self._data[n:]
|
|
1613
1613
|
)
|
|
1614
|
-
return recessed
|
|
1615
1614
|
|
|
1616
1615
|
def nibble_five_prime_right(self: DseqType, n: int = 1) -> DseqType:
|
|
1617
1616
|
"""
|
|
@@ -1657,7 +1656,7 @@ class Dseq(Seq):
|
|
|
1657
1656
|
>>> ds.nibble_five_prime_right(4)
|
|
1658
1657
|
Dseq(-4)
|
|
1659
1658
|
gatc
|
|
1660
|
-
|
|
1659
|
+
||||
|
|
1661
1660
|
>>> ds = Dseq.from_representation(
|
|
1662
1661
|
... '''
|
|
1663
1662
|
... gatc
|
|
@@ -1668,18 +1667,16 @@ class Dseq(Seq):
|
|
|
1668
1667
|
gatc
|
|
1669
1668
|
ctag
|
|
1670
1669
|
"""
|
|
1671
|
-
recessed = copy.deepcopy(self)
|
|
1672
1670
|
n = len(self) - n
|
|
1673
1671
|
ovhg = len(self) if self.right_ovhg is None else self.right_ovhg
|
|
1674
1672
|
n -= max(0, ovhg)
|
|
1675
|
-
|
|
1673
|
+
return Dseq(
|
|
1676
1674
|
self._data[:n]
|
|
1677
1675
|
+ self._data[n:]
|
|
1678
1676
|
.translate(dscode_to_watson_table)
|
|
1679
1677
|
.translate(dscode_to_watson_tail_table)
|
|
1680
1678
|
.lstrip()
|
|
1681
1679
|
)
|
|
1682
|
-
return recessed
|
|
1683
1680
|
|
|
1684
1681
|
exo1_front = nibble_five_prime_left # TODO: consider using the new names
|
|
1685
1682
|
exo1_end = nibble_five_prime_right # TODO: consider using the new names
|
|
@@ -1728,7 +1725,7 @@ class Dseq(Seq):
|
|
|
1728
1725
|
>>> ds.nibble_three_prime_left(4)
|
|
1729
1726
|
Dseq(-4)
|
|
1730
1727
|
gatc
|
|
1731
|
-
|
|
1728
|
+
||||
|
|
1732
1729
|
>>> ds = Dseq.from_representation(
|
|
1733
1730
|
... '''
|
|
1734
1731
|
... gatc
|
|
@@ -1745,14 +1742,13 @@ class Dseq(Seq):
|
|
|
1745
1742
|
"""
|
|
1746
1743
|
ovhg = len(self) if self.ovhg is None else self.ovhg
|
|
1747
1744
|
n -= min(0, ovhg)
|
|
1748
|
-
|
|
1745
|
+
return Dseq(
|
|
1749
1746
|
self._data[:n]
|
|
1750
1747
|
.translate(dscode_to_watson_table)
|
|
1751
1748
|
.translate(dscode_to_watson_tail_table)
|
|
1752
1749
|
.lstrip()
|
|
1753
1750
|
+ self._data[n:]
|
|
1754
1751
|
)
|
|
1755
|
-
return recessed
|
|
1756
1752
|
|
|
1757
1753
|
def nibble_three_prime_right(self: DseqType, n=1) -> DseqType:
|
|
1758
1754
|
"""
|
|
@@ -1797,7 +1793,7 @@ class Dseq(Seq):
|
|
|
1797
1793
|
ctag
|
|
1798
1794
|
>>> ds.nibble_three_prime_right(4)
|
|
1799
1795
|
Dseq(-4)
|
|
1800
|
-
|
|
1796
|
+
||||
|
|
1801
1797
|
ctag
|
|
1802
1798
|
>>> ds = Dseq.from_representation(
|
|
1803
1799
|
... '''
|
|
@@ -1812,7 +1808,7 @@ class Dseq(Seq):
|
|
|
1812
1808
|
n = len(self) - n
|
|
1813
1809
|
ovhg = len(self) if self.right_ovhg is None else self.right_ovhg
|
|
1814
1810
|
n += min(0, ovhg)
|
|
1815
|
-
|
|
1811
|
+
return Dseq(
|
|
1816
1812
|
self._data[:n]
|
|
1817
1813
|
+ self._data[n:]
|
|
1818
1814
|
.translate(dscode_to_crick_table)
|
|
@@ -1820,7 +1816,6 @@ class Dseq(Seq):
|
|
|
1820
1816
|
.translate(dscode_to_crick_tail_table)
|
|
1821
1817
|
.lstrip()
|
|
1822
1818
|
)
|
|
1823
|
-
return recessed
|
|
1824
1819
|
|
|
1825
1820
|
def no_cutters(
|
|
1826
1821
|
self, batch: Union[RestrictionBatch, None] = None
|
|
@@ -1878,7 +1873,7 @@ class Dseq(Seq):
|
|
|
1878
1873
|
"""docstring."""
|
|
1879
1874
|
w = f"{self.ovhg * '-'}{self.watson}{'-' * (-self.ovhg + len(self.crick) - len(self.watson))}".upper()
|
|
1880
1875
|
c = f"{'-' * (self.ovhg + len(self.watson) - len(self.crick))}{self.crick}{-self.ovhg * '-'}".upper()
|
|
1881
|
-
cs = ldseguid(w, c, alphabet="{DNA-extended}")
|
|
1876
|
+
cs = ldseguid(w, c, alphabet="{DNA-extended},AU")
|
|
1882
1877
|
return cs
|
|
1883
1878
|
|
|
1884
1879
|
def isblunt(self) -> bool:
|
|
@@ -2250,7 +2245,7 @@ class Dseq(Seq):
|
|
|
2250
2245
|
# argument is probably a RestrictionBatch
|
|
2251
2246
|
enzymes = [e for e in enzymes[0]]
|
|
2252
2247
|
|
|
2253
|
-
enzymes = flatten(enzymes)
|
|
2248
|
+
enzymes = list(dict.fromkeys(flatten(enzymes))) # remove duplicate enzymes
|
|
2254
2249
|
out = list()
|
|
2255
2250
|
for e in enzymes:
|
|
2256
2251
|
# Positions of the cut on the watson strand. They are 1-based, so we subtract
|
|
@@ -2589,7 +2584,7 @@ class Dseq(Seq):
|
|
|
2589
2584
|
>>> strands[0]
|
|
2590
2585
|
Dseq(-2)
|
|
2591
2586
|
ta
|
|
2592
|
-
|
|
2587
|
+
||
|
|
2593
2588
|
>>> ds = Dseq("tagaaptapgtatg")
|
|
2594
2589
|
>>> ds
|
|
2595
2590
|
Dseq(-14)
|
|
@@ -2602,7 +2597,7 @@ class Dseq(Seq):
|
|
|
2602
2597
|
atctt catac
|
|
2603
2598
|
>>> strands[0]
|
|
2604
2599
|
Dseq(-2)
|
|
2605
|
-
|
|
2600
|
+
||
|
|
2606
2601
|
at
|
|
2607
2602
|
"""
|
|
2608
2603
|
|
|
@@ -2636,7 +2631,7 @@ class Dseq(Seq):
|
|
|
2636
2631
|
>>> strands[0]
|
|
2637
2632
|
Dseq(-2)
|
|
2638
2633
|
ta
|
|
2639
|
-
|
|
2634
|
+
||
|
|
2640
2635
|
>>> new, strands = ds.shed_ss_dna([],[(6, 8)])
|
|
2641
2636
|
>>> new
|
|
2642
2637
|
Dseq(-14)
|
|
@@ -2644,7 +2639,7 @@ class Dseq(Seq):
|
|
|
2644
2639
|
atcttc ccatac
|
|
2645
2640
|
>>> strands[0]
|
|
2646
2641
|
Dseq(-2)
|
|
2647
|
-
|
|
2642
|
+
||
|
|
2648
2643
|
at
|
|
2649
2644
|
>>> ds = Dseq("tagaagtaggtatg")
|
|
2650
2645
|
>>> new, (strand1, strand2) = ds.shed_ss_dna([(6, 8), (9, 11)],[])
|
|
@@ -2655,11 +2650,11 @@ class Dseq(Seq):
|
|
|
2655
2650
|
>>> strand1
|
|
2656
2651
|
Dseq(-2)
|
|
2657
2652
|
ta
|
|
2658
|
-
|
|
2653
|
+
||
|
|
2659
2654
|
>>> strand2
|
|
2660
2655
|
Dseq(-2)
|
|
2661
2656
|
gt
|
|
2662
|
-
|
|
2657
|
+
||
|
|
2663
2658
|
"""
|
|
2664
2659
|
|
|
2665
2660
|
watson_cutpairs = watson_cutpairs or list()
|
|
@@ -2878,14 +2873,14 @@ class Dseq(Seq):
|
|
|
2878
2873
|
>>> Dseq(parts.sticky_left5)
|
|
2879
2874
|
Dseq(-3)
|
|
2880
2875
|
GGG
|
|
2881
|
-
|
|
2876
|
+
|||
|
|
2882
2877
|
>>> Dseq(parts.middle)
|
|
2883
2878
|
Dseq(-3)
|
|
2884
2879
|
ATC
|
|
2885
2880
|
TAG
|
|
2886
2881
|
>>> Dseq(parts.sticky_right5)
|
|
2887
2882
|
Dseq(-3)
|
|
2888
|
-
|
|
2883
|
+
|||
|
|
2889
2884
|
TCA
|
|
2890
2885
|
|
|
2891
2886
|
Parameters
|
|
@@ -26,7 +26,7 @@ from Bio.SeqFeature import CompoundLocation
|
|
|
26
26
|
from Bio.SeqFeature import SimpleLocation
|
|
27
27
|
from pydna.seqrecord import SeqRecord
|
|
28
28
|
from Bio.Seq import translate
|
|
29
|
-
from
|
|
29
|
+
from Bio.Seq import Seq as BPSeq
|
|
30
30
|
import copy
|
|
31
31
|
import operator
|
|
32
32
|
import os
|
|
@@ -453,9 +453,23 @@ class Dseqrecord(SeqRecord):
|
|
|
453
453
|
feature.location += len(nucleotides)
|
|
454
454
|
return newseq
|
|
455
455
|
|
|
456
|
-
def format(self,
|
|
456
|
+
def format(self, format: str = "gb"):
|
|
457
457
|
"""Returns the sequence as a string using a format supported by Biopython
|
|
458
458
|
SeqIO [#]_. Default is "gb" which is short for Genbank.
|
|
459
|
+
Allowed Formats are for example:
|
|
460
|
+
|
|
461
|
+
* "fasta": The standard FASTA format.
|
|
462
|
+
* "fasta-2line": No line wrapping and exactly two lines per record.
|
|
463
|
+
* "genbank" (or "gb"): The GenBank flat file format.
|
|
464
|
+
* "embl": The EMBL flat file format.
|
|
465
|
+
* "imgt": The IMGT variant of the EMBL format.
|
|
466
|
+
|
|
467
|
+
The format string can be modified with the keyword "dscode" if
|
|
468
|
+
the underlying dscode string is desired in the output. for example:
|
|
469
|
+
::
|
|
470
|
+
|
|
471
|
+
Dseqrecord("PEXIGATCQFZJ").format("fasta-2line dscode")
|
|
472
|
+
|
|
459
473
|
|
|
460
474
|
Examples
|
|
461
475
|
--------
|
|
@@ -477,6 +491,12 @@ class Dseqrecord(SeqRecord):
|
|
|
477
491
|
ORIGIN
|
|
478
492
|
1 aaa
|
|
479
493
|
//
|
|
494
|
+
>>> print(Dseqrecord("PEXIGATCQFZJ").format("fasta-2line"))
|
|
495
|
+
>id description
|
|
496
|
+
GATCGATCGATC
|
|
497
|
+
>>> print(Dseqrecord("PEXIGATCQFZJ").format("fasta-2line dscode"))
|
|
498
|
+
>id description
|
|
499
|
+
PEXIGATCQFZJ
|
|
480
500
|
|
|
481
501
|
|
|
482
502
|
References
|
|
@@ -486,13 +506,19 @@ class Dseqrecord(SeqRecord):
|
|
|
486
506
|
|
|
487
507
|
|
|
488
508
|
"""
|
|
489
|
-
|
|
490
509
|
record = copy.deepcopy(self)
|
|
491
|
-
if
|
|
510
|
+
if "dscode" in format:
|
|
511
|
+
format = format.replace("dscode", "")
|
|
512
|
+
obj = BPSeq("")
|
|
513
|
+
obj._data = record.seq._data
|
|
514
|
+
record.seq = obj
|
|
515
|
+
format = format.strip(" -")
|
|
516
|
+
if format in ("genbank", "gb") and self.circular:
|
|
492
517
|
record.annotations["topology"] = "circular"
|
|
493
518
|
else:
|
|
494
519
|
record.annotations["topology"] = "linear"
|
|
495
|
-
|
|
520
|
+
|
|
521
|
+
return SeqRecord.format(record, format).strip()
|
|
496
522
|
|
|
497
523
|
def write(self, filename=None, f="gb"):
|
|
498
524
|
"""Writes the Dseqrecord to a file using the format f, which must
|
|
@@ -851,15 +877,6 @@ class Dseqrecord(SeqRecord):
|
|
|
851
877
|
return self.apply_cut(cut, cut)
|
|
852
878
|
else:
|
|
853
879
|
answer = Dseqrecord("")
|
|
854
|
-
identifier = "part_{id}".format(id=self.id)
|
|
855
|
-
if answer.features:
|
|
856
|
-
sf = max(answer.features, key=len) # default
|
|
857
|
-
if "label" in sf.qualifiers:
|
|
858
|
-
identifier = " ".join(sf.qualifiers["label"])
|
|
859
|
-
elif "note" in sf.qualifiers:
|
|
860
|
-
identifier = " ".join(sf.qualifiers["note"])
|
|
861
|
-
answer.id = identifier_from_string(identifier)[:16]
|
|
862
|
-
answer.name = identifier_from_string("part_{name}".format(name=self.name))[:16]
|
|
863
880
|
return answer
|
|
864
881
|
|
|
865
882
|
def __eq__(self, other):
|
|
@@ -21,7 +21,7 @@ from pydna.seq import ProteinSeq
|
|
|
21
21
|
from pydna.common_sub_strings import common_sub_strings
|
|
22
22
|
|
|
23
23
|
from Bio.Data.CodonTable import TranslationError
|
|
24
|
-
from Bio.SeqRecord import SeqRecord
|
|
24
|
+
from Bio.SeqRecord import SeqRecord as BioSeqRecordSeqRecord
|
|
25
25
|
from Bio.SeqFeature import SimpleLocation
|
|
26
26
|
from Bio.SeqFeature import CompoundLocation
|
|
27
27
|
from pydna.seq import Seq
|
|
@@ -37,7 +37,7 @@ from warnings import warn
|
|
|
37
37
|
import datetime
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
class SeqRecord(SeqRecord):
|
|
40
|
+
class SeqRecord(BioSeqRecordSeqRecord):
|
|
41
41
|
"""
|
|
42
42
|
A subclass of the Biopython SeqRecord class.
|
|
43
43
|
|
|
@@ -86,7 +86,7 @@ class SeqRecord(SeqRecord):
|
|
|
86
86
|
self.annotations = {ps(k): ps(v) for k, v in self.annotations.items()}
|
|
87
87
|
|
|
88
88
|
@classmethod
|
|
89
|
-
def from_Bio_SeqRecord(clc, sr:
|
|
89
|
+
def from_Bio_SeqRecord(clc, sr: BioSeqRecordSeqRecord):
|
|
90
90
|
"""Creates a pydnaSeqRecord from a Biopython SeqRecord."""
|
|
91
91
|
# https://stackoverflow.com/questions/15404256/changing-the-\
|
|
92
92
|
# class-of-a-python-object-casting
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
# Copyright 2013-2023 by Björn Johansson. All rights reserved.
|
|
4
|
-
# This code is part of the Python-dna distribution and governed by its
|
|
5
|
-
# license. Please see the LICENSE.txt file that should have been included
|
|
6
|
-
# as part of this package.
|
|
7
|
-
"""Provides a function for downloading online text files."""
|
|
8
|
-
|
|
9
|
-
import textwrap
|
|
10
|
-
|
|
11
|
-
from pydna._pretty import pretty_str as ps
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def download_text(url):
|
|
15
|
-
"""docstring."""
|
|
16
|
-
import requests
|
|
17
|
-
|
|
18
|
-
req = requests.get(url)
|
|
19
|
-
|
|
20
|
-
result = textwrap.dedent(req.text).strip()
|
|
21
|
-
result = result.replace("\r\n", "\n").replace("\r", "\n")
|
|
22
|
-
|
|
23
|
-
return ps(result)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|