pydna 5.5.4__py3-none-any.whl → 5.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydna/__init__.py +30 -195
- pydna/_pretty.py +8 -8
- pydna/_thermodynamic_data.py +3 -3
- pydna/all.py +1 -12
- pydna/alphabet.py +995 -0
- pydna/amplicon.py +19 -24
- pydna/amplify.py +75 -95
- pydna/assembly.py +64 -81
- pydna/assembly2.py +375 -310
- pydna/codon.py +4 -4
- pydna/common_sub_strings.py +6 -8
- pydna/contig.py +203 -10
- pydna/design.py +176 -60
- pydna/dseq.py +1788 -718
- pydna/dseqrecord.py +197 -179
- pydna/gateway.py +6 -6
- pydna/gel.py +5 -5
- pydna/genbank.py +43 -46
- pydna/genbankfixer.py +89 -92
- pydna/ladders.py +11 -12
- pydna/oligonucleotide_hybridization.py +124 -0
- pydna/opencloning_models.py +187 -60
- pydna/parsers.py +45 -32
- pydna/primer.py +4 -4
- pydna/primer_screen.py +833 -0
- pydna/readers.py +14 -9
- pydna/seq.py +137 -47
- pydna/seqrecord.py +54 -62
- pydna/sequence_picker.py +2 -5
- pydna/sequence_regex.py +6 -6
- pydna/tm.py +17 -17
- pydna/types.py +19 -19
- pydna/utils.py +97 -75
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/METADATA +8 -8
- pydna-5.5.6.dist-info/RECORD +42 -0
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/WHEEL +1 -1
- pydna/conftest.py +0 -42
- pydna/download.py +0 -32
- pydna/genbankfile.py +0 -42
- pydna/genbankrecord.py +0 -168
- pydna/goldengate.py +0 -45
- pydna/ligate.py +0 -62
- pydna/user_cloning.py +0 -29
- pydna-5.5.4.dist-info/RECORD +0 -46
- {pydna-5.5.4.dist-info → pydna-5.5.6.dist-info}/licenses/LICENSE.txt +0 -0
pydna/readers.py
CHANGED
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
# as part of this package.
|
|
7
7
|
|
|
8
8
|
"""Provides two functions, read and read_primer."""
|
|
9
|
-
from pydna.parsers import parse
|
|
10
|
-
from pydna.primer import Primer
|
|
9
|
+
from pydna.parsers import parse
|
|
10
|
+
from pydna.primer import Primer
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def read(data, ds=True):
|
|
@@ -39,13 +39,18 @@ def read(data, ds=True):
|
|
|
39
39
|
"""
|
|
40
40
|
|
|
41
41
|
try:
|
|
42
|
-
(result,) =
|
|
42
|
+
(result,) = parse(data, ds)
|
|
43
43
|
except ValueError as err:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
44
|
+
msg = str(err)
|
|
45
|
+
|
|
46
|
+
if "too many" in msg:
|
|
47
|
+
raise ValueError(
|
|
48
|
+
f"More than one sequence found in data ({str(data)[:79]})"
|
|
49
|
+
) from err
|
|
50
|
+
elif "not enough" in msg:
|
|
51
|
+
raise ValueError(f"No sequence found in data ({str(data)[:79]})") from err
|
|
52
|
+
else: # pragma: no cover
|
|
53
|
+
raise err # re-raises the same ValueError with original traceback
|
|
49
54
|
return result
|
|
50
55
|
|
|
51
56
|
|
|
@@ -53,4 +58,4 @@ def read_primer(data):
|
|
|
53
58
|
"""Use this function to read a primer sequence from a string or a local file.
|
|
54
59
|
The usage is similar to the :func:`parse_primer` function."""
|
|
55
60
|
|
|
56
|
-
return
|
|
61
|
+
return Primer(read(data, ds=False))
|
pydna/seq.py
CHANGED
|
@@ -2,56 +2,144 @@
|
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
|
|
4
4
|
"""
|
|
5
|
-
A subclass of
|
|
5
|
+
A subclass of Biopython Bio.Seq.Seq
|
|
6
6
|
|
|
7
7
|
Has a number of extra methods and uses
|
|
8
8
|
the :class:`pydna._pretty_str.pretty_str` class instread of str for a
|
|
9
9
|
nicer output in the IPython shell.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
# from pydna.codon import weights as _weights
|
|
13
12
|
from Bio.SeqUtils.ProtParam import ProteinAnalysis
|
|
14
|
-
from pydna.codon import rare_codons
|
|
13
|
+
from pydna.codon import rare_codons
|
|
15
14
|
from pydna.codon import start as _start
|
|
16
15
|
from pydna.codon import stop as _stop
|
|
17
16
|
from pydna.codon import n_end as _n_end
|
|
18
|
-
from seguid import lsseguid
|
|
19
|
-
from pydna.utils import rc
|
|
17
|
+
from seguid import lsseguid
|
|
18
|
+
from pydna.utils import rc
|
|
20
19
|
|
|
21
|
-
from Bio.SeqUtils import seq3
|
|
22
|
-
from Bio.SeqUtils import gc_fraction
|
|
23
|
-
import re
|
|
20
|
+
from Bio.SeqUtils import seq3
|
|
21
|
+
from Bio.SeqUtils import gc_fraction
|
|
22
|
+
import re
|
|
24
23
|
from Bio.Seq import Seq as _Seq
|
|
25
|
-
from pydna._pretty import PrettyTable
|
|
24
|
+
from pydna._pretty import PrettyTable
|
|
26
25
|
|
|
27
|
-
from typing import List
|
|
28
|
-
|
|
29
|
-
# import logging as _logging
|
|
30
|
-
|
|
31
|
-
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
26
|
+
from typing import List, Optional, Tuple
|
|
32
27
|
|
|
33
28
|
|
|
34
29
|
class Seq(_Seq):
|
|
35
30
|
"""docstring."""
|
|
36
31
|
|
|
32
|
+
# @property
|
|
33
|
+
# def full_sequence(self):
|
|
34
|
+
# return self
|
|
35
|
+
|
|
36
|
+
# def translate(
|
|
37
|
+
# self,
|
|
38
|
+
# *args,
|
|
39
|
+
# stop_symbol: str = "*",
|
|
40
|
+
# to_stop: bool = False,
|
|
41
|
+
# cds: bool = False,
|
|
42
|
+
# gap: str = "-",
|
|
43
|
+
# **kwargs,
|
|
44
|
+
# ) -> "ProteinSeq":
|
|
45
|
+
# """Translate.."""
|
|
46
|
+
# p = super().translate(
|
|
47
|
+
# *args, stop_symbol=stop_symbol, to_stop=to_stop, cds=cds, gap=gap, **kwargs
|
|
48
|
+
# )
|
|
49
|
+
# return ProteinSeq(p._data)
|
|
50
|
+
|
|
37
51
|
def translate(
|
|
38
52
|
self,
|
|
39
|
-
|
|
40
|
-
stop_symbol: str = "*",
|
|
53
|
+
table: [str, int] = "Standard",
|
|
54
|
+
stop_symbol: [str] = "*",
|
|
41
55
|
to_stop: bool = False,
|
|
42
56
|
cds: bool = False,
|
|
43
57
|
gap: str = "-",
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
58
|
+
) -> _Seq:
|
|
59
|
+
|
|
60
|
+
# TODO: is this method needed?
|
|
61
|
+
"""
|
|
62
|
+
Translate into protein.
|
|
63
|
+
|
|
64
|
+
The table argument is the name of a codon table (string). These names
|
|
65
|
+
can be for example "Standard" or "Alternative Yeast Nuclear" for the
|
|
66
|
+
yeast CUG clade where the CUG codon is translated as serine instead
|
|
67
|
+
of the standard leucine.
|
|
68
|
+
|
|
69
|
+
Over forty translation tables are available from the BioPython
|
|
70
|
+
Bio.Data.CodonTable module. Look at the keys of the dictionary
|
|
71
|
+
´CodonTable.ambiguous_generic_by_name´.
|
|
72
|
+
These are based on tables in this file provided by NCBI:
|
|
73
|
+
|
|
74
|
+
https://ftp.ncbi.nlm.nih.gov/entrez/misc/data/gc.prt
|
|
75
|
+
|
|
76
|
+
Standard table
|
|
77
|
+
|
|
78
|
+
| T | C | A | G |
|
|
79
|
+
--+---------+---------+---------+---------+--
|
|
80
|
+
T | TTT F | TCT S | TAT Y | TGT C | T
|
|
81
|
+
T | TTC F | TCC S | TAC Y | TGC C | C
|
|
82
|
+
T | TTA L | TCA S | TAA Stop| TGA Stop| A
|
|
83
|
+
T | TTG L(s)| TCG S | TAG Stop| TGG W | G
|
|
84
|
+
--+---------+---------+---------+---------+--
|
|
85
|
+
C | CTT L | CCT P | CAT H | CGT R | T
|
|
86
|
+
C | CTC L | CCC P | CAC H | CGC R | C
|
|
87
|
+
C | CTA L | CCA P | CAA Q | CGA R | A
|
|
88
|
+
C | CTG L(s)| CCG P | CAG Q | CGG R | G
|
|
89
|
+
--+---------+---------+---------+---------+--
|
|
90
|
+
A | ATT I | ACT T | AAT N | AGT S | T
|
|
91
|
+
A | ATC I | ACC T | AAC N | AGC S | C
|
|
92
|
+
A | ATA I | ACA T | AAA K | AGA R | A
|
|
93
|
+
A | ATG M(s)| ACG T | AAG K | AGG R | G
|
|
94
|
+
--+---------+---------+---------+---------+--
|
|
95
|
+
G | GTT V | GCT A | GAT D | GGT G | T
|
|
96
|
+
G | GTC V | GCC A | GAC D | GGC G | C
|
|
97
|
+
G | GTA V | GCA A | GAA E | GGA G | A
|
|
98
|
+
G | GTG V | GCG A | GAG E | GGG G | G
|
|
99
|
+
--+---------+---------+---------+---------+--
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
Parameters
|
|
103
|
+
----------
|
|
104
|
+
table : [str, int], optional
|
|
105
|
+
The default is "Standard". Can be a table id integer, see here for table
|
|
106
|
+
numbering https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
|
|
107
|
+
stop_symbol : [str], optional
|
|
108
|
+
The default is "*". Single character string to indicate translation stop.
|
|
109
|
+
to_stop : bool, optional
|
|
110
|
+
The default is False. True means that translation terminates at the first
|
|
111
|
+
in frame stop codon. False translates to the end.
|
|
112
|
+
cds : bool, optional
|
|
113
|
+
The default is False. If True, checks that the sequence starts with a
|
|
114
|
+
valid alternative start codon sequence length is a multiple of three, and
|
|
115
|
+
that there is a single in frame stop codon at the end. If these tests fail,
|
|
116
|
+
an exception is raised.
|
|
117
|
+
gap : str, optional
|
|
118
|
+
The default is "-".
|
|
119
|
+
|
|
120
|
+
Returns
|
|
121
|
+
-------
|
|
122
|
+
Bio.Seq.Seq
|
|
123
|
+
A Biopython Seq object with the translated amino acid code.
|
|
124
|
+
|
|
125
|
+
"""
|
|
126
|
+
|
|
127
|
+
p = _Seq(self._data).translate(
|
|
128
|
+
stop_symbol=stop_symbol, to_stop=to_stop, cds=cds, gap=gap
|
|
49
129
|
)
|
|
50
130
|
return ProteinSeq(p._data)
|
|
51
131
|
|
|
132
|
+
def transcribe(self) -> _Seq:
|
|
133
|
+
"""
|
|
134
|
+
Transcribe a DNA sequence into RNA and return the RNA sequence
|
|
135
|
+
as a new Seq object.
|
|
136
|
+
|
|
137
|
+
"""
|
|
138
|
+
return Seq(_Seq(self._data).transcribe()._data)
|
|
139
|
+
|
|
52
140
|
def gc(self) -> float:
|
|
53
141
|
"""Return GC content."""
|
|
54
|
-
return round(
|
|
142
|
+
return round(gc_fraction(self._data.upper().decode("ASCII")), 3)
|
|
55
143
|
|
|
56
144
|
def cai(self, organism: str = "sce") -> float:
|
|
57
145
|
"""docstring."""
|
|
@@ -59,11 +147,11 @@ class Seq(_Seq):
|
|
|
59
147
|
|
|
60
148
|
return _cai(self._data.upper().decode("ASCII"), organism=organism)
|
|
61
149
|
|
|
62
|
-
def rarecodons(self, organism: str = "sce") ->
|
|
150
|
+
def rarecodons(self, organism: str = "sce") -> List[slice]:
|
|
63
151
|
"""docstring."""
|
|
64
|
-
rare =
|
|
152
|
+
rare = rare_codons[organism]
|
|
65
153
|
s = self._data.upper().decode("ASCII")
|
|
66
|
-
slices:
|
|
154
|
+
slices: List[slice] = []
|
|
67
155
|
for i in range(0, len(self) // 3):
|
|
68
156
|
x, y = i * 3, i * 3 + 3
|
|
69
157
|
trip = s[x:y]
|
|
@@ -71,19 +159,19 @@ class Seq(_Seq):
|
|
|
71
159
|
slices.append(slice(x, y, 1))
|
|
72
160
|
return slices
|
|
73
161
|
|
|
74
|
-
def startcodon(self, organism: str = "sce") ->
|
|
162
|
+
def startcodon(self, organism: str = "sce") -> Optional[float]:
|
|
75
163
|
"""docstring."""
|
|
76
164
|
return _start[organism].get(self._data.upper().decode("ASCII")[:3])
|
|
77
165
|
|
|
78
|
-
def stopcodon(self, organism: str = "sce") ->
|
|
166
|
+
def stopcodon(self, organism: str = "sce") -> Optional[float]:
|
|
79
167
|
"""docstring."""
|
|
80
168
|
return _stop[organism].get(self._data.upper().decode("ASCII")[-3:])
|
|
81
169
|
|
|
82
|
-
def express(self, organism: str = "sce") ->
|
|
170
|
+
def express(self, organism: str = "sce") -> PrettyTable:
|
|
83
171
|
"""docstring."""
|
|
84
|
-
x =
|
|
172
|
+
x = PrettyTable(
|
|
85
173
|
["cds", "len", "cai", "gc", "sta", "stp", "n-end"]
|
|
86
|
-
+
|
|
174
|
+
+ rare_codons[organism]
|
|
87
175
|
+ ["rare"]
|
|
88
176
|
)
|
|
89
177
|
val = []
|
|
@@ -98,12 +186,12 @@ class Seq(_Seq):
|
|
|
98
186
|
val.append(self.startcodon())
|
|
99
187
|
val.append(self.stopcodon())
|
|
100
188
|
val.append(
|
|
101
|
-
_n_end[organism].get(
|
|
189
|
+
_n_end[organism].get(seq3(self[3:6].translate())),
|
|
102
190
|
)
|
|
103
191
|
s = self._data.upper().decode("ASCII")
|
|
104
192
|
trps = [s[i * 3 : i * 3 + 3] for i in range(0, len(s) // 3)]
|
|
105
193
|
tot = 0
|
|
106
|
-
for cdn in
|
|
194
|
+
for cdn in rare_codons[organism]:
|
|
107
195
|
cnt = trps.count(cdn)
|
|
108
196
|
tot += cnt
|
|
109
197
|
val.append(cnt)
|
|
@@ -111,13 +199,13 @@ class Seq(_Seq):
|
|
|
111
199
|
x.add_row(val)
|
|
112
200
|
return x
|
|
113
201
|
|
|
114
|
-
def orfs2(self, minsize: int = 30) ->
|
|
202
|
+
def orfs2(self, minsize: int = 30) -> List[str]:
|
|
115
203
|
"""docstring."""
|
|
116
|
-
orf =
|
|
117
|
-
f"ATG(?:...){{{minsize},}}?(?:TAG|TAA|TGA)", flags=
|
|
204
|
+
orf = re.compile(
|
|
205
|
+
f"ATG(?:...){{{minsize},}}?(?:TAG|TAA|TGA)", flags=re.IGNORECASE
|
|
118
206
|
)
|
|
119
207
|
start = 0
|
|
120
|
-
matches:
|
|
208
|
+
matches: List[slice] = []
|
|
121
209
|
s = self._data.decode("ASCII")
|
|
122
210
|
|
|
123
211
|
while True:
|
|
@@ -129,7 +217,7 @@ class Seq(_Seq):
|
|
|
129
217
|
break
|
|
130
218
|
return sorted([self[sl] for sl in matches], key=len, reverse=True)
|
|
131
219
|
|
|
132
|
-
def orfs(self, minsize: int = 100) ->
|
|
220
|
+
def orfs(self, minsize: int = 100) -> List[Tuple[int, int]]:
|
|
133
221
|
dna = self._data.decode("ASCII")
|
|
134
222
|
from pydna.utils import three_frame_orfs
|
|
135
223
|
|
|
@@ -154,18 +242,20 @@ class Seq(_Seq):
|
|
|
154
242
|
----------
|
|
155
243
|
.. [#] http://wiki.christophchamp.com/index.php/SEGUID
|
|
156
244
|
"""
|
|
157
|
-
return
|
|
245
|
+
return lsseguid(
|
|
246
|
+
self._data.decode("ascii").upper(), alphabet="{DNA-extended},AU"
|
|
247
|
+
)
|
|
158
248
|
|
|
159
|
-
def __getitem__(self, key):
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
249
|
+
# def __getitem__(self, key):
|
|
250
|
+
# result = super().__getitem__(key)
|
|
251
|
+
# try:
|
|
252
|
+
# result.__class__ = self.__class__
|
|
253
|
+
# except TypeError:
|
|
254
|
+
# pass
|
|
255
|
+
# return result
|
|
166
256
|
|
|
167
257
|
def reverse_complement(self):
|
|
168
|
-
return self.__class__(
|
|
258
|
+
return self.__class__(rc(self._data))
|
|
169
259
|
|
|
170
260
|
rc = reverse_complement
|
|
171
261
|
|
|
@@ -215,7 +305,7 @@ class ProteinSeq(_Seq):
|
|
|
215
305
|
----------
|
|
216
306
|
.. [#] http://wiki.christophchamp.com/index.php/SEGUID
|
|
217
307
|
"""
|
|
218
|
-
return
|
|
308
|
+
return lsseguid(
|
|
219
309
|
self._data.decode("utf8").upper(), alphabet="{protein-extended}"
|
|
220
310
|
)
|
|
221
311
|
|
pydna/seqrecord.py
CHANGED
|
@@ -14,34 +14,30 @@ the :class:`pydna._pretty_str.pretty_str` class instread of str for a
|
|
|
14
14
|
nicer output in the IPython shell.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
+
from Bio.SeqFeature import SeqFeature
|
|
18
|
+
from pydna._pretty import pretty_str as ps
|
|
17
19
|
|
|
18
|
-
from
|
|
19
|
-
from pydna.
|
|
20
|
+
from pydna.seq import ProteinSeq
|
|
21
|
+
from pydna.common_sub_strings import common_sub_strings
|
|
20
22
|
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
+
from Bio.Data.CodonTable import TranslationError
|
|
24
|
+
from Bio.SeqRecord import SeqRecord
|
|
25
|
+
from Bio.SeqFeature import SimpleLocation
|
|
26
|
+
from Bio.SeqFeature import CompoundLocation
|
|
27
|
+
from pydna.seq import Seq
|
|
28
|
+
from pydna._pretty import PrettyTable
|
|
23
29
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
from
|
|
27
|
-
from Bio.SeqFeature import CompoundLocation as _CompoundLocation
|
|
28
|
-
from pydna.seq import Seq as _Seq
|
|
29
|
-
from pydna._pretty import PrettyTable as _PrettyTable
|
|
30
|
-
|
|
31
|
-
import re as _re
|
|
32
|
-
import pickle as _pickle
|
|
33
|
-
from copy import copy as _copy
|
|
30
|
+
import re
|
|
31
|
+
import pickle
|
|
32
|
+
from copy import copy
|
|
34
33
|
|
|
35
34
|
from pydna import _PydnaWarning
|
|
36
|
-
from warnings import warn
|
|
35
|
+
from warnings import warn
|
|
37
36
|
|
|
38
|
-
# import logging as _logging
|
|
39
37
|
import datetime
|
|
40
38
|
|
|
41
|
-
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
42
|
-
|
|
43
39
|
|
|
44
|
-
class SeqRecord(
|
|
40
|
+
class SeqRecord(SeqRecord):
|
|
45
41
|
"""
|
|
46
42
|
A subclass of the Biopython SeqRecord class.
|
|
47
43
|
|
|
@@ -62,7 +58,7 @@ class SeqRecord(_SeqRecord):
|
|
|
62
58
|
letter_annotations=None,
|
|
63
59
|
):
|
|
64
60
|
if isinstance(seq, str):
|
|
65
|
-
seq =
|
|
61
|
+
seq = Seq(seq)
|
|
66
62
|
super().__init__(
|
|
67
63
|
seq,
|
|
68
64
|
id=id,
|
|
@@ -76,23 +72,21 @@ class SeqRecord(_SeqRecord):
|
|
|
76
72
|
self._fix_attributes()
|
|
77
73
|
|
|
78
74
|
def _fix_attributes(self):
|
|
79
|
-
self.id =
|
|
80
|
-
self.name =
|
|
81
|
-
self.description =
|
|
75
|
+
self.id = ps(self.id)
|
|
76
|
+
self.name = ps(self.name)
|
|
77
|
+
self.description = ps(self.description)
|
|
82
78
|
|
|
83
79
|
self.annotations.update({"molecule_type": "DNA"})
|
|
84
80
|
self.map_target = None
|
|
85
81
|
|
|
86
82
|
if not hasattr(self.seq, "transcribe"):
|
|
87
|
-
self.seq =
|
|
83
|
+
self.seq = Seq(self.seq)
|
|
88
84
|
|
|
89
85
|
self.seq._data = b"".join(self.seq._data.split()) # remove whitespaces
|
|
90
|
-
self.annotations = {
|
|
91
|
-
_pretty_str(k): _pretty_str(v) for k, v in self.annotations.items()
|
|
92
|
-
}
|
|
86
|
+
self.annotations = {ps(k): ps(v) for k, v in self.annotations.items()}
|
|
93
87
|
|
|
94
88
|
@classmethod
|
|
95
|
-
def from_Bio_SeqRecord(clc, sr:
|
|
89
|
+
def from_Bio_SeqRecord(clc, sr: SeqRecord):
|
|
96
90
|
"""Creates a pydnaSeqRecord from a Biopython SeqRecord."""
|
|
97
91
|
# https://stackoverflow.com/questions/15404256/changing-the-\
|
|
98
92
|
# class-of-a-python-object-casting
|
|
@@ -110,7 +104,7 @@ class SeqRecord(_SeqRecord):
|
|
|
110
104
|
"""Alias for name property."""
|
|
111
105
|
if len(value) > 16:
|
|
112
106
|
shortvalue = value[:16]
|
|
113
|
-
|
|
107
|
+
warn(
|
|
114
108
|
("locus property {} truncated" "to 16 chars {}").format(
|
|
115
109
|
value, shortvalue
|
|
116
110
|
),
|
|
@@ -189,7 +183,7 @@ class SeqRecord(_SeqRecord):
|
|
|
189
183
|
"""
|
|
190
184
|
try:
|
|
191
185
|
self.seq.translate(table=table, cds=True)
|
|
192
|
-
except
|
|
186
|
+
except TranslationError:
|
|
193
187
|
return False
|
|
194
188
|
else:
|
|
195
189
|
return True
|
|
@@ -197,7 +191,7 @@ class SeqRecord(_SeqRecord):
|
|
|
197
191
|
def translate(self):
|
|
198
192
|
"""docstring."""
|
|
199
193
|
p = super().translate()
|
|
200
|
-
return ProteinSeqRecord(
|
|
194
|
+
return ProteinSeqRecord(ProteinSeq(p.seq))
|
|
201
195
|
|
|
202
196
|
def add_colors_to_features_for_ape(self):
|
|
203
197
|
"""Assign colors to features.
|
|
@@ -296,19 +290,19 @@ class SeqRecord(_SeqRecord):
|
|
|
296
290
|
qualifiers["label"] = ["orf{}".format(y - x)]
|
|
297
291
|
|
|
298
292
|
try:
|
|
299
|
-
location =
|
|
293
|
+
location = SimpleLocation(x, y, strand=strand)
|
|
300
294
|
except ValueError as err:
|
|
301
295
|
if self.circular:
|
|
302
|
-
location =
|
|
296
|
+
location = CompoundLocation(
|
|
303
297
|
(
|
|
304
|
-
|
|
305
|
-
|
|
298
|
+
SimpleLocation(x, len(self.seq), strand=strand),
|
|
299
|
+
SimpleLocation(0, y, strand=strand),
|
|
306
300
|
)
|
|
307
301
|
)
|
|
308
302
|
else:
|
|
309
303
|
raise err
|
|
310
304
|
|
|
311
|
-
sf =
|
|
305
|
+
sf = SeqFeature(location, type=type_, qualifiers=qualifiers)
|
|
312
306
|
|
|
313
307
|
self.features.append(sf)
|
|
314
308
|
|
|
@@ -333,7 +327,7 @@ class SeqRecord(_SeqRecord):
|
|
|
333
327
|
| 0 | L:ft2 | --> | 2 | 4 | 2 | misc | no |
|
|
334
328
|
+-----+---------------+-----+-----+-----+-----+------+------+
|
|
335
329
|
"""
|
|
336
|
-
x =
|
|
330
|
+
x = PrettyTable(
|
|
337
331
|
["Ft#", "Label or Note", "Dir", "Sta", "End", "Len", "type", "orf?"]
|
|
338
332
|
)
|
|
339
333
|
x.align["Ft#"] = "r" # Left align
|
|
@@ -444,7 +438,7 @@ class SeqRecord(_SeqRecord):
|
|
|
444
438
|
result = self.annotations.get("comment", "")
|
|
445
439
|
if newcomment:
|
|
446
440
|
self.annotations["comment"] = (result + "\n" + newcomment).strip()
|
|
447
|
-
result =
|
|
441
|
+
result = ps(self.annotations["comment"])
|
|
448
442
|
return result
|
|
449
443
|
|
|
450
444
|
def datefunction():
|
|
@@ -481,18 +475,18 @@ class SeqRecord(_SeqRecord):
|
|
|
481
475
|
"""
|
|
482
476
|
chksum = self.seq.seguid()
|
|
483
477
|
oldcomment = self.annotations.get("comment", "")
|
|
484
|
-
oldstamp =
|
|
478
|
+
oldstamp = re.findall(r"..seguid=\S{27}", oldcomment)
|
|
485
479
|
if oldstamp and oldstamp[0] == chksum:
|
|
486
|
-
return
|
|
480
|
+
return ps(oldstamp[0])
|
|
487
481
|
elif oldstamp:
|
|
488
|
-
|
|
482
|
+
warn(
|
|
489
483
|
f"Stamp change.\nNew: {chksum}\nOld: {oldstamp[0]}",
|
|
490
484
|
_PydnaWarning,
|
|
491
485
|
)
|
|
492
486
|
self.annotations["comment"] = (
|
|
493
487
|
f"{oldcomment}\n" f"{tool} {chksum} {now()} {comment}"
|
|
494
488
|
).strip()
|
|
495
|
-
return
|
|
489
|
+
return ps(chksum)
|
|
496
490
|
|
|
497
491
|
def lcs(self, other, *args, limit=25, **kwargs):
|
|
498
492
|
"""Return the longest common substring between the sequence.
|
|
@@ -533,16 +527,16 @@ class SeqRecord(_SeqRecord):
|
|
|
533
527
|
else:
|
|
534
528
|
r = str(other.lower())
|
|
535
529
|
|
|
536
|
-
olaps =
|
|
530
|
+
olaps = common_sub_strings(str(self.seq).lower(), r, limit=limit or 25)
|
|
537
531
|
|
|
538
532
|
try:
|
|
539
533
|
start_in_self, start_in_other, length = olaps.pop(0)
|
|
540
534
|
except IndexError:
|
|
541
|
-
result =
|
|
535
|
+
result = SeqFeature()
|
|
542
536
|
else:
|
|
543
537
|
label = "sequence" if not hasattr(other, "name") else other.name
|
|
544
|
-
result =
|
|
545
|
-
|
|
538
|
+
result = SeqFeature(
|
|
539
|
+
SimpleLocation(start_in_self, start_in_self + length, strand=1),
|
|
546
540
|
type=kwargs.get("type") or "read",
|
|
547
541
|
qualifiers={
|
|
548
542
|
"label": [kwargs.get("label") or label],
|
|
@@ -566,8 +560,8 @@ class SeqRecord(_SeqRecord):
|
|
|
566
560
|
for slc in self.seq.rarecodons(organism):
|
|
567
561
|
cdn = self.seq._data[slc].decode("ASCII")
|
|
568
562
|
sfs.append(
|
|
569
|
-
|
|
570
|
-
|
|
563
|
+
SeqFeature(
|
|
564
|
+
SimpleLocation(slc.start, slc.stop),
|
|
571
565
|
type=f"rare_codon_{organism}",
|
|
572
566
|
qualifiers={"label": [cdn]},
|
|
573
567
|
)
|
|
@@ -588,7 +582,7 @@ class SeqRecord(_SeqRecord):
|
|
|
588
582
|
|
|
589
583
|
def copy(self):
|
|
590
584
|
"""docstring."""
|
|
591
|
-
return
|
|
585
|
+
return copy(self)
|
|
592
586
|
|
|
593
587
|
def __lt__(self, other):
|
|
594
588
|
"""docstring."""
|
|
@@ -625,11 +619,11 @@ class SeqRecord(_SeqRecord):
|
|
|
625
619
|
|
|
626
620
|
def __str__(self):
|
|
627
621
|
"""docstring."""
|
|
628
|
-
return
|
|
622
|
+
return ps(super().__str__())
|
|
629
623
|
|
|
630
624
|
def __repr__(self):
|
|
631
625
|
"""docstring."""
|
|
632
|
-
return
|
|
626
|
+
return ps(super().__repr__())
|
|
633
627
|
|
|
634
628
|
def __format__(self, format):
|
|
635
629
|
"""docstring."""
|
|
@@ -641,14 +635,14 @@ class SeqRecord(_SeqRecord):
|
|
|
641
635
|
return text
|
|
642
636
|
|
|
643
637
|
if format == "pydnafasta":
|
|
644
|
-
return
|
|
638
|
+
return ps(
|
|
645
639
|
f">{self.id} {len(self)} bp {dict(((True, 'circular'), (False, 'linear')))[self.seq.circular]}\n{str(self.seq)}\n"
|
|
646
640
|
)
|
|
647
641
|
if format == "primer":
|
|
648
|
-
return
|
|
642
|
+
return ps(
|
|
649
643
|
f">{self.id} {len(self)}-mer{removeprefix(self.description, self.name).strip()}\n{str(self.seq)}\n"
|
|
650
644
|
)
|
|
651
|
-
return
|
|
645
|
+
return ps(super().__format__(format))
|
|
652
646
|
|
|
653
647
|
def __add__(self, other):
|
|
654
648
|
"""docstring."""
|
|
@@ -664,9 +658,7 @@ class SeqRecord(_SeqRecord):
|
|
|
664
658
|
|
|
665
659
|
def __getitem__(self, index):
|
|
666
660
|
"""docstring."""
|
|
667
|
-
from pydna.utils import
|
|
668
|
-
identifier_from_string as _identifier_from_string,
|
|
669
|
-
) # TODO: clean this up
|
|
661
|
+
from pydna.utils import identifier_from_string
|
|
670
662
|
|
|
671
663
|
answer = super().__getitem__(index)
|
|
672
664
|
if len(answer) < 2:
|
|
@@ -678,8 +670,8 @@ class SeqRecord(_SeqRecord):
|
|
|
678
670
|
identifier = " ".join(sf.qualifiers["label"])
|
|
679
671
|
elif "note" in sf.qualifiers:
|
|
680
672
|
identifier = " ".join(sf.qualifiers["note"])
|
|
681
|
-
answer.id =
|
|
682
|
-
answer.name =
|
|
673
|
+
answer.id = identifier_from_string(identifier)[:16]
|
|
674
|
+
answer.name = identifier_from_string(f"part_{self.name}")[:16]
|
|
683
675
|
return answer
|
|
684
676
|
|
|
685
677
|
def __bool__(self):
|
|
@@ -705,8 +697,8 @@ class SeqRecord(_SeqRecord):
|
|
|
705
697
|
if not pth.suffix:
|
|
706
698
|
pth = pth.with_suffix(".pickle")
|
|
707
699
|
with open(pth, "wb") as f:
|
|
708
|
-
|
|
709
|
-
return
|
|
700
|
+
pickle.dump(self, f, protocol=protocol)
|
|
701
|
+
return ps(pth)
|
|
710
702
|
|
|
711
703
|
|
|
712
704
|
class ProteinSeqRecord(SeqRecord):
|
|
@@ -739,4 +731,4 @@ class ProteinSeqRecord(SeqRecord):
|
|
|
739
731
|
|
|
740
732
|
def __format__(self, format):
|
|
741
733
|
"""docstring."""
|
|
742
|
-
return
|
|
734
|
+
return ps(SeqRecord.__format__(self, format))
|
pydna/sequence_picker.py
CHANGED
|
@@ -6,16 +6,13 @@
|
|
|
6
6
|
# as part of this package.
|
|
7
7
|
|
|
8
8
|
from pydna.dseqrecord import Dseqrecord
|
|
9
|
-
import os
|
|
9
|
+
import os
|
|
10
10
|
|
|
11
|
-
# import logging as _logging
|
|
12
11
|
from Bio.Blast import NCBIWWW
|
|
13
12
|
from Bio.Blast import NCBIXML
|
|
14
13
|
|
|
15
|
-
# _module_logger = _logging.getLogger("pydna." + __name__)
|
|
16
14
|
|
|
17
|
-
|
|
18
|
-
email = _os.getenv("pydna_email")
|
|
15
|
+
email = os.getenv("pydna_email")
|
|
19
16
|
tool = "pydna"
|
|
20
17
|
|
|
21
18
|
|
pydna/sequence_regex.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
from pydna.dseqrecord import Dseqrecord
|
|
2
|
+
from pydna.dseqrecord import Dseqrecord
|
|
3
3
|
import re
|
|
4
|
-
from Bio.Data.IUPACData import ambiguous_dna_values
|
|
4
|
+
from Bio.Data.IUPACData import ambiguous_dna_values
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
custom_ambiguous_only_dna_values = {**ambiguous_dna_values}
|
|
7
7
|
for normal_base in "ACGT":
|
|
8
|
-
del
|
|
8
|
+
del custom_ambiguous_only_dna_values[normal_base]
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def compute_regex_site(site: str) -> str:
|
|
@@ -19,7 +19,7 @@ def compute_regex_site(site: str) -> str:
|
|
|
19
19
|
The regex pattern.
|
|
20
20
|
"""
|
|
21
21
|
upper_site = site.upper()
|
|
22
|
-
for k, v in
|
|
22
|
+
for k, v in custom_ambiguous_only_dna_values.items():
|
|
23
23
|
if len(v) > 1:
|
|
24
24
|
upper_site = upper_site.replace(k, f"[{''.join(v)}]")
|
|
25
25
|
|
|
@@ -28,7 +28,7 @@ def compute_regex_site(site: str) -> str:
|
|
|
28
28
|
return upper_site
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def dseqrecord_finditer(pattern: str, seq:
|
|
31
|
+
def dseqrecord_finditer(pattern: str, seq: Dseqrecord) -> list[re.Match]:
|
|
32
32
|
"""
|
|
33
33
|
Finds all matches of a regex pattern in a Dseqrecord.
|
|
34
34
|
|