pydna 5.5.4__py3-none-any.whl → 5.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/gateway.py CHANGED
@@ -1,8 +1,8 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  from Bio.Seq import reverse_complement
3
- from pydna.dseqrecord import Dseqrecord as _Dseqrecord
3
+ from pydna.dseqrecord import Dseqrecord
4
4
  import re
5
- import itertools as _itertools
5
+ import itertools
6
6
  from Bio.SeqFeature import SimpleLocation, SeqFeature
7
7
  from pydna.utils import shift_location
8
8
  from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
@@ -79,7 +79,7 @@ primer_design_attB = {
79
79
 
80
80
 
81
81
  def gateway_overlap(
82
- seqx: _Dseqrecord, seqy: _Dseqrecord, reaction: str, greedy: bool
82
+ seqx: Dseqrecord, seqy: Dseqrecord, reaction: str, greedy: bool
83
83
  ) -> list[tuple[int, int, int]]:
84
84
  """
85
85
  Find gateway overlaps. If greedy is True, it uses a more greedy consensus site to find attP sites,
@@ -110,7 +110,7 @@ def gateway_overlap(
110
110
  if len(matches_y) == 0:
111
111
  continue
112
112
 
113
- for match_x, match_y in _itertools.product(matches_x, matches_y):
113
+ for match_x, match_y in itertools.product(matches_x, matches_y):
114
114
  # Find the overlap sequence within each match, and use the
115
115
  # core 7 bps that are constant
116
116
  overlap_x = re.search(overlap_regex, match_x.group())
@@ -133,7 +133,7 @@ def gateway_overlap(
133
133
 
134
134
 
135
135
  def find_gateway_sites(
136
- seq: _Dseqrecord, greedy: bool
136
+ seq: Dseqrecord, greedy: bool
137
137
  ) -> dict[str, list[SimpleLocation]]:
138
138
  """Find all gateway sites in a sequence and return a dictionary with the name and positions of the sites."""
139
139
  gateway_sites = gateway_sites_greedy if greedy else gateway_sites_conservative
@@ -154,7 +154,7 @@ def find_gateway_sites(
154
154
  return out
155
155
 
156
156
 
157
- def annotate_gateway_sites(seq: _Dseqrecord, greedy: bool) -> _Dseqrecord:
157
+ def annotate_gateway_sites(seq: Dseqrecord, greedy: bool) -> Dseqrecord:
158
158
  sites = find_gateway_sites(seq, greedy)
159
159
  for site in sites:
160
160
  for loc in sites[site]:
pydna/gel.py CHANGED
@@ -9,7 +9,7 @@
9
9
 
10
10
  """docstring."""
11
11
 
12
- import math as _math
12
+ import math
13
13
  from pydna.ladders import GeneRuler_1kb_plus as _mwstd
14
14
 
15
15
 
@@ -31,8 +31,8 @@ def gel(
31
31
  samples=None, gel_length=600, margin=50, interpolator=interpolator(mwstd=_mwstd)
32
32
  ):
33
33
  import numpy as np
34
- from PIL import Image as Image
35
- from PIL import ImageDraw as ImageDraw
34
+ from PIL import Image
35
+ from PIL import ImageDraw
36
36
 
37
37
  """docstring."""
38
38
  max_intensity = 256
@@ -54,7 +54,7 @@ def gel(
54
54
 
55
55
  for lane_number, lane in enumerate(samples):
56
56
  for band in lane:
57
- log = _math.log(len(band), 10)
57
+ log = math.log(len(band), 10)
58
58
  height = (band.m() / (240 * log)) * 1e10
59
59
  peak_centre = interpolator(len(band)) * scale + start
60
60
  max_spread = 10
@@ -68,7 +68,7 @@ def gel(
68
68
  y2 = peak_centre + i
69
69
  intensity = (
70
70
  height
71
- * _math.exp(
71
+ * math.exp(
72
72
  -float(((y1 - peak_centre) ** 2)) / (2 * (band_spread**2))
73
73
  )
74
74
  * max_intensity
pydna/genbank.py CHANGED
@@ -11,21 +11,17 @@ The function can be used if the environmental variable **pydna_email** has
11
11
  been set to a valid email address. The easiest way to do this permanently is to edit the
12
12
  `pydna.ini` file. See the documentation of :func:`pydna.open_config_folder`"""
13
13
 
14
- # from pydna.utils import memorize as _memorize
15
- from pydna.genbankrecord import GenbankRecord as _GenbankRecord
16
- from pydna.readers import read as _read
17
14
 
18
- from Bio import Entrez as _Entrez
19
- from typing import Literal as _Literal, Optional as _Optional
20
- import re as _re
21
- import os as _os
15
+ from pydna.opencloning_models import NCBISequenceSource
16
+ from pydna.readers import read
17
+ from pydna.dseqrecord import Dseqrecord
22
18
 
23
- # import logging as _logging
19
+ from Bio import Entrez
20
+ from Bio.SeqFeature import SimpleLocation
24
21
 
25
- # _module_logger = _logging.getLogger("pydna." + __name__)
26
-
27
-
28
- # TODO http://httpbin.org/ use for testing?
22
+ from typing import Literal, Optional
23
+ import re
24
+ import os
29
25
 
30
26
 
31
27
  class Genbank:
@@ -54,15 +50,11 @@ class Genbank:
54
50
  *,
55
51
  tool: str = "pydna",
56
52
  ) -> None:
57
- if not _re.match(
58
- r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}", users_email, _re.IGNORECASE
53
+ if not re.match(
54
+ r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}", users_email, re.IGNORECASE
59
55
  ):
60
56
  raise ValueError("email address {} is not valid.".format(users_email))
61
57
 
62
- # _module_logger.info("#### Genbank ititiation ####")
63
- # _module_logger.info("Genbank initiated with email: %s", users_email)
64
- # _module_logger.info("Genbank initiated with tool : %s", tool)
65
-
66
58
  if users_email == "someone@example.com":
67
59
  raise ValueError(
68
60
  "you have to set your email address in order to download from Genbank"
@@ -78,10 +70,10 @@ class Genbank:
78
70
  def nucleotide(
79
71
  self,
80
72
  item: str,
81
- seq_start: _Optional[int] = None,
82
- seq_stop: _Optional[int] = None,
83
- strand: _Literal[1, 2] = 1,
84
- ) -> _GenbankRecord:
73
+ seq_start: Optional[int] = None,
74
+ seq_stop: Optional[int] = None,
75
+ strand: Literal[1, 2] = 1,
76
+ ) -> Dseqrecord:
85
77
  """This method downloads a genbank nucleotide record from genbank. This method is
86
78
  cached by default. This can be controlled by editing the **pydna_cached_funcs** environment
87
79
  variable. The best way to do this permanently is to edit the
@@ -120,7 +112,7 @@ class Genbank:
120
112
  "2", 2, "-" or "-1", the antisense (Crick) strand is returned, otherwise
121
113
  the sense (Watson) strand is returned.
122
114
 
123
- Result is returned as a :class:`pydna.genbankrecord.GenbankRecord` object.
115
+ Result is returned as a :class:`Dseqrecord` object.
124
116
 
125
117
  References
126
118
  ----------
@@ -129,15 +121,15 @@ class Genbank:
129
121
  .. [#] http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch
130
122
  """
131
123
  matches = (
132
- (1, _re.search(r"(REGION:\s(?P<start>\d+)\.\.(?P<stop>\d+))", item)),
124
+ (1, re.search(r"(REGION:\s(?P<start>\d+)\.\.(?P<stop>\d+))", item)),
133
125
  (
134
126
  2,
135
- _re.search(
127
+ re.search(
136
128
  r"(REGION: complement\((?P<start>\d+)\.\.(?P<stop>\d+)\))", item
137
129
  ),
138
130
  ),
139
- (1, _re.search(r"(:|\s)(?P<start>\d+)-(?P<stop>\d+)", item)),
140
- (2, _re.search(r"(:|\s)c(?P<start>\d+)-(?P<stop>\d+)", item)),
131
+ (1, re.search(r"(:|\s)(?P<start>\d+)-(?P<stop>\d+)", item)),
132
+ (2, re.search(r"(:|\s)c(?P<start>\d+)-(?P<stop>\d+)", item)),
141
133
  )
142
134
 
143
135
  for strand_, match in matches:
@@ -156,18 +148,10 @@ class Genbank:
156
148
  except (KeyError, AttributeError):
157
149
  strand = 1
158
150
 
159
- # _module_logger.info("#### Genbank download ####")
160
- # _module_logger.info("item %s", item)
161
- # _module_logger.info("start %s", seq_start)
162
- # _module_logger.info("stop %s", seq_stop)
163
-
164
- # _module_logger.info("strand %s", str(strand))
151
+ Entrez.email = self.email
152
+ Entrez.tool = self.tool
165
153
 
166
- _Entrez.email = self.email
167
- _Entrez.tool = self.tool
168
-
169
- # _module_logger.info("Entrez.email %s", self.email)
170
- text = _Entrez.efetch(
154
+ text = Entrez.efetch(
171
155
  db="nuccore",
172
156
  id=item,
173
157
  rettype="gbwithparts",
@@ -177,14 +161,30 @@ class Genbank:
177
161
  retmode="text",
178
162
  ).read()
179
163
 
180
- # _module_logger.info("text[:160] %s", text[:160])
164
+ result = read(text)
165
+ # TODO: Address this for cases where only one is defined
166
+ if seq_start is not None and seq_stop is not None:
167
+ location = SimpleLocation(
168
+ int(seq_start) - 1, int(seq_stop), -1 if strand == 2 else strand
169
+ )
170
+ elif seq_start is None and seq_stop is None:
171
+ location = None
172
+ elif seq_stop is not None:
173
+ location = SimpleLocation(0, int(seq_stop), -1 if strand == 2 else strand)
174
+ else:
175
+ st = int(seq_start) - 1
176
+ location = SimpleLocation(
177
+ st, st + len(result), -1 if strand == 2 else strand
178
+ )
181
179
 
182
- return _GenbankRecord(
183
- _read(text), item=item, start=seq_start, stop=seq_stop, strand=strand
180
+ result.source = NCBISequenceSource(
181
+ repository_id=item,
182
+ coordinates=location,
184
183
  )
184
+ return result
185
185
 
186
186
 
187
- def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
187
+ def genbank(accession: str = "CS570233.1", *args, email=None, **kwargs) -> Dseqrecord:
188
188
  """
189
189
  Download a genbank nucleotide record.
190
190
 
@@ -229,9 +229,6 @@ def genbank(accession: str = "CS570233.1", *args, **kwargs) -> _GenbankRecord:
229
229
  //
230
230
 
231
231
  """
232
- email = _os.getenv("pydna_email")
233
- # _module_logger.info("#### genbank function called ####")
234
- # _module_logger.info("email %s", email)
235
- # _module_logger.info("accession %s", email)
232
+ email = email or os.getenv("pydna_email")
236
233
  gb = Genbank(email)
237
234
  return gb.nucleotide(accession, *args, **kwargs)
pydna/genbankfixer.py CHANGED
@@ -24,63 +24,63 @@ This should not be a difficult fix. The returned result has two properties,
24
24
  which is the formatted genbank string."""
25
25
 
26
26
 
27
- import re as _re
28
- import pyparsing as _pp
27
+ import re
28
+ import pyparsing as pp
29
29
 
30
30
  GoodLocus = (
31
- _pp.Literal("LOCUS")
32
- + _pp.Word(_pp.alphas + _pp.nums + "-_()." + "\\").setResultsName("name")
33
- + _pp.Word(_pp.nums).setResultsName("size")
34
- + _pp.Suppress(_pp.CaselessLiteral("bp"))
35
- + _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
36
- + (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
31
+ pp.Literal("LOCUS")
32
+ + pp.Word(pp.alphas + pp.nums + "-_()." + "\\").setResultsName("name")
33
+ + pp.Word(pp.nums).setResultsName("size")
34
+ + pp.Suppress(pp.CaselessLiteral("bp"))
35
+ + pp.Word(pp.alphas + "-").setResultsName("seqtype")
36
+ + (pp.CaselessLiteral("linear") | pp.CaselessLiteral("circular")).setResultsName(
37
37
  "topology"
38
38
  )
39
- + _pp.Optional(_pp.Word(_pp.alphas), default=" ").setResultsName("divcode")
40
- + _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
39
+ + pp.Optional(pp.Word(pp.alphas), default=" ").setResultsName("divcode")
40
+ + pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
41
41
  )
42
42
 
43
43
  # Older versions of ApE don't include a LOCUS name! Need separate def for this case:
44
44
  BrokenLocus1 = (
45
- _pp.Literal("LOCUS").setResultsName("name")
46
- + _pp.Word(_pp.nums).setResultsName("size")
47
- + _pp.Suppress(_pp.CaselessLiteral("bp"))
48
- + _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
49
- + (_pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular")).setResultsName(
45
+ pp.Literal("LOCUS").setResultsName("name")
46
+ + pp.Word(pp.nums).setResultsName("size")
47
+ + pp.Suppress(pp.CaselessLiteral("bp"))
48
+ + pp.Word(pp.alphas + "-").setResultsName("seqtype")
49
+ + (pp.CaselessLiteral("linear") | pp.CaselessLiteral("circular")).setResultsName(
50
50
  "topology"
51
51
  )
52
- + _pp.Optional(_pp.Word(_pp.alphas), default=" ").setResultsName("divcode")
53
- + _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
52
+ + pp.Optional(pp.Word(pp.alphas), default=" ").setResultsName("divcode")
53
+ + pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
54
54
  )
55
55
 
56
56
  # LOCUS YEplac181 5741 bp DNA SYN
57
57
  BrokenLocus2 = (
58
- _pp.Literal("LOCUS")
59
- + _pp.Word(_pp.alphas + _pp.nums + "-_()." + "\\").setResultsName("name")
60
- + _pp.Word(_pp.nums).setResultsName("size")
61
- + _pp.Suppress(_pp.CaselessLiteral("bp"))
62
- + _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
63
- + _pp.Optional(
64
- _pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular"),
58
+ pp.Literal("LOCUS")
59
+ + pp.Word(pp.alphas + pp.nums + "-_()." + "\\").setResultsName("name")
60
+ + pp.Word(pp.nums).setResultsName("size")
61
+ + pp.Suppress(pp.CaselessLiteral("bp"))
62
+ + pp.Word(pp.alphas + "-").setResultsName("seqtype")
63
+ + pp.Optional(
64
+ pp.CaselessLiteral("linear") | pp.CaselessLiteral("circular"),
65
65
  default="linear",
66
66
  ).setResultsName("topology")
67
- + _pp.Optional(_pp.Word(_pp.alphas), default=" ").setResultsName("divcode")
68
- + _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
67
+ + pp.Optional(pp.Word(pp.alphas), default=" ").setResultsName("divcode")
68
+ + pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date")
69
69
  )
70
70
 
71
71
  BrokenLocus3 = (
72
- _pp.Literal("LOCUS")
73
- + _pp.Word(_pp.alphas + _pp.nums + "-_()." + "\\").setResultsName("name")
74
- + _pp.Word(_pp.nums).setResultsName("size")
75
- + _pp.Suppress(_pp.CaselessLiteral("bp"))
76
- + _pp.Word(_pp.alphas + "-").setResultsName("seqtype")
77
- + _pp.Optional(
78
- _pp.CaselessLiteral("linear") | _pp.CaselessLiteral("circular"),
72
+ pp.Literal("LOCUS")
73
+ + pp.Word(pp.alphas + pp.nums + "-_()." + "\\").setResultsName("name")
74
+ + pp.Word(pp.nums).setResultsName("size")
75
+ + pp.Suppress(pp.CaselessLiteral("bp"))
76
+ + pp.Word(pp.alphas + "-").setResultsName("seqtype")
77
+ + pp.Optional(
78
+ pp.CaselessLiteral("linear") | pp.CaselessLiteral("circular"),
79
79
  default="linear",
80
80
  ).setResultsName("topology")
81
- + _pp.Word(_pp.alphas).setResultsName("divcode")
82
- + _pp.Optional(
83
- _pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date"),
81
+ + pp.Word(pp.alphas).setResultsName("divcode")
82
+ + pp.Optional(
83
+ pp.Regex(r"(\d{2})-(\S{3})-(\d{4})").setResultsName("date"),
84
84
  default="19-MAR-1970",
85
85
  ).setResultsName("date")
86
86
  )
@@ -95,14 +95,13 @@ LocusEntry = GoodLocus | BrokenLocus1 | BrokenLocus2 | BrokenLocus3
95
95
  # (Though these entries are generally useless when it comes to hacking on DNA)
96
96
 
97
97
  # All entries in a genbank file headed by an all-caps title with no space between start-of-line and title
98
- CapWord = _pp.Word("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
98
+ CapWord = pp.Word("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
99
99
  # after titled line, all subsequent lines have to have at least one space in front of them
100
100
  # this is how we split up the genbank record
101
- SpacedLine = _pp.White(min=1) + _pp.CharsNotIn("\n") + _pp.LineEnd()
101
+ SpacedLine = pp.White(min=1) + pp.CharsNotIn("\n") + pp.LineEnd()
102
102
  # HeaderLine = CapWord + CharsNotIn("\n") + LineEnd()
103
- GenericEntry = _pp.Group(
104
- CapWord
105
- + _pp.Combine(_pp.CharsNotIn("\n") + _pp.LineEnd() + _pp.ZeroOrMore(SpacedLine))
103
+ GenericEntry = pp.Group(
104
+ CapWord + pp.Combine(pp.CharsNotIn("\n") + pp.LineEnd() + pp.ZeroOrMore(SpacedLine))
106
105
  ).setResultsName("generics", listAllMatches=True)
107
106
 
108
107
 
@@ -135,28 +134,28 @@ GenericEntry = _pp.Group(
135
134
  #
136
135
  # if you don't know where something is, don't use it or guess and move on
137
136
 
138
- LPAREN = _pp.Suppress("(")
139
- RPAREN = _pp.Suppress(")")
140
- SEP = _pp.Suppress(_pp.Literal(".."))
137
+ LPAREN = pp.Suppress("(")
138
+ RPAREN = pp.Suppress(")")
139
+ SEP = pp.Suppress(pp.Literal(".."))
141
140
 
142
141
  # recognize numbers w. < & > uncertainty specs, then strip the <> chars to make it fixed
143
- gbIndex = _pp.Word(_pp.nums + "<>").setParseAction(
142
+ gbIndex = pp.Word(pp.nums + "<>").setParseAction(
144
143
  lambda s, l_, t: int(t[0].replace("<", "").replace(">", ""))
145
144
  )
146
- SimpleSlice = _pp.Group(gbIndex + SEP + gbIndex) | _pp.Group(gbIndex).setParseAction(
145
+ SimpleSlice = pp.Group(gbIndex + SEP + gbIndex) | pp.Group(gbIndex).setParseAction(
147
146
  lambda s, l_, t: [[t[0][0], t[0][0]]]
148
147
  )
149
148
 
150
149
  # recursive def for nested function syntax: f( g(), g() )
151
- complexSlice = _pp.Forward()
150
+ complexSlice = pp.Forward()
152
151
  (
153
152
  complexSlice
154
- << (_pp.Literal("complement") | _pp.Literal("join"))
153
+ << (pp.Literal("complement") | pp.Literal("join"))
155
154
  + LPAREN
156
- + (_pp.delimitedList(complexSlice) | _pp.delimitedList(SimpleSlice))
155
+ + (pp.delimitedList(complexSlice) | pp.delimitedList(SimpleSlice))
157
156
  + RPAREN
158
157
  )
159
- featLocation = _pp.Group(SimpleSlice | complexSlice)
158
+ featLocation = pp.Group(SimpleSlice | complexSlice)
160
159
 
161
160
 
162
161
  def parseGBLoc(s, l_, t):
@@ -183,7 +182,7 @@ featLocation.setParseAction(parseGBLoc)
183
182
 
184
183
 
185
184
  def strip_multiline(s, l_, t):
186
- whitespace = _re.compile("[\n]{1}[ ]+")
185
+ whitespace = re.compile("[\n]{1}[ ]+")
187
186
  return whitespace.sub(" ", t[0])
188
187
 
189
188
 
@@ -192,59 +191,57 @@ def toInt(s, l_, t):
192
191
 
193
192
 
194
193
  # Quoted KeyVal: /key="value"
195
- QuoteFeaturekeyval = _pp.Group(
196
- _pp.Suppress("/")
197
- + _pp.Word(_pp.alphas + _pp.nums + "_-")
198
- + _pp.Suppress("=")
199
- + _pp.QuotedString('"', multiline=True).setParseAction(strip_multiline)
194
+ QuoteFeaturekeyval = pp.Group(
195
+ pp.Suppress("/")
196
+ + pp.Word(pp.alphas + pp.nums + "_-")
197
+ + pp.Suppress("=")
198
+ + pp.QuotedString('"', multiline=True).setParseAction(strip_multiline)
200
199
  )
201
200
 
202
201
  # UnQuoted KeyVal: /key=value (I'm assuming it doesn't do multilines this way? wrong! ApE does store long labels this way! sigh.)
203
202
  # NoQuoteFeaturekeyval = Group(Suppress('/') + Word(alphas+nums+"_-") + Suppress('=') + OneOrMore(CharsNotIn("\n")) )
204
203
  keyvalspacedline = (
205
- _pp.White(exact=21)
206
- + _pp.CharsNotIn("/")
207
- + _pp.OneOrMore(_pp.CharsNotIn("\n"))
208
- + _pp.LineEnd()
204
+ pp.White(exact=21)
205
+ + pp.CharsNotIn("/")
206
+ + pp.OneOrMore(pp.CharsNotIn("\n"))
207
+ + pp.LineEnd()
209
208
  )
210
- NoQuoteFeaturekeyval = _pp.Group(
211
- _pp.Suppress("/")
212
- + _pp.Word(_pp.alphas + _pp.nums + "_-")
213
- + _pp.Suppress("=")
214
- + _pp.Combine(
215
- _pp.CharsNotIn("\n") + _pp.LineEnd() + _pp.ZeroOrMore(keyvalspacedline)
216
- )
209
+ NoQuoteFeaturekeyval = pp.Group(
210
+ pp.Suppress("/")
211
+ + pp.Word(pp.alphas + pp.nums + "_-")
212
+ + pp.Suppress("=")
213
+ + pp.Combine(pp.CharsNotIn("\n") + pp.LineEnd() + pp.ZeroOrMore(keyvalspacedline))
217
214
  )
218
215
 
219
216
  # Special Case for Numerical Vals: /bases=12 OR /bases="12"
220
- NumFeaturekeyval = _pp.Group(
221
- _pp.Suppress("/")
222
- + _pp.Word(_pp.alphas + _pp.nums + "_-")
223
- + _pp.Suppress("=")
224
- + (_pp.Suppress('"') + _pp.Word(_pp.nums).setParseAction(toInt) + _pp.Suppress('"'))
225
- | (_pp.Word(_pp.nums).setParseAction(toInt))
217
+ NumFeaturekeyval = pp.Group(
218
+ pp.Suppress("/")
219
+ + pp.Word(pp.alphas + pp.nums + "_-")
220
+ + pp.Suppress("=")
221
+ + (pp.Suppress('"') + pp.Word(pp.nums).setParseAction(toInt) + pp.Suppress('"'))
222
+ | (pp.Word(pp.nums).setParseAction(toInt))
226
223
  )
227
224
 
228
225
  # Key Only KeyVal: /pseudo
229
226
  # post-parse convert it into a pair to resemble the structure of the first three cases i.e. [pseudo, True]
230
- FlagFeaturekeyval = _pp.Group(
231
- _pp.Suppress("/") + _pp.Word(_pp.alphas + _pp.nums + "_-")
227
+ FlagFeaturekeyval = pp.Group(
228
+ pp.Suppress("/") + pp.Word(pp.alphas + pp.nums + "_-")
232
229
  ).setParseAction(lambda s, l_, t: [[t[0][0], True]])
233
230
 
234
- Feature = _pp.Group(
235
- _pp.Word(_pp.alphas + _pp.nums + "_-").setParseAction(
231
+ Feature = pp.Group(
232
+ pp.Word(pp.alphas + pp.nums + "_-").setParseAction(
236
233
  lambda s, l_, t: [["type", t[0]]]
237
234
  )
238
235
  + featLocation.setResultsName("location")
239
- + _pp.OneOrMore(
236
+ + pp.OneOrMore(
240
237
  NumFeaturekeyval | QuoteFeaturekeyval | NoQuoteFeaturekeyval | FlagFeaturekeyval
241
238
  )
242
239
  )
243
240
 
244
241
  FeaturesEntry = (
245
- _pp.Literal("FEATURES")
246
- + _pp.Literal("Location/Qualifiers")
247
- + _pp.Group(_pp.OneOrMore(Feature)).setResultsName("features")
242
+ pp.Literal("FEATURES")
243
+ + pp.Literal("Location/Qualifiers")
244
+ + pp.Group(pp.OneOrMore(Feature)).setResultsName("features")
248
245
  )
249
246
 
250
247
  # ===============================================================================
@@ -252,12 +249,12 @@ FeaturesEntry = (
252
249
 
253
250
  # sequence is just a column-spaced big table of dna nucleotides
254
251
  # should it recognize full IUPAC alphabet? NCBI uses n for unknown region
255
- Sequence = _pp.OneOrMore(
256
- _pp.Suppress(_pp.Word(_pp.nums)) + _pp.OneOrMore(_pp.Word("ACGTacgtNn"))
252
+ Sequence = pp.OneOrMore(
253
+ pp.Suppress(pp.Word(pp.nums)) + pp.OneOrMore(pp.Word("ACGTacgtNn"))
257
254
  )
258
255
 
259
256
  # Group( ) hides the setResultsName names def'd inside, such that one needs to first access this group and then access the dict of contents inside
260
- SequenceEntry = _pp.Suppress(_pp.Literal("ORIGIN")) + Sequence.setParseAction(
257
+ SequenceEntry = pp.Suppress(pp.Literal("ORIGIN")) + Sequence.setParseAction(
261
258
  lambda s, l_, t: "".join(t)
262
259
  ).setResultsName("sequence")
263
260
 
@@ -266,13 +263,13 @@ SequenceEntry = _pp.Suppress(_pp.Literal("ORIGIN")) + Sequence.setParseAction(
266
263
  # Final GenBank Parser
267
264
 
268
265
  # GB files with multiple records split by "//" sequence at beginning of line
269
- GBEnd = _pp.Literal("//")
266
+ GBEnd = pp.Literal("//")
270
267
 
271
268
  # Begin w. LOCUS, slurp all entries, then stop at the end!
272
- GB = LocusEntry + _pp.OneOrMore(FeaturesEntry | SequenceEntry | GenericEntry) + GBEnd
269
+ GB = LocusEntry + pp.OneOrMore(FeaturesEntry | SequenceEntry | GenericEntry) + GBEnd
273
270
 
274
271
  # NCBI often returns sets of GB files
275
- multipleGB = _pp.OneOrMore(_pp.Group(GB))
272
+ multipleGB = pp.OneOrMore(pp.Group(GB))
276
273
 
277
274
  # ===============================================================================
278
275
  # End Genbank Parser
@@ -284,7 +281,7 @@ multipleGB = _pp.OneOrMore(_pp.Group(GB))
284
281
 
285
282
 
286
283
  def strip_indent(str):
287
- whitespace = _re.compile("[\n]{1}(COMMENT){0,1}[ ]+")
284
+ whitespace = re.compile("[\n]{1}(COMMENT){0,1}[ ]+")
288
285
  return whitespace.sub("\n", str)
289
286
 
290
287
 
@@ -588,9 +585,9 @@ def gbtext_clean(gbtext):
588
585
 
589
586
  jseqlist = toJSON(gbtext)
590
587
  jseq = jseqlist.pop()
591
- from collections import namedtuple as _namedtuple
592
- from pydna._pretty import pretty_str as _pretty_str
588
+ from collections import namedtuple
589
+ from pydna._pretty import pretty_str as ps
593
590
 
594
- Result = _namedtuple("Result", "gbtext jseq")
595
- result = Result(_pretty_str(toGB(jseq).strip()), jseq)
591
+ Result = namedtuple("Result", "gbtext jseq")
592
+ result = Result(ps(toGB(jseq).strip()), jseq)
596
593
  return result
pydna/ladders.py CHANGED
@@ -16,17 +16,16 @@ a gel image. Exampel can be found in scripts/molecular_weight_standards.ods.
16
16
  """
17
17
 
18
18
 
19
- from pydna.fakeseq import FakeSeq as _FakeSeq
19
+ from pydna.fakeseq import FakeSeq
20
20
 
21
21
 
22
22
  PennStateLadder = [
23
- _FakeSeq(int(n))
24
- for n in (10000, 7750, 5000, 4000, 3000, 2000, 1500, 1000, 750, 500)
23
+ FakeSeq(int(n)) for n in (10000, 7750, 5000, 4000, 3000, 2000, 1500, 1000, 750, 500)
25
24
  ]
26
25
 
27
26
 
28
27
  GeneRuler_1kb = [
29
- _FakeSeq(int(n))
28
+ FakeSeq(int(n))
30
29
  for n in (
31
30
  10000,
32
31
  8000,
@@ -49,7 +48,7 @@ GeneRuler_1kb = [
49
48
  # https://docs.google.com/spreadsheets/d/1vN0y75ibxPrG6yJQjq1uF2FXP0L-qGSn_fzInUHeTs4/edit#gid=0
50
49
 
51
50
  GeneRuler_1kb_plus = [
52
- _FakeSeq(ln, n=n * 1e-15, rf=rf)
51
+ FakeSeq(ln, n=n * 1e-15, rf=rf)
53
52
  for ln, n, rf in (
54
53
  # (length, fmol, Rf )
55
54
  (20000, 1.538, 0.000),
@@ -72,7 +71,7 @@ GeneRuler_1kb_plus = [
72
71
 
73
72
 
74
73
  HI_LO_DNA_MARKER = [
75
- _FakeSeq(ln, n=n * 1e-15, rf=rf)
74
+ FakeSeq(ln, n=n * 1e-15, rf=rf)
76
75
  for ln, n, rf in (
77
76
  # (length, fmol, Rf )
78
77
  (10000, 4.545, 0.000),
@@ -121,16 +120,16 @@ HI_LO_DNA_MARKER = [
121
120
 
122
121
  FakeGel = [
123
122
  [
124
- _FakeSeq(1000),
125
- _FakeSeq(2000),
123
+ FakeSeq(1000),
124
+ FakeSeq(2000),
126
125
  ],
127
126
  [
128
- _FakeSeq(3000),
129
- _FakeSeq(4000),
127
+ FakeSeq(3000),
128
+ FakeSeq(4000),
130
129
  ],
131
130
  [
132
- _FakeSeq(5000),
133
- _FakeSeq(6000),
131
+ FakeSeq(5000),
132
+ FakeSeq(6000),
134
133
  ],
135
134
  PennStateLadder,
136
135
  ]