pydna 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/amplicon.py CHANGED
@@ -10,22 +10,17 @@
10
10
  This class is not meant to be used directly but is
11
11
  used by the :mod:`amplify` module"""
12
12
 
13
- from pydna.tm import dbd_program as _dbd_program
14
- from pydna.tm import program as _program
15
- from pydna.primer import Primer as _Primer
16
- from pydna._pretty import pretty_str as _pretty_str
17
- from pydna.dseqrecord import Dseqrecord as _Dseqrecord
18
- from pydna.seqrecord import SeqRecord as _SeqRecord
19
- import textwrap as _textwrap
20
- import copy as _copy
13
+ from pydna.tm import dbd_program
14
+ from pydna.tm import program
15
+ from pydna.primer import Primer
16
+ from pydna._pretty import pretty_str
17
+ from pydna.dseqrecord import Dseqrecord
18
+ from pydna.seqrecord import SeqRecord
19
+ import textwrap
20
+ import copy
21
21
 
22
- # import logging as _logging
23
22
 
24
-
25
- # _module_logger = _logging.getLogger("pydna." + __name__)
26
-
27
-
28
- class Amplicon(_Dseqrecord):
23
+ class Amplicon(Dseqrecord):
29
24
  """The Amplicon class holds information about a PCR reaction involving two
30
25
  primers and one template. This class is used by the Anneal class and is not
31
26
  meant to be instantiated directly.
@@ -69,12 +64,12 @@ class Amplicon(_Dseqrecord):
69
64
  return obj
70
65
 
71
66
  def __getitem__(self, sl):
72
- answer = _copy.copy(self)
67
+ answer = copy.copy(self)
73
68
  answer.seq = answer.seq.__getitem__(sl)
74
69
  # answer.seq.alphabet = self.seq.alphabet
75
- sr = _SeqRecord("n" * len(self))
70
+ sr = SeqRecord("n" * len(self))
76
71
  sr.features = self.features
77
- answer.features = _SeqRecord.__getitem__(sr, sl).features
72
+ answer.features = SeqRecord.__getitem__(sr, sl).features
78
73
  return answer
79
74
 
80
75
  def __repr__(self):
@@ -90,8 +85,8 @@ class Amplicon(_Dseqrecord):
90
85
  def reverse_complement(self):
91
86
  r = type(self)(super().reverse_complement())
92
87
  r.template = self.template.rc()
93
- r.forward_primer = _copy.copy(self.reverse_primer)
94
- r.reverse_primer = _copy.copy(self.forward_primer)
88
+ r.forward_primer = copy.copy(self.reverse_primer)
89
+ r.reverse_primer = copy.copy(self.forward_primer)
95
90
  r.forward_primer.position, r.reverse_primer.position = (
96
91
  r.reverse_primer.position,
97
92
  r.forward_primer.position,
@@ -143,23 +138,23 @@ class Amplicon(_Dseqrecord):
143
138
  {" " * ft}3{fzc}...{rzc}5
144
139
  """
145
140
  # breakpoint()
146
- return _pretty_str(_textwrap.dedent(f).strip("\n"))
141
+ return pretty_str(textwrap.dedent(f).strip("\n"))
147
142
 
148
143
  def set_forward_primer_footprint(self, length):
149
- self.forward_primer = _Primer(
144
+ self.forward_primer = Primer(
150
145
  self.forward_primer.tail + self.seq[:length], footprint=length
151
146
  )
152
147
 
153
148
  def set_reverse_primer_footprint(self, length):
154
- self.reverse_primer = _Primer(
149
+ self.reverse_primer = Primer(
155
150
  self.reverse_primer.tail + self.seq[:length], footprint=length
156
151
  )
157
152
 
158
153
  def program(self):
159
- return _program(self)
154
+ return program(self)
160
155
 
161
156
  def dbd_program(self):
162
- return _dbd_program(self)
157
+ return dbd_program(self)
163
158
 
164
159
  def primers(self):
165
160
  return self.forward_primer, self.reverse_primer
pydna/amplify.py CHANGED
@@ -13,48 +13,22 @@ PCR product. The Anneal class should be used if more flexibility is required.
13
13
  Primers with 5' tails as well as inverse PCR on circular templates are handled
14
14
  correctly."""
15
15
 
16
- from pydna._pretty import pretty_str as _pretty_str
17
- from pydna.utils import flatten as _flatten
18
-
19
- # from pydna.utils import memorize as _memorize
20
- from pydna.utils import rc as _rc, shift_feature as _shift_feature
21
- from pydna.amplicon import Amplicon as _Amplicon
22
- from pydna.primer import Primer as _Primer
23
- from pydna.seqrecord import SeqRecord as _SeqRecord
24
- from pydna.dseqrecord import Dseqrecord as _Dseqrecord
25
- from Bio.SeqFeature import SeqFeature as _SeqFeature
26
- from Bio.SeqFeature import SimpleLocation as _SimpleLocation
27
- from Bio.SeqFeature import CompoundLocation as _CompoundLocation
28
- from pydna.seq import Seq as _Seq
29
- import itertools as _itertools
30
- import re as _re
31
- import copy as _copy
32
- import operator as _operator
33
-
34
- # import os as _os
35
-
36
- # import logging as _logging
37
-
38
- # _module_logger = _logging.getLogger("pydna." + __name__)
39
-
40
- _table = { # IUPAC Ambiguity Codes for Nucleotide Degeneracy and U for Uracile
41
- "A": "A",
42
- "C": "C",
43
- "G": "G",
44
- "T": "T",
45
- "U": "A", # XXX
46
- "R": "(A|G)",
47
- "Y": "(C|T)",
48
- "S": "(G|C)",
49
- "W": "(A|T)",
50
- "K": "(G|T)",
51
- "M": "(A|C)",
52
- "B": "(C|G|T)",
53
- "D": "(A|G|T)",
54
- "H": "(A|C|T)",
55
- "V": "(A|C|G)",
56
- "N": "(A|G|C|T)",
57
- }
16
+ from pydna._pretty import pretty_str
17
+ from pydna.utils import flatten
18
+ from pydna.utils import shift_feature
19
+ from pydna.amplicon import Amplicon
20
+ from pydna.primer import Primer
21
+ from pydna.seqrecord import SeqRecord
22
+ from pydna.dseqrecord import Dseqrecord
23
+ from Bio.SeqFeature import SeqFeature
24
+ from Bio.SeqFeature import SimpleLocation
25
+ from Bio.SeqFeature import CompoundLocation
26
+ from pydna.seq import Seq
27
+ import re
28
+ import copy
29
+ import operator
30
+ from pydna.alphabet import iupac_compl_regex
31
+ from pydna.utils import anneal_from_left
58
32
 
59
33
 
60
34
  def _annealing_positions(primer, template, limit):
@@ -70,13 +44,14 @@ def _annealing_positions(primer, template, limit):
70
44
 
71
45
  <- - - - - - - - - - template - - - - - - - - - - - - - >
72
46
 
73
- <------- start (int) ------>
74
- 5'-...gctactacacacgtactgactgcctccaagatagagtcagtaaccacactcgat...3'
47
+ < ----- start = 26 ------>
48
+ 5'- gctactacacacgtactgactgcctccaagatagagtcagtaaccacactcgatag...3'
75
49
  ||||||||||||||||||||||||||||||||||||||||||||||||
76
50
  3'-gttctatctcagtcattggtgtATAGTG-5'
77
51
 
78
52
  <-footprint length -->
79
53
 
54
+
80
55
  Parameters
81
56
  ----------
82
57
  primer : string
@@ -85,7 +60,7 @@ def _annealing_positions(primer, template, limit):
85
60
  template : string
86
61
  The template sequence 5'-3'
87
62
 
88
- limit : int = 15, optional
63
+ limit : int
89
64
  footprint needs to be at least of length limit.
90
65
 
91
66
  Returns
@@ -94,32 +69,37 @@ def _annealing_positions(primer, template, limit):
94
69
  [ (start1, footprint1), (start2, footprint2) ,..., ]
95
70
  """
96
71
 
72
+ # under_tail
73
+ # anchor AACCACACTCGAT
74
+ # CAAGATAGAGTCAGT
75
+ # |||||||||||||||
76
+ # gttctatctcagtca
77
+ # ttggtgtATAGTG revprimer
78
+ # tail
79
+ #
80
+ # | <- limit -> |
81
+
97
82
  # return empty list if primer too short
98
83
  if len(primer) < limit:
99
84
  return []
100
85
 
101
- prc = _rc(primer)
86
+ revprimer = primer[::-1]
102
87
 
103
88
  # head is minimum part of primer that must anneal
104
- head = prc[:limit].upper()
89
+ head = revprimer[:limit].upper()
90
+ tail = revprimer[limit:].upper()
105
91
 
106
92
  # Make regex pattern that reflects extended IUPAC DNA code
107
- head = "".join(_table[key] for key in head)
108
-
109
- positions = [m.start() for m in _re.finditer(f"(?={head})", template, _re.I)]
110
-
111
- if positions:
112
- tail = prc[limit:].lower()
113
- length = len(tail)
114
- results = []
115
- for match_start in positions:
116
- tm = template[match_start + limit : match_start + limit + length].lower()
117
- footprint = len(
118
- list(_itertools.takewhile(lambda x: x[0] == x[1], zip(tail, tm)))
119
- )
120
- results.append((match_start, footprint + limit))
121
- return results
122
- return []
93
+ head_regex = "".join(iupac_compl_regex[key] for key in head)
94
+ primer_regex = f"(?:({head_regex})(.{{0,{len(primer) - limit}}}))"
95
+
96
+ results = []
97
+ for m in re.finditer(primer_regex, template.upper()):
98
+ anchor, under_tail = m.groups()
99
+ match_start = m.start()
100
+ match_extension = anneal_from_left(tail, under_tail[::-1])
101
+ results.append((match_start, limit + match_extension))
102
+ return results
123
103
 
124
104
 
125
105
  # class _Memoize(type):
@@ -219,7 +199,7 @@ class Anneal(object): # ), metaclass=_Memoize):
219
199
 
220
200
  """
221
201
  self.primers = primers
222
- self.template = _copy.deepcopy(template)
202
+ self.template = copy.deepcopy(template)
223
203
 
224
204
  self.limit = limit
225
205
  self.kwargs = kwargs
@@ -242,7 +222,7 @@ class Anneal(object): # ), metaclass=_Memoize):
242
222
  for p in self.primers:
243
223
  self.forward_primers.extend(
244
224
  (
245
- _Primer(
225
+ Primer(
246
226
  p,
247
227
  # template = self.template,
248
228
  position=tcl - pos - min(self.template.seq.ovhg, 0),
@@ -254,7 +234,7 @@ class Anneal(object): # ), metaclass=_Memoize):
254
234
  )
255
235
  self.reverse_primers.extend(
256
236
  (
257
- _Primer(
237
+ Primer(
258
238
  p,
259
239
  # template = self.template,
260
240
  position=pos + max(0, self.template.seq.ovhg),
@@ -265,16 +245,16 @@ class Anneal(object): # ), metaclass=_Memoize):
265
245
  )
266
246
  )
267
247
 
268
- self.forward_primers.sort(key=_operator.attrgetter("position"))
269
- self.reverse_primers.sort(key=_operator.attrgetter("position"), reverse=True)
248
+ self.forward_primers.sort(key=operator.attrgetter("position"))
249
+ self.reverse_primers.sort(key=operator.attrgetter("position"), reverse=True)
270
250
 
271
251
  for fp in self.forward_primers:
272
252
  if fp.position - fp._fp >= 0:
273
253
  start = fp.position - fp._fp
274
254
  end = fp.position
275
255
  self.template.features.append(
276
- _SeqFeature(
277
- _SimpleLocation(start, end, strand=1),
256
+ SeqFeature(
257
+ SimpleLocation(start, end, strand=1),
278
258
  type="primer_bind",
279
259
  qualifiers={
280
260
  "label": [fp.name],
@@ -287,11 +267,11 @@ class Anneal(object): # ), metaclass=_Memoize):
287
267
  else:
288
268
  start = len(self.template) - fp._fp + fp.position
289
269
  end = start + fp._fp - len(self.template)
290
- sf = _SeqFeature(
291
- _CompoundLocation(
270
+ sf = SeqFeature(
271
+ CompoundLocation(
292
272
  [
293
- _SimpleLocation(start, len(self.template)),
294
- _SimpleLocation(0, end),
273
+ SimpleLocation(start, len(self.template)),
274
+ SimpleLocation(0, end),
295
275
  ]
296
276
  ),
297
277
  type="primer_bind",
@@ -309,8 +289,8 @@ class Anneal(object): # ), metaclass=_Memoize):
309
289
  start = rp.position
310
290
  end = rp.position + rp._fp
311
291
  self.template.features.append(
312
- _SeqFeature(
313
- _SimpleLocation(start, end, strand=-1),
292
+ SeqFeature(
293
+ SimpleLocation(start, end, strand=-1),
314
294
  type="primer_bind",
315
295
  qualifiers={
316
296
  "label": [rp.name],
@@ -324,11 +304,11 @@ class Anneal(object): # ), metaclass=_Memoize):
324
304
  start = rp.position
325
305
  end = rp.position + rp._fp - len(self.template)
326
306
  self.template.features.append(
327
- _SeqFeature(
328
- _CompoundLocation(
307
+ SeqFeature(
308
+ CompoundLocation(
329
309
  [
330
- _SimpleLocation(0, end, strand=-1),
331
- _SimpleLocation(start, len(self.template), strand=-1),
310
+ SimpleLocation(0, end, strand=-1),
311
+ SimpleLocation(start, len(self.template), strand=-1),
332
312
  ],
333
313
  ),
334
314
  type="primer_bind",
@@ -368,15 +348,15 @@ class Anneal(object): # ), metaclass=_Memoize):
368
348
  continue
369
349
  # Shift features to the right if there was a tail
370
350
  shift_amount = len(fp.tail)
371
- feats = [_shift_feature(f, shift_amount, None) for f in feats]
351
+ feats = [shift_feature(f, shift_amount, None) for f in feats]
372
352
 
373
353
  if tpl.circular and fp.position == rp.position:
374
- prd = _Dseqrecord(fp) + _Dseqrecord(rp).reverse_complement()
354
+ prd = Dseqrecord(fp) + Dseqrecord(rp).reverse_complement()
375
355
  else:
376
356
  prd = (
377
- _Dseqrecord(fp)
357
+ Dseqrecord(fp)
378
358
  + tpl[fp.position : rp.position]
379
- + _Dseqrecord(rp).reverse_complement()
359
+ + Dseqrecord(rp).reverse_complement()
380
360
  )
381
361
  prd.features = feats
382
362
  full_tmpl_features = [
@@ -393,16 +373,16 @@ class Anneal(object): # ), metaclass=_Memoize):
393
373
  new_identifier = " ".join(ft.qualifiers["note"])
394
374
 
395
375
  from pydna.utils import (
396
- identifier_from_string as _identifier_from_string,
376
+ identifier_from_string,
397
377
  ) # TODO: clean this up
398
378
 
399
379
  prd.name = (
400
- _identifier_from_string(new_identifier)[:16]
380
+ identifier_from_string(new_identifier)[:16]
401
381
  or self.kwargs.get("name")
402
382
  or f"{len(prd)}bp_PCR_prod"[:16]
403
383
  )
404
384
  prd.id = (
405
- _identifier_from_string(new_identifier)[:16]
385
+ identifier_from_string(new_identifier)[:16]
406
386
  or self.kwargs.get("id")
407
387
  or f"{len(prd)}bp"[:16]
408
388
  )
@@ -410,7 +390,7 @@ class Anneal(object): # ), metaclass=_Memoize):
410
390
  "description"
411
391
  ) or "pcr_product_{}_{}".format(fp.description, rp.description)
412
392
 
413
- amplicon = _Amplicon(
393
+ amplicon = Amplicon(
414
394
  prd,
415
395
  template=tpl,
416
396
  forward_primer=fp,
@@ -456,12 +436,12 @@ class Anneal(object): # ), metaclass=_Memoize):
456
436
  )
457
437
  else:
458
438
  mystring += "No reverse primers anneal...\n"
459
- return _pretty_str(mystring.strip())
439
+ return pretty_str(mystring.strip())
460
440
 
461
441
  report = __str__
462
442
 
463
443
 
464
- def pcr(*args, **kwargs) -> _Amplicon:
444
+ def pcr(*args, **kwargs) -> Amplicon:
465
445
  """pcr is a convenience function for the Anneal class to simplify its
466
446
  usage, especially from the command line. If more than one or no PCR
467
447
  product is formed, a ValueError is raised.
@@ -523,15 +503,15 @@ tatcgactgtatcatctgatagcac")
523
503
 
524
504
  """
525
505
 
526
- output = _flatten(args) # flatten
506
+ output = flatten(args) # flatten
527
507
  new = []
528
508
  for s in output:
529
509
  if hasattr(s, "watson"):
530
- s = _SeqRecord(_Seq(s.watson))
510
+ s = SeqRecord(Seq(s.watson))
531
511
  elif hasattr(s, "transcribe"):
532
- s = _SeqRecord(s)
512
+ s = SeqRecord(s)
533
513
  elif isinstance(s, str):
534
- s = _SeqRecord(_Seq(s))
514
+ s = SeqRecord(Seq(s))
535
515
  elif hasattr(s, "features"):
536
516
  pass
537
517
  else:
@@ -546,7 +526,7 @@ tatcgactgtatcatctgatagcac")
546
526
  new = [new[0].forward_primer, new[0].reverse_primer, new[0].template]
547
527
 
548
528
  if not hasattr(new[-1].seq, "watson"):
549
- new[-1] = _Dseqrecord(s)
529
+ new[-1] = Dseqrecord(s)
550
530
 
551
531
  anneal_primers = Anneal(new[:-1], new[-1], **kwargs)
552
532