pydna 5.5.1__py3-none-any.whl → 5.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/seq.py CHANGED
@@ -25,9 +25,10 @@ from Bio.Seq import Seq as _Seq
25
25
  from pydna._pretty import PrettyTable as _PrettyTable
26
26
 
27
27
  from typing import List as _List, Optional as _Optional, Tuple as _Tuple
28
- import logging as _logging
29
28
 
30
- _module_logger = _logging.getLogger("pydna." + __name__)
29
+ # import logging as _logging
30
+
31
+ # _module_logger = _logging.getLogger("pydna." + __name__)
31
32
 
32
33
 
33
34
  class Seq(_Seq):
@@ -43,7 +44,9 @@ class Seq(_Seq):
43
44
  **kwargs,
44
45
  ) -> "ProteinSeq":
45
46
  """Translate.."""
46
- p = super().translate(*args, stop_symbol=stop_symbol, to_stop=to_stop, cds=cds, gap=gap, **kwargs)
47
+ p = super().translate(
48
+ *args, stop_symbol=stop_symbol, to_stop=to_stop, cds=cds, gap=gap, **kwargs
49
+ )
47
50
  return ProteinSeq(p._data)
48
51
 
49
52
  def gc(self) -> float:
@@ -78,10 +81,17 @@ class Seq(_Seq):
78
81
 
79
82
  def express(self, organism: str = "sce") -> _PrettyTable:
80
83
  """docstring."""
81
- x = _PrettyTable(["cds", "len", "cai", "gc", "sta", "stp", "n-end"] + _rare_codons[organism] + ["rare"])
84
+ x = _PrettyTable(
85
+ ["cds", "len", "cai", "gc", "sta", "stp", "n-end"]
86
+ + _rare_codons[organism]
87
+ + ["rare"]
88
+ )
82
89
  val = []
83
90
 
84
- val.append(f"{self._data.upper().decode('ASCII')[:3]}..." f"{self._data.upper().decode('ASCII')[-3:]}")
91
+ val.append(
92
+ f"{self._data.upper().decode('ASCII')[:3]}..."
93
+ f"{self._data.upper().decode('ASCII')[-3:]}"
94
+ )
85
95
  val.append(len(self) / 3)
86
96
  val.append(self.cai(organism))
87
97
  val.append(self.gc())
@@ -103,7 +113,9 @@ class Seq(_Seq):
103
113
 
104
114
  def orfs2(self, minsize: int = 30) -> _List[str]:
105
115
  """docstring."""
106
- orf = _re.compile(f"ATG(?:...){{{minsize},}}?(?:TAG|TAA|TGA)", flags=_re.IGNORECASE)
116
+ orf = _re.compile(
117
+ f"ATG(?:...){{{minsize},}}?(?:TAG|TAA|TGA)", flags=_re.IGNORECASE
118
+ )
107
119
  start = 0
108
120
  matches: _List[slice] = []
109
121
  s = self._data.decode("ASCII")
@@ -203,7 +215,9 @@ class ProteinSeq(_Seq):
203
215
  ----------
204
216
  .. [#] http://wiki.christophchamp.com/index.php/SEGUID
205
217
  """
206
- return _lsseguid(self._data.decode("utf8").upper(), alphabet="{protein-extended}")
218
+ return _lsseguid(
219
+ self._data.decode("utf8").upper(), alphabet="{protein-extended}"
220
+ )
207
221
 
208
222
  def __getitem__(self, key):
209
223
  result = super().__getitem__(key)
@@ -232,14 +246,3 @@ class ProteinSeq(_Seq):
232
246
  Guruprasad K., Reddy B.V.B., Pandit M.W. Protein Engineering 4:155-161(1990).
233
247
  """
234
248
  return self._pa().instability_index()
235
-
236
-
237
- if __name__ == "__main__":
238
- import os as _os
239
-
240
- cached = _os.getenv("pydna_cached_funcs", "")
241
- _os.environ["pydna_cached_funcs"] = ""
242
- import doctest
243
-
244
- doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
245
- _os.environ["pydna_cached_funcs"] = cached
pydna/seqrecord.py CHANGED
@@ -35,10 +35,10 @@ from copy import copy as _copy
35
35
  from pydna import _PydnaWarning
36
36
  from warnings import warn as _warn
37
37
 
38
- import logging as _logging
38
+ # import logging as _logging
39
39
  import datetime
40
40
 
41
- _module_logger = _logging.getLogger("pydna." + __name__)
41
+ # _module_logger = _logging.getLogger("pydna." + __name__)
42
42
 
43
43
 
44
44
  class SeqRecord(_SeqRecord):
@@ -87,7 +87,9 @@ class SeqRecord(_SeqRecord):
87
87
  self.seq = _Seq(self.seq)
88
88
 
89
89
  self.seq._data = b"".join(self.seq._data.split()) # remove whitespaces
90
- self.annotations = {_pretty_str(k): _pretty_str(v) for k, v in self.annotations.items()}
90
+ self.annotations = {
91
+ _pretty_str(k): _pretty_str(v) for k, v in self.annotations.items()
92
+ }
91
93
 
92
94
  @classmethod
93
95
  def from_Bio_SeqRecord(clc, sr: _SeqRecord):
@@ -109,7 +111,9 @@ class SeqRecord(_SeqRecord):
109
111
  if len(value) > 16:
110
112
  shortvalue = value[:16]
111
113
  _warn(
112
- ("locus property {} truncated" "to 16 chars {}").format(value, shortvalue),
114
+ ("locus property {} truncated" "to 16 chars {}").format(
115
+ value, shortvalue
116
+ ),
113
117
  _PydnaWarning,
114
118
  stacklevel=2,
115
119
  )
@@ -193,7 +197,7 @@ class SeqRecord(_SeqRecord):
193
197
  def translate(self):
194
198
  """docstring."""
195
199
  p = super().translate()
196
- return ProteinSeqRecord(_ProteinSeq(p.seq[:-1]))
200
+ return ProteinSeqRecord(_ProteinSeq(p.seq))
197
201
 
198
202
  def add_colors_to_features_for_ape(self):
199
203
  """Assign colors to features.
@@ -239,7 +243,9 @@ class SeqRecord(_SeqRecord):
239
243
  f.qualifiers["ApEinfo_fwdcolor"] = [cols[i % len(cols)]]
240
244
  f.qualifiers["ApEinfo_revcolor"] = [cols[::-1][i % len(cols)]]
241
245
 
242
- def add_feature(self, x=None, y=None, seq=None, type_="misc", strand=1, *args, **kwargs):
246
+ def add_feature(
247
+ self, x=None, y=None, seq=None, type_="misc", strand=1, *args, **kwargs
248
+ ):
243
249
  """Add a feature of type misc to the feature list of the sequence.
244
250
 
245
251
  Parameters
@@ -327,7 +333,9 @@ class SeqRecord(_SeqRecord):
327
333
  | 0 | L:ft2 | --> | 2 | 4 | 2 | misc | no |
328
334
  +-----+---------------+-----+-----+-----+-----+------+------+
329
335
  """
330
- x = _PrettyTable(["Ft#", "Label or Note", "Dir", "Sta", "End", "Len", "type", "orf?"])
336
+ x = _PrettyTable(
337
+ ["Ft#", "Label or Note", "Dir", "Sta", "End", "Len", "type", "orf?"]
338
+ )
331
339
  x.align["Ft#"] = "r" # Left align
332
340
  x.align["Label or Note"] = "l" # Left align
333
341
  x.align["Len"] = "r"
@@ -357,7 +365,8 @@ class SeqRecord(_SeqRecord):
357
365
  len(sf),
358
366
  sf.type,
359
367
  {True: "yes", False: "no"}[
360
- self.extract_feature(i).isorf() or self.extract_feature(i).reverse_complement().isorf()
368
+ self.extract_feature(i).isorf()
369
+ or self.extract_feature(i).reverse_complement().isorf()
361
370
  ],
362
371
  ]
363
372
  )
@@ -480,7 +489,9 @@ class SeqRecord(_SeqRecord):
480
489
  f"Stamp change.\nNew: {chksum}\nOld: {oldstamp[0]}",
481
490
  _PydnaWarning,
482
491
  )
483
- self.annotations["comment"] = (f"{oldcomment}\n" f"{tool} {chksum} {now()} {comment}").strip()
492
+ self.annotations["comment"] = (
493
+ f"{oldcomment}\n" f"{tool} {chksum} {now()} {comment}"
494
+ ).strip()
484
495
  return _pretty_str(chksum)
485
496
 
486
497
  def lcs(self, other, *args, limit=25, **kwargs):
@@ -729,14 +740,3 @@ class ProteinSeqRecord(SeqRecord):
729
740
  def __format__(self, format):
730
741
  """docstring."""
731
742
  return _pretty_str(_SeqRecord.__format__(self, format))
732
-
733
-
734
- if __name__ == "__main__":
735
- import os as _os
736
-
737
- cached = _os.getenv("pydna_cached_funcs", "")
738
- _os.environ["pydna_cached_funcs"] = ""
739
- import doctest
740
-
741
- doctest.testmod(verbose=True, optionflags=(doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE))
742
- _os.environ["pydna_cached_funcs"] = cached
pydna/sequence_picker.py CHANGED
@@ -7,11 +7,12 @@
7
7
 
8
8
  from pydna.dseqrecord import Dseqrecord
9
9
  import os as _os
10
- import logging as _logging
10
+
11
+ # import logging as _logging
11
12
  from Bio.Blast import NCBIWWW
12
13
  from Bio.Blast import NCBIXML
13
14
 
14
- _module_logger = _logging.getLogger("pydna." + __name__)
15
+ # _module_logger = _logging.getLogger("pydna." + __name__)
15
16
 
16
17
 
17
18
  email = _os.getenv("pydna_email")
@@ -51,13 +52,3 @@ def genbank_accession(s: str) -> Dseqrecord:
51
52
  description=(f"{best_alignment.accession} " f"REGION: {start}..{stop}"),
52
53
  )
53
54
  return result
54
-
55
-
56
- if __name__ == "__main__":
57
- cached = _os.getenv("pydna_cached_funcs", "")
58
- _os.environ["pydna_cached_funcs"] = ""
59
- import doctest
60
-
61
- doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
62
- _os.environ["pydna_cached_funcs"] = cached
63
- pass
@@ -0,0 +1,44 @@
1
+ # -*- coding: utf-8 -*-
2
+ from pydna.dseqrecord import Dseqrecord as _Dseqrecord
3
+ import re
4
+ from Bio.Data.IUPACData import ambiguous_dna_values as _ambiguous_dna_values
5
+
6
# Copy of the IUPAC ambiguity table restricted to the degenerate codes:
# the four unambiguous bases are removed from the copy, so the imported
# table itself is left untouched.
ambiguous_only_dna_values = dict(_ambiguous_dna_values)
for normal_base in "ACGT":
    ambiguous_only_dna_values.pop(normal_base)
9
+
10
+
11
def compute_regex_site(site: str) -> str:
    """
    Build a case-insensitive regex pattern from a site with degenerate bases.

    The site is upper-cased, then every code found in
    ``ambiguous_only_dna_values`` whose value holds more than one base is
    replaced by a character class listing those bases. All other characters
    are left as they are.

    Args:
        site: The string to convert to a regex pattern.

    Returns:
        The regex pattern, prefixed with the inline ``(?i)`` flag.
    """
    pattern = site.upper()
    for code, bases in ambiguous_only_dna_values.items():
        if len(bases) <= 1:
            continue
        char_class = "[" + "".join(bases) + "]"
        pattern = pattern.replace(code, char_class)

    # The inline flag makes the whole pattern case insensitive.
    return "(?i)" + pattern
29
+
30
+
31
def dseqrecord_finditer(pattern: str, seq: "_Dseqrecord") -> list[re.Match]:
    """
    Finds all matches of a regex pattern in a Dseqrecord.

    A circular sequence is searched as the sequence concatenated with
    itself, so that matches spanning the origin are found. Matches that
    start beyond ``len(seq)`` belong to the second copy and are discarded.
    A match starting exactly at ``len(seq)`` is the same position as 0 on
    the circle, so it is dropped when a match starting at 0 was already
    found (otherwise the same site would be reported twice).

    Args:
        pattern: The regex pattern to search for.
        seq: The Dseqrecord to search in.

    Returns:
        A list of match objects, ordered by start position. For circular
        sequences the coordinates refer to the doubled sequence, so
        ``end()`` may be larger than ``len(seq)``.
    """
    seq_len = len(seq)
    query = str(seq.seq) * 2 if seq.circular else str(seq.seq)
    matches = [m for m in re.finditer(pattern, query) if m.start() <= seq_len]

    # finditer yields matches with increasing start positions, so a possible
    # duplicate of the match at 0 can only be the last one kept.
    if (
        seq.circular
        and len(matches) > 1
        and matches[0].start() == 0
        and matches[-1].start() == seq_len
    ):
        matches.pop()

    return matches
pydna/tm.py CHANGED
@@ -213,7 +213,9 @@ def dbd_program(amplicon, tm=tm_dbd, ta=ta_dbd):
213
213
 
214
214
  """
215
215
  PfuSso7d_extension_rate = 15 # seconds/kB PCR product
216
- extension_time_PfuSso7d = max(10, int(PfuSso7d_extension_rate * len(amplicon) / 1000)) # seconds
216
+ extension_time_PfuSso7d = max(
217
+ 10, int(PfuSso7d_extension_rate * len(amplicon) / 1000)
218
+ ) # seconds
217
219
 
218
220
  # The program returned is either a two step or three step program
219
221
  # This depends on the tm and length of the primers in the
@@ -324,7 +326,10 @@ def tmbresluc(primer: str, *args, primerc=500.0, saltc=50, **kwargs):
324
326
  dH += _thermodynamic_data.dHBr[n1 - 97][n2 - 97]
325
327
  dS += _thermodynamic_data.dSBr[n1 - 97][n2 - 97]
326
328
 
327
- tm = (dH / (1.9872 * _math.log(pri / 1600) + dS) + (16.6 * _math.log(saltc)) / _math.log(10)) - 273.15
329
+ tm = (
330
+ dH / (1.9872 * _math.log(pri / 1600) + dS)
331
+ + (16.6 * _math.log(saltc)) / _math.log(10)
332
+ ) - 273.15
328
333
 
329
334
  return tm
330
335
 
@@ -365,25 +370,18 @@ def tm_neb(primer, conc=0.5, prodcode="q5-0"):
365
370
  try:
366
371
  res = requests.get(url, params=params, headers=headers)
367
372
  except requests.exceptions.ConnectionError as e:
368
- raise requests.exceptions.ConnectionError("Could not connect to NEB API.") from e
373
+ raise requests.exceptions.ConnectionError(
374
+ "Could not connect to NEB API."
375
+ ) from e
369
376
  if res.status_code != 200:
370
377
  if "error" in res.json():
371
378
  raise requests.exceptions.HTTPError(res.status_code, res.json()["error"])
372
379
  else:
373
- raise requests.exceptions.HTTPError(res.status_code, res.text) # pragma: no cover
380
+ raise requests.exceptions.HTTPError(
381
+ res.status_code, res.text
382
+ ) # pragma: no cover
374
383
  r = res.json()
375
384
  if r["success"]:
376
385
  return r["data"]["tm1"]
377
386
  else:
378
387
  raise requests.exceptions.HTTPError(r["error"])
379
-
380
-
381
- if __name__ == "__main__":
382
- import os as _os
383
-
384
- cached = _os.getenv("pydna_cached_funcs", "")
385
- _os.environ["pydna_cached_funcs"] = ""
386
- import doctest
387
-
388
- doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
389
- _os.environ["pydna_cached_funcs"] = cached
pydna/types.py ADDED
@@ -0,0 +1,41 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Types used in the pydna package.
4
+ """
5
+
6
+ from typing import (
7
+ TYPE_CHECKING,
8
+ Tuple as _Tuple,
9
+ Union as _Union,
10
+ TypeVar as _TypeVar,
11
+ Iterable as _Iterable,
12
+ Callable as _Callable,
13
+ )
14
+
15
+ if TYPE_CHECKING:
16
+ from Bio.Restriction import AbstractCut as _AbstractCut
17
+ from Bio.Restriction import RestrictionBatch as _RestrictionBatch
18
+ from pydna.dseq import Dseq
19
+ from Bio.SeqFeature import Location as _Location
20
+ from pydna.dseqrecord import Dseqrecord as _Dseqrecord
21
+
22
+
23
# To represent any subclass of Dseq
DseqType = _TypeVar("DseqType", bound="Dseq")
# Constrained type variable: a RestrictionBatch, an iterable of AbstractCut,
# or a single AbstractCut.
EnzymesType = _TypeVar(
    "EnzymesType", "_RestrictionBatch", _Iterable["_AbstractCut"], "_AbstractCut"
)
# A pair of ints together with an AbstractCut or None.
# NOTE(review): the meaning of the two ints is not visible here - confirm
# against the code that produces cut sites.
CutSiteType = _Tuple[_Tuple[int, int], _Union["_AbstractCut", None]]
# Two ints followed by two optional Locations.
AssemblyEdgeType = _Tuple[int, int, "_Location | None", "_Location | None"]
# One int followed by two optional Locations.
AssemblySubFragmentType = _Tuple[int, "_Location | None", "_Location | None"]
# An assembly expressed as a list of edges / as a list of sub-fragments.
EdgeRepresentationAssembly = list[AssemblyEdgeType]
SubFragmentRepresentationAssembly = list[AssemblySubFragmentType]


# Type alias that describes overlap between two sequences x and y
# the two first numbers are the positions where the overlap starts on x and y
# the third number is the length of the overlap
SequenceOverlap = _Tuple[int, int, int]
# Callable taking two Dseqrecords and an int and returning a list of overlaps.
# NOTE(review): the int is presumably a minimum overlap length - confirm.
AssemblyAlgorithmType = _Callable[
    ["_Dseqrecord", "_Dseqrecord", int], list[SequenceOverlap]
]
pydna/utils.py CHANGED
@@ -8,13 +8,15 @@
8
8
 
9
9
  from Bio.Data.IUPACData import ambiguous_dna_complement as _ambiguous_dna_complement
10
10
  from Bio.Seq import _maketrans
11
- import shelve as _shelve
12
- import os as _os
11
+
12
+ # import shelve as _shelve
13
+ # import os as _os
13
14
  import re as _re
14
- import logging as _logging
15
- import base64 as _base64
16
- import pickle as _pickle
17
- import hashlib as _hashlib
15
+
16
+ # import logging as _logging
17
+ # import base64 as _base64
18
+ # import pickle as _pickle
19
+ # import hashlib as _hashlib
18
20
  import keyword as _keyword
19
21
  import collections as _collections
20
22
  import itertools as _itertools
@@ -31,13 +33,14 @@ from pydna.codon import rare_codons as _rare_codons
31
33
 
32
34
  from Bio.SeqFeature import SimpleLocation as _sl
33
35
  from Bio.SeqFeature import CompoundLocation as _cl
36
+ from Bio.SeqFeature import Location as _Location
34
37
 
35
38
  from typing import Union as _Union, TypeVar as _TypeVar, List as _List
36
39
 
37
40
  # For functions that take str or bytes as input and return str or bytes as output, matching the input type
38
41
  StrOrBytes = _TypeVar("StrOrBytes", str, bytes)
39
42
 
40
- _module_logger = _logging.getLogger("pydna." + __name__)
43
+ # _module_logger = _logging.getLogger("pydna." + __name__)
41
44
  _ambiguous_dna_complement.update({"U": "A"})
42
45
  _complement_table = _maketrans(_ambiguous_dna_complement)
43
46
 
@@ -71,7 +74,9 @@ def three_frame_orfs(
71
74
  pass
72
75
  else:
73
76
  if stopindex - startindex >= limit:
74
- orfs.append((frame, startindex * 3 + frame, (stopindex + 1) * 3 + frame))
77
+ orfs.append(
78
+ (frame, startindex * 3 + frame, (stopindex + 1) * 3 + frame)
79
+ )
75
80
  # print(stopindex, startindex, limit)
76
81
  return orfs
77
82
 
@@ -82,13 +87,17 @@ def shift_location(original_location, shift, lim):
82
87
  strand = original_location.strand
83
88
  if lim is None:
84
89
  if min(original_location) + shift < 0:
85
- raise ValueError("Shift moves location below zero, use a `lim` to loop around if sequence is circular.")
90
+ raise ValueError(
91
+ "Shift moves location below zero, use a `lim` to loop around if sequence is circular."
92
+ )
86
93
  lim = _sys.maxsize
87
94
 
88
95
  for part in original_location.parts:
89
96
  new_start = (part.start + shift) % lim
90
97
  new_end = (part.end + shift) % lim or lim
91
- old_start, old_end = (newparts[-1].start, newparts[-1].end) if len(newparts) else (None, None)
98
+ old_start, old_end = (
99
+ (newparts[-1].start, newparts[-1].end) if len(newparts) else (None, None)
100
+ )
92
101
 
93
102
  # The "join with old" cases are for features with multiple parts
94
103
  # in which consecutive parts do not have any bases between them.
@@ -278,49 +287,49 @@ def complement(sequence: str):
278
287
  return sequence.translate(_complement_table)
279
288
 
280
289
 
281
- def memorize(filename):
282
- """Cache functions and classes.
290
+ # def memorize(filename):
291
+ # """Cache functions and classes.
283
292
 
284
- see pydna.download
285
- """
293
+ # see pydna.download
294
+ # """
286
295
 
287
- def decorator(f):
288
- def wrappee(*args, **kwargs):
289
- _module_logger.info("#### memorizer ####")
290
- _module_logger.info("cache filename = %s", filename)
291
- _module_logger.info(
292
- "os.environ['pydna_cached_funcs'] = %s",
293
- _os.getenv("pydna_cached_funcs", ""),
294
- )
295
- if filename not in _os.getenv("pydna_cached_funcs", ""):
296
- _module_logger.info("cache filename not among cached functions, made it new!")
297
- return f(*args, **kwargs)
298
- key = _base64.urlsafe_b64encode(_hashlib.sha1(_pickle.dumps((args, kwargs))).digest()).decode("ascii")
299
- _module_logger.info("key = %s", key)
300
- cache = _shelve.open(
301
- _os.path.join(_os.environ["pydna_data_dir"], identifier_from_string(filename)),
302
- writeback=False,
303
- )
304
- try:
305
- result = cache[key]
306
- except KeyError:
307
- _module_logger.info(
308
- "no result for key %s in shelve %s",
309
- key,
310
- identifier_from_string(filename),
311
- )
312
- result = f(*args, **kwargs)
313
- _module_logger.info("made it new!")
314
- cache[key] = result
315
- _module_logger.info("saved result under key %s", key)
316
- else:
317
- _module_logger.info("found %s in cache", key)
318
- cache.close()
319
- return result
296
+ # def decorator(f):
297
+ # def wrappee(*args, **kwargs):
298
+ # _module_logger.info("#### memorizer ####")
299
+ # _module_logger.info("cache filename = %s", filename)
300
+ # _module_logger.info(
301
+ # "os.environ['pydna_cached_funcs'] = %s",
302
+ # _os.getenv("pydna_cached_funcs", ""),
303
+ # )
304
+ # if filename not in _os.getenv("pydna_cached_funcs", ""):
305
+ # _module_logger.info("cache filename not among cached functions, made it new!")
306
+ # return f(*args, **kwargs)
307
+ # key = _base64.urlsafe_b64encode(_hashlib.sha1(_pickle.dumps((args, kwargs))).digest()).decode("ascii")
308
+ # _module_logger.info("key = %s", key)
309
+ # cache = _shelve.open(
310
+ # _os.path.join(_os.environ["pydna_data_dir"], identifier_from_string(filename)),
311
+ # writeback=False,
312
+ # )
313
+ # try:
314
+ # result = cache[key]
315
+ # except KeyError:
316
+ # _module_logger.info(
317
+ # "no result for key %s in shelve %s",
318
+ # key,
319
+ # identifier_from_string(filename),
320
+ # )
321
+ # result = f(*args, **kwargs)
322
+ # _module_logger.info("made it new!")
323
+ # cache[key] = result
324
+ # _module_logger.info("saved result under key %s", key)
325
+ # else:
326
+ # _module_logger.info("found %s in cache", key)
327
+ # cache.close()
328
+ # return result
320
329
 
321
- return wrappee
330
+ # return wrappee
322
331
 
323
- return decorator
332
+ # return decorator
324
333
 
325
334
 
326
335
  def identifier_from_string(s: str) -> str:
@@ -505,7 +514,11 @@ def randomORF(length, maxlength=None):
505
514
  starts = ("ATG",)
506
515
  stops = ("TAA", "TAG", "TGA")
507
516
 
508
- return random.choice(starts) + "".join([random.choice(cdns) for x in range(length)]) + random.choice(stops)
517
+ return (
518
+ random.choice(starts)
519
+ + "".join([random.choice(cdns) for x in range(length)])
520
+ + random.choice(stops)
521
+ )
509
522
 
510
523
 
511
524
  def randomprot(length, maxlength=None):
@@ -614,7 +627,9 @@ def eq(*args, **kwargs):
614
627
  if kwargs["circular"] is False:
615
628
  topology = "linear"
616
629
  else:
617
- topology = set([arg.circular if hasattr(arg, "circular") else None for arg in args])
630
+ topology = set(
631
+ [arg.circular if hasattr(arg, "circular") else None for arg in args]
632
+ )
618
633
 
619
634
  if len(topology) != 1:
620
635
  raise ValueError("sequences have different topologies")
@@ -625,7 +640,10 @@ def eq(*args, **kwargs):
625
640
  topology = "circular"
626
641
 
627
642
  args = [arg.seq if hasattr(arg, "seq") else arg for arg in args]
628
- args_string_list = [arg.watson.lower() if hasattr(arg, "watson") else str(arg).lower() for arg in args]
643
+ args_string_list = [
644
+ arg.watson.lower() if hasattr(arg, "watson") else str(arg).lower()
645
+ for arg in args
646
+ ]
629
647
 
630
648
  length = set((len(s) for s in args_string_list))
631
649
 
@@ -735,10 +753,107 @@ def locations_overlap(loc1: _Union[_sl, _cl], loc2: _Union[_sl, _cl], seq_len):
735
753
  return False
736
754
 
737
755
 
738
- if __name__ == "__main__":
739
- cached = _os.getenv("pydna_cached_funcs", "")
740
- _os.environ["pydna_cached_funcs"] = ""
741
- import doctest
756
def sum_is_sticky(
    three_prime_end: tuple[str, str],
    five_prime_end: tuple[str, str],
    partial: bool = False,
) -> int:
    """Return the length of the sticky overlap between two ends, or 0.

    Each end is a ``(type, overhang)`` pair. The ends are fully compatible
    when both have the same non-blunt type and the overhang of the second is
    the reverse complement of the overhang of the first; the length of the
    first overhang is then returned.

    With ``partial=True`` a partial overlap is also accepted: the longest
    suffix of the first overhang that equals a prefix of the second one
    (after reverse complementing one of them, depending on the end type)
    determines the returned length.

    Return 0 if the ends are not compatible."""
    end_type1, overhang1 = three_prime_end
    end_type2, overhang2 = five_prime_end

    # Both ends must be sticky and of the same kind for any ligation.
    same_sticky_type = end_type2 != "blunt" and end_type2 == end_type1

    if same_sticky_type and str(overhang2) == str(rc(overhang1)):
        return len(overhang1)

    if not partial or not same_sticky_type:
        return 0

    if end_type2 == "5'":
        overhang1 = str(rc(overhang1))
    elif end_type2 == "3'":
        overhang2 = str(rc(overhang2))

    # Count down so that the longest possible overlap is found first.
    longest = min(len(overhang1), len(overhang2))
    for size in range(longest, 0, -1):
        if overhang1[-size:] == overhang2[:size]:
            return size
    return 0
790
+
791
+
792
def limit_iterator(iterator, limit):
    """
    Yield the items of ``iterator``, raising a ValueError as soon as it
    produces more than ``limit`` items.

    The check is lazy: the first ``limit`` items are yielded normally and the
    error is only raised when a further item is requested and exists.
    """
    yielded = 0
    for item in iterator:
        if yielded >= limit:
            raise ValueError(f"Too many possible paths (more than {limit})")
        yield item
        yielded += 1
800
+
801
+
802
def create_location(
    start: int, end: int, lim: int, strand: int | None = None
) -> _Location:
    """
    Build a location object from a start and an end position.

    Negative positions are first shifted up by ``lim`` until they are no
    longer negative. If the end position is then not greater than the start
    position, the location is treated as wrapping around the origin of a
    circular sequence of length ``lim``.

    Parameters
    ----------
    start : int
        The start position of the location.
    end : int
        The end position of the location.
    lim : int
        The length of the sequence.
    strand : int, optional
        The strand of the location. None, 1 or -1.

    Returns
    -------
    location : Location
        The location object. Can be a SimpleLocation or a CompoundLocation if the feature spans the origin of
        a circular sequence.

    Examples
    --------
    >>> from pydna.utils import create_location
    >>> str(create_location(0, 5, 10,-1))
    '[0:5](-)'
    >>> str(create_location(0, 5, 10,+1))
    '[0:5](+)'
    >>> str(create_location(0, 5, 10))
    '[0:5]'
    >>> str(create_location(8, 2, 10))
    'join{[8:10], [0:2]}'
    >>> str(create_location(8, 2, 10,-1))
    'join{[0:2](-), [8:10](-)}'
    >>> str(create_location(-2, 2, 10))
    'join{[8:10], [0:2]}'

    Note this special case, 0 is the same as len(seq)
    >>> str(create_location(5, 0, 10))
    '[5:10]'

    Note the special case where if start and end are the same,
    the location spans the entire sequence (it's not empty).
    >>> str(create_location(5, 5, 10))
    'join{[5:10], [0:5]}'

    """
    # Bring negative coordinates into the non-negative range.
    while start < 0:
        start += lim
    while end < 0:
        end += lim

    if end <= start:
        # Wrapping case: extend the end past the sequence length and let
        # shift_location (with a zero shift) fold it back within ``lim``.
        unwrapped = _sl(start, end + lim, strand)
        return shift_location(unwrapped, 0, lim)

    return _sl(start, end, strand)