pydna 5.5.3__py3-none-any.whl → 5.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pydna/assembly.py CHANGED
@@ -42,41 +42,28 @@ sequences separating the overlapping regions form edges.
42
42
  The NetworkX package is used to trace linear and circular paths through the
43
43
  graph.
44
44
  """
45
- import os as _os
46
- from Bio.SeqFeature import SeqFeature as _SeqFeature
47
- from Bio.SeqFeature import ExactPosition as _ExactPosition
48
- from Bio.SeqFeature import SimpleLocation as _SimpleLocation
49
- from Bio.SeqFeature import CompoundLocation as _CompoundLocation
50
- from pydna.utils import rc as _rc
51
-
52
- # from pydna.utils import memorize as _memorize
53
- from pydna._pretty import pretty_str as _pretty_str
54
- from pydna.contig import Contig as _Contig
55
- from pydna.common_sub_strings import common_sub_strings, Match as _Match
56
-
57
- from pydna.dseqrecord import Dseqrecord as _Dseqrecord
58
- import networkx as _nx
59
-
60
- from copy import deepcopy as _deepcopy
61
- from typing import (
62
- Callable as _Callable,
63
- Dict as _Dict,
64
- List as _List,
65
- NamedTuple as _NamedTuple,
66
- TypedDict as _TypedDict,
67
- )
68
- import itertools as _itertools
45
+ import os
46
+ from Bio.SeqFeature import SeqFeature
47
+ from Bio.SeqFeature import ExactPosition
48
+ from Bio.SeqFeature import SimpleLocation
49
+ from Bio.SeqFeature import CompoundLocation
50
+ from pydna.utils import rc
69
51
 
70
- # import logging as _logging
52
+ from pydna._pretty import pretty_str as ps
53
+ from pydna.contig import Contig
54
+ from pydna.common_sub_strings import common_sub_strings, Match
71
55
 
72
- # from func_timeout import func_set_timeout
73
- # from wrapt_timeout_decorator import timeout
74
- from pydna.threading_timer_decorator_exit import exit_after
56
+ from pydna.dseqrecord import Dseqrecord
57
+ import networkx as nx
58
+
59
+ from copy import deepcopy
60
+ from typing import Callable, Dict, List, NamedTuple, TypedDict
61
+ import itertools
75
62
 
76
- # _module_logger = _logging.getLogger("pydna." + __name__)
63
+ from pydna.threading_timer_decorator_exit import exit_after
77
64
 
78
65
 
79
- class Assembly(object): # , metaclass=_Memoize):
66
+ class Assembly(object):
80
67
  """Assembly of a list of linear DNA fragments into linear or circular
81
68
  constructs. The Assembly is meant to replace the Assembly method as it
82
69
  is easier to use. Accepts a list of Dseqrecords (source fragments) to
@@ -123,13 +110,13 @@ class Assembly(object): # , metaclass=_Memoize):
123
110
 
124
111
  def __init__(
125
112
  self,
126
- frags: _List[_Dseqrecord],
113
+ frags: List[Dseqrecord],
127
114
  limit: int = 25,
128
- algorithm: _Callable[[str, str, int], _List[_Match]] = common_sub_strings,
115
+ algorithm: Callable[[str, str, int], List[Match]] = common_sub_strings,
129
116
  ) -> None:
130
117
  # Each fragment is a dict (_FragmentDict) holding the sequence and some extra properties
131
118
  # The order of the fragments has significance
132
- fragments: _List[_FragmentDict] = [
119
+ fragments: List[_FragmentDict] = [
133
120
  {
134
121
  "upper": str(f.seq).upper(),
135
122
  "mixed": str(f.seq),
@@ -142,7 +129,7 @@ class Assembly(object): # , metaclass=_Memoize):
142
129
 
143
130
  # rcfragments is a dict with fragments as keys and the reverse
144
131
  # complement as value
145
- rcfragments: _Dict[str, _FragmentDict] = {
132
+ rcfragments: Dict[str, _FragmentDict] = {
146
133
  f["mixed"]: {
147
134
  "upper": str(frc.seq).upper(),
148
135
  "mixed": str(frc.seq),
@@ -163,7 +150,7 @@ class Assembly(object): # , metaclass=_Memoize):
163
150
  # all combinations of fragments are compared.
164
151
  # see https://docs.python.org/3.10/library/itertools.html
165
152
  # itertools.combinations('ABCD', 2)--> AB AC AD BC BD CD
166
- for first, secnd in _itertools.combinations(fragments, 2):
153
+ for first, secnd in itertools.combinations(fragments, 2):
167
154
  if first["upper"] == secnd["upper"]:
168
155
  continue
169
156
 
@@ -222,7 +209,7 @@ class Assembly(object): # , metaclass=_Memoize):
222
209
  # multidigraph.html
223
210
 
224
211
  order = 0
225
- G = _nx.MultiDiGraph()
212
+ G = nx.MultiDiGraph()
226
213
  # loop through all fragments and their reverse complements
227
214
 
228
215
  for f in fragments:
@@ -231,7 +218,7 @@ class Assembly(object): # , metaclass=_Memoize):
231
218
  for f in rcfragments.values():
232
219
  f["nodes"] = sorted(set(f["nodes"]))
233
220
 
234
- for f in _itertools.chain(fragments, rcfragments.values()):
221
+ for f in itertools.chain(fragments, rcfragments.values()):
235
222
  # nodes are sorted in place in the order of their position
236
223
  # duplicates are removed (same position and sequence)
237
224
  # along the fragment since nodes are a tuple (position(int),
@@ -250,7 +237,7 @@ class Assembly(object): # , metaclass=_Memoize):
250
237
  start2,
251
238
  length2,
252
239
  node2,
253
- ) in _itertools.combinations(f["nodes"], 2):
240
+ ) in itertools.combinations(f["nodes"], 2):
254
241
  feats = [
255
242
  ft
256
243
  for ft in f["features"]
@@ -270,7 +257,7 @@ class Assembly(object): # , metaclass=_Memoize):
270
257
  name=f["name"],
271
258
  ) # string
272
259
 
273
- self.G = _nx.create_empty_copy(G)
260
+ self.G = nx.create_empty_copy(G)
274
261
  self.G.add_edges_from(
275
262
  sorted(
276
263
  G.edges(data=True), key=lambda t: len(t[2].get("seq", 1)), reverse=True
@@ -282,9 +269,9 @@ class Assembly(object): # , metaclass=_Memoize):
282
269
  self.rcfragments = rcfragments
283
270
  self.algorithm = algorithm
284
271
 
285
- @exit_after(int(_os.getenv("pydna_assembly_limit", 10)))
272
+ @exit_after(int(os.getenv("pydna_assembly_limit", 10)))
286
273
  def assemble_linear(self, start=None, end=None, max_nodes=None):
287
- G = _nx.MultiDiGraph(self.G)
274
+ G = nx.MultiDiGraph(self.G)
288
275
 
289
276
  G.add_nodes_from(["begin", "begin_rc", "end", "end_rc"], length=0)
290
277
 
@@ -354,16 +341,12 @@ class Assembly(object): # , metaclass=_Memoize):
354
341
  max_nodes = max_nodes or len(self.fragments)
355
342
 
356
343
  linearpaths = list(
357
- _itertools.chain(
358
- _nx.all_simple_paths(_nx.DiGraph(G), "begin", "end", cutoff=max_nodes),
359
- _nx.all_simple_paths(
360
- _nx.DiGraph(G), "begin", "end_rc", cutoff=max_nodes
361
- ),
362
- _nx.all_simple_paths(
363
- _nx.DiGraph(G), "begin_rc", "end", cutoff=max_nodes
364
- ),
365
- _nx.all_simple_paths(
366
- _nx.DiGraph(G), "begin_rc", "end_rc", cutoff=max_nodes
344
+ itertools.chain(
345
+ nx.all_simple_paths(nx.DiGraph(G), "begin", "end", cutoff=max_nodes),
346
+ nx.all_simple_paths(nx.DiGraph(G), "begin", "end_rc", cutoff=max_nodes),
347
+ nx.all_simple_paths(nx.DiGraph(G), "begin_rc", "end", cutoff=max_nodes),
348
+ nx.all_simple_paths(
349
+ nx.DiGraph(G), "begin_rc", "end_rc", cutoff=max_nodes
367
350
  ),
368
351
  )
369
352
  )
@@ -379,7 +362,7 @@ class Assembly(object): # , metaclass=_Memoize):
379
362
  e.append((u, v, d))
380
363
  edgelol.append(e)
381
364
 
382
- for edges in _itertools.product(*edgelol):
365
+ for edges in itertools.product(*edgelol):
383
366
  # TODO explain
384
367
  if [
385
368
  True
@@ -392,14 +375,14 @@ class Assembly(object): # , metaclass=_Memoize):
392
375
 
393
376
  if key in lps:
394
377
  continue # TODO: is this test needed?
395
- sg = _nx.DiGraph()
378
+ sg = nx.DiGraph()
396
379
  sg.add_edges_from(edges)
397
380
  sg.add_nodes_from((n, d) for n, d in G.nodes(data=True) if n in lp)
398
381
 
399
382
  edgefeatures = []
400
383
  offset = 0
401
384
  for u, v, e in edges:
402
- feats = _deepcopy(e["features"])
385
+ feats = deepcopy(e["features"])
403
386
  for f in feats:
404
387
  f.location += offset - e["piece"].start
405
388
  edgefeatures.extend(feats)
@@ -409,7 +392,7 @@ class Assembly(object): # , metaclass=_Memoize):
409
392
 
410
393
  return sorted(
411
394
  (
412
- _Contig.from_string(
395
+ Contig.from_string(
413
396
  lp[0],
414
397
  features=lp[1],
415
398
  graph=lp[2],
@@ -423,11 +406,11 @@ class Assembly(object): # , metaclass=_Memoize):
423
406
  reverse=True,
424
407
  )
425
408
 
426
- @exit_after(int(_os.getenv("pydna_assembly_limit", 10)))
409
+ @exit_after(int(os.getenv("pydna_assembly_limit", 10)))
427
410
  def assemble_circular(self, length_bound=None):
428
411
  cps = {} # circular assembly
429
412
  cpsrc = {}
430
- cpaths = sorted(_nx.simple_cycles(self.G, length_bound=length_bound), key=len)
413
+ cpaths = sorted(nx.simple_cycles(self.G, length_bound=length_bound), key=len)
431
414
  cpaths_sorted = []
432
415
  for cpath in cpaths:
433
416
  order, node = min((self.G.nodes[node]["order"], node) for node in cpath)
@@ -449,7 +432,7 @@ class Assembly(object): # , metaclass=_Memoize):
449
432
  e.append((u, v, d))
450
433
  edgelol.append(e)
451
434
 
452
- for edges in _itertools.product(*edgelol):
435
+ for edges in itertools.product(*edgelol):
453
436
  if [
454
437
  True
455
438
  for ((u, v, e), (x, y, z)) in zip(edges, edges[1:])
@@ -461,7 +444,7 @@ class Assembly(object): # , metaclass=_Memoize):
461
444
 
462
445
  if key in cps or key in cpsrc:
463
446
  continue # TODO: is test in cpsrc needed?
464
- sg = _nx.DiGraph()
447
+ sg = nx.DiGraph()
465
448
  sg.add_edges_from(edges)
466
449
  sg.add_nodes_from((n, d) for n, d in self.G.nodes(data=True) if n in cp)
467
450
 
@@ -469,7 +452,7 @@ class Assembly(object): # , metaclass=_Memoize):
469
452
  offset = 0
470
453
 
471
454
  for u, v, e in edges:
472
- feats = _deepcopy(e["features"])
455
+ feats = deepcopy(e["features"])
473
456
  for feat in feats:
474
457
  feat.location += offset
475
458
  edgefeatures.extend(feats)
@@ -478,18 +461,18 @@ class Assembly(object): # , metaclass=_Memoize):
478
461
  if f.location.start > len(ct) and f.location.end > len(ct):
479
462
  f.location += -len(ct)
480
463
  elif f.location.end > len(ct):
481
- f.location = _CompoundLocation(
464
+ f.location = CompoundLocation(
482
465
  (
483
- _SimpleLocation(
484
- f.location.start, _ExactPosition(len(ct))
466
+ SimpleLocation(
467
+ f.location.start, ExactPosition(len(ct))
485
468
  ),
486
- _SimpleLocation(
487
- _ExactPosition(0), f.location.end - len(ct)
469
+ SimpleLocation(
470
+ ExactPosition(0), f.location.end - len(ct)
488
471
  ),
489
472
  )
490
473
  )
491
474
 
492
- cps[key] = cpsrc[_rc(key)] = (
475
+ cps[key] = cpsrc[rc(key)] = (
493
476
  ct,
494
477
  edgefeatures,
495
478
  sg,
@@ -499,7 +482,7 @@ class Assembly(object): # , metaclass=_Memoize):
499
482
 
500
483
  return sorted(
501
484
  (
502
- _Contig.from_string(
485
+ Contig.from_string(
503
486
  cp[0],
504
487
  features=cp[1],
505
488
  graph=cp[2],
@@ -513,9 +496,9 @@ class Assembly(object): # , metaclass=_Memoize):
513
496
  reverse=True,
514
497
  )
515
498
 
516
- def __repr__(self) -> _pretty_str:
499
+ def __repr__(self) -> ps:
517
500
  # https://pyformat.info
518
- return _pretty_str(
501
+ return ps(
519
502
  "Assembly\n"
520
503
  "fragments..: {sequences}\n"
521
504
  "limit(bp)..: {limit}\n"
@@ -532,34 +515,34 @@ class Assembly(object): # , metaclass=_Memoize):
532
515
 
533
516
 
534
517
  example_fragments = (
535
- _Dseqrecord("AacgatCAtgctcc", name="a"),
536
- _Dseqrecord("TtgctccTAAattctgc", name="b"),
537
- _Dseqrecord("CattctgcGAGGacgatG", name="c"),
518
+ Dseqrecord("AacgatCAtgctcc", name="a"),
519
+ Dseqrecord("TtgctccTAAattctgc", name="b"),
520
+ Dseqrecord("CattctgcGAGGacgatG", name="c"),
538
521
  )
539
522
 
540
523
 
541
524
  linear_results = (
542
- _Dseqrecord("AacgatCAtgctccTAAattctgcGAGGacgatG", name="abc"),
543
- _Dseqrecord("ggagcaTGatcgtCCTCgcagaatG", name="ac_rc"),
544
- _Dseqrecord("AacgatG", name="ac"),
525
+ Dseqrecord("AacgatCAtgctccTAAattctgcGAGGacgatG", name="abc"),
526
+ Dseqrecord("ggagcaTGatcgtCCTCgcagaatG", name="ac_rc"),
527
+ Dseqrecord("AacgatG", name="ac"),
545
528
  )
546
529
 
547
530
 
548
531
  circular_results = (
549
- _Dseqrecord("acgatCAtgctccTAAattctgcGAGG", name="abc", circular=True),
550
- _Dseqrecord("ggagcaTGatcgtCCTCgcagaatTTA", name="abc_rc", circular=True),
532
+ Dseqrecord("acgatCAtgctccTAAattctgcGAGG", name="abc", circular=True),
533
+ Dseqrecord("ggagcaTGatcgtCCTCgcagaatTTA", name="abc_rc", circular=True),
551
534
  )
552
535
 
553
536
 
554
- class _NodeTuple(_NamedTuple):
537
+ class _NodeTuple(NamedTuple):
555
538
  start: int
556
539
  length: int
557
540
  shared_seq: str # uppercase
558
541
 
559
542
 
560
- class _FragmentDict(_TypedDict):
543
+ class _FragmentDict(TypedDict):
561
544
  upper: str
562
545
  mixed: str
563
546
  name: str
564
- features: _List[_SeqFeature]
565
- nodes: _List[_NodeTuple]
547
+ features: List[SeqFeature]
548
+ nodes: List[_NodeTuple]