bscampp 1.0.1a0__tar.gz → 1.0.1b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. bscampp-1.0.1b0/CHANGELOG.md +9 -0
  2. {bscampp-1.0.1a0/bscampp.egg-info → bscampp-1.0.1b0}/PKG-INFO +6 -1
  3. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/README.md +5 -0
  4. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/__init__.py +1 -1
  5. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/functions.py +24 -9
  6. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/pipeline.py +34 -13
  7. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/utils.py +22 -21
  8. {bscampp-1.0.1a0 → bscampp-1.0.1b0/bscampp.egg-info}/PKG-INFO +6 -1
  9. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp.egg-info/SOURCES.txt +2 -1
  10. bscampp-1.0.1b0/tests/test_dry_run.py +11 -0
  11. bscampp-1.0.1a0/CHANGELOG.md +0 -3
  12. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/LICENSE +0 -0
  13. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/MANIFEST.in +0 -0
  14. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/configs.py +0 -0
  15. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/default.config +0 -0
  16. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/init_configs.py +0 -0
  17. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/jobs.py +0 -0
  18. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/epa-ng +0 -0
  19. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/CMakeLists.txt +0 -0
  20. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/fragment_hamming +0 -0
  21. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/hamming +0 -0
  22. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/homology +0 -0
  23. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/src/fragment_hamming.cpp +0 -0
  24. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp +0 -0
  25. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp +0 -0
  26. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/src/homology.cpp +0 -0
  27. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/hamming_distance/src/new_hamming.cpp +0 -0
  28. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp/tools/pplacer +0 -0
  29. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp.egg-info/dependency_links.txt +0 -0
  30. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp.egg-info/entry_points.txt +0 -0
  31. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp.egg-info/requires.txt +0 -0
  32. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/bscampp.egg-info/top_level.txt +0 -0
  33. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/pyproject.toml +0 -0
  34. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/requirements.txt +0 -0
  35. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/run_bscampp.py +0 -0
  36. {bscampp-1.0.1a0 → bscampp-1.0.1b0}/setup.cfg +0 -0
@@ -0,0 +1,9 @@
1
+ # BSCAMPP v1.0.1b
2
+ 1. Removed redundant code and fixed missing variables.
3
+ 2. Added badges for PyPI installation and current Python Build, etc.
4
+ 3. Added a single Pytest for dry-running BSCAMPP under `tests/`.
5
+ 4. To support item 3, added the `dry_run` parameter to the function `pipeline.py:bscampp_pipeline`.
6
+
7
+ # BSCAMPP v1.0.1a
8
+ 1. Completed features with both `epa-ng` and `pplacer` support.
9
+ 2. Refactored all code and worked out the PyPI installation for release.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: bscampp
3
- Version: 1.0.1a0
3
+ Version: 1.0.1b0
4
4
  Summary: BSCAMPP - A Scalable Phylogenetic Placement Tool
5
5
  Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
6
6
  License: MIT License
@@ -51,6 +51,11 @@ Requires-Dist: treeswift>=1.1.45
51
51
  Requires-Dist: taxtastic>=0.9.3
52
52
 
53
53
  # BSCAMPP - A Scalable Phylogenetic Placement Method and Framework
54
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/bscampp)](https://pypi.org/project/bscampp/)
55
+ [![PyPI - Version](https://img.shields.io/pypi/v/bscampp?color=blue)](https://pypi.org/project/bscampp/#history)
56
+ [![Build Status](https://img.shields.io/github/actions/workflow/status/ewedell/BSCAMPP/python-package.yml?branch=main&label=build)](https://github.com/ewedell/BSCAMPP/)
57
+ [![PyPI - License](https://img.shields.io/pypi/l/bscampp?color=blue)](https://github.com/ewedell/BSCAMPP/blob/main/LICENSE)
58
+ [![Changelog](https://img.shields.io/badge/CHANGELOG-grey)](https://github.com/ewedell/BSCAMPP/blob/main/CHANGELOG.md)
54
59
 
55
60
  **Table of Contents**
56
61
  1. [Overview](#overview)
@@ -1,4 +1,9 @@
1
1
  # BSCAMPP - A Scalable Phylogenetic Placement Method and Framework
2
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/bscampp)](https://pypi.org/project/bscampp/)
3
+ [![PyPI - Version](https://img.shields.io/pypi/v/bscampp?color=blue)](https://pypi.org/project/bscampp/#history)
4
+ [![Build Status](https://img.shields.io/github/actions/workflow/status/ewedell/BSCAMPP/python-package.yml?branch=main&label=build)](https://github.com/ewedell/BSCAMPP/)
5
+ [![PyPI - License](https://img.shields.io/pypi/l/bscampp?color=blue)](https://github.com/ewedell/BSCAMPP/blob/main/LICENSE)
6
+ [![Changelog](https://img.shields.io/badge/CHANGELOG-grey)](https://github.com/ewedell/BSCAMPP/blob/main/CHANGELOG.md)
2
7
 
3
8
  **Table of Contents**
4
9
  1. [Overview](#overview)
@@ -12,7 +12,7 @@ import logging, os
12
12
  # not really needed for BSCAMPP but safe to update here
13
13
  os.sys.setrecursionlimit(1000000)
14
14
 
15
- __version__ = "1.0.1a"
15
+ __version__ = "1.0.1b"
16
16
  _INSTALL_PATH = __path__[0]
17
17
 
18
18
  # global variables to store all loggers
@@ -15,10 +15,13 @@ Function to read in the placement tree and alignment.
15
15
  If query alignment is provided, will use the provided query instead of
16
16
  the ones (potentially) included in the reference alignment
17
17
  '''
18
- def readData(workdir):
18
+ def readData(workdir, dry_run=False):
19
19
  t0 = time.perf_counter()
20
20
  _LOG.info('Reading in input data...')
21
21
 
22
+ if dry_run:
23
+ return None, dict(), '', dict(), '', dict()
24
+
22
25
  # (1) load reference tree
23
26
  tree = treeswift.read_tree_newick(Configs.tree_path)
24
27
  tree.resolve_polytomies()
@@ -45,10 +48,10 @@ def readData(workdir):
45
48
  # after separating queries from the reference alignment, write
46
49
  # them to TEMP/
47
50
  qaln_path = os.path.join(workdir, 'qaln.fa')
48
- write_fasta(temp_qaln_path, q_dict)
51
+ write_fasta(qaln_path, q_dict)
49
52
 
50
53
  aln_path = os.path.join(workdir, 'aln.fa')
51
- write_fasta(temp_aln_path, ref_dict)
54
+ write_fasta(aln_path, ref_dict)
52
55
 
53
56
  t1 = time.perf_counter()
54
57
  _LOG.info('Time to read in input data: {} seconds'.format(t1 - t0))
@@ -58,19 +61,22 @@ def readData(workdir):
58
61
  Function to get the closest leaf for each query sequence based on Hamming
59
62
  distance
60
63
  '''
61
- def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir):
64
+ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
62
65
  t0 = time.perf_counter()
63
66
  _LOG.info('Computing closest leaves for query sequences...')
67
+
68
+ if dry_run:
69
+ return dict(), dict()
70
+
64
71
  query_votes_dict = dict()
65
72
  query_top_vote_dict = dict()
66
-
67
73
  tmp_output = os.path.join(workdir, 'closest.txt')
68
74
 
69
75
  cmd = []
70
76
  if Configs.similarityflag:
71
77
  cmd.append(os.path.join(Configs.hamming_distance_dir, 'homology'))
72
78
  else:
73
- if fragment_flag == False:
79
+ if Configs.fragmentflag == False:
74
80
  cmd.append(os.path.join(Configs.hamming_distance_dir, 'hamming'))
75
81
  else:
76
82
  cmd.append(os.path.join(
@@ -115,10 +121,13 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir):
115
121
  Function to assign queries to subtrees based on their votes
116
122
  '''
117
123
  def assignQueriesToSubtrees(query_votes_dict, query_top_vote_dict,
118
- tree, leaf_dict):
124
+ tree, leaf_dict, dry_run=False):
119
125
  t0 = time.perf_counter()
120
126
  _LOG.info('Adding query votes to the placement tree...')
121
127
 
128
+ if dry_run:
129
+ return dict(), []
130
+
122
131
  # (1) go over the query votes and add them to corresponding leaves
123
132
  lf_votes = Counter()
124
133
  leaf_queries = dict()
@@ -229,10 +238,13 @@ def placeOneSubtree():
229
238
  Function to perform placement of queries for each subtree
230
239
  '''
231
240
  def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
232
- aln, qaln, cmdline_args, workdir, pool, lock):
241
+ aln, qaln, cmdline_args, workdir, pool, lock, dry_run=False):
233
242
  t0 = time.perf_counter()
234
243
  _LOG.info('Performing placement on each subtree...')
235
244
 
245
+ if dry_run:
246
+ return dict()
247
+
236
248
  # prepare to write an aggregated results to local
237
249
  jplace = dict()
238
250
  utils.add_edge_nbrs(tree)
@@ -381,9 +393,12 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
381
393
  '''
382
394
  Function to write a given jplace object to local output
383
395
  '''
384
- def writeOutputJplace(output_jplace):
396
+ def writeOutputJplace(output_jplace, dry_run=False):
385
397
  t0 = time.perf_counter()
386
398
  _LOG.info('Writing aggregated placements to local...')
399
+
400
+ if dry_run:
401
+ return
387
402
 
388
403
  outpath = os.path.join(Configs.outdir, Configs.outname)
389
404
  outf = open(outpath, 'w')
@@ -22,8 +22,13 @@ def bscampp_pipeline(*args, **kwargs):
22
22
  t0 = time.perf_counter()
23
23
  m = Manager(); lock = m.Lock()
24
24
 
25
+ # set up a dry run if specified
26
+ dry_run = False
27
+ if 'dry_run' in kwargs and isinstance(kwargs['dry_run'], bool):
28
+ dry_run = kwargs['dry_run']
29
+
25
30
  # parse command line arguments and build configurations
26
- parser, cmdline_args = parseArguments()
31
+ parser, cmdline_args = parseArguments(dry_run=dry_run)
27
32
 
28
33
  # initialize multiprocessing (if needed)
29
34
  _LOG.warning('Initializing ProcessPoolExecutor...')
@@ -31,33 +36,39 @@ def bscampp_pipeline(*args, **kwargs):
31
36
  initargs=(parser, cmdline_args,))
32
37
 
33
38
  # (0) temporary files are written here
34
- workdir = os.path.join(Configs.outdir, f'tmp{Configs.tmpfilenbr}')
35
- try:
36
- if not os.path.isdir(workdir):
37
- os.makedirs(workdir)
38
- except OSError:
39
- log_exception(_LOG)
39
+ if not dry_run:
40
+ workdir = os.path.join(Configs.outdir, f'tmp{Configs.tmpfilenbr}')
41
+ try:
42
+ if not os.path.isdir(workdir):
43
+ os.makedirs(workdir)
44
+ except OSError:
45
+ log_exception(_LOG)
46
+ else:
47
+ workdir = os.getcwd()
40
48
 
41
49
  # (1) read in tree, alignment, and separate reference sequences from
42
50
  # query sequences
43
- tree, leaf_dict, aln_path, aln, qaln_path, qaln = readData(workdir)
51
+ tree, leaf_dict, aln_path, aln, qaln_path, qaln = readData(workdir,
52
+ dry_run=dry_run)
44
53
 
45
54
  # (2) compute closest leaves for all query sequences
46
55
  query_votes_dict, query_top_vote_dict = getClosestLeaves(
47
- aln_path, qaln_path, aln, qaln, workdir)
56
+ aln_path, qaln_path, aln, qaln, workdir, dry_run=dry_run)
48
57
 
49
58
  # (3) first assign all queries to their closest-leaf subtrees,
50
59
  # then do reassignment to minimize distance between each's top vote
51
60
  # and the subtree's seed leaf
52
61
  new_subtree_dict, placed_query_list = assignQueriesToSubtrees(
53
- query_votes_dict, query_top_vote_dict, tree, leaf_dict)
62
+ query_votes_dict, query_top_vote_dict, tree, leaf_dict,
63
+ dry_run=dry_run)
54
64
 
55
65
  # (4) perform placement for each subtree
56
66
  output_jplace = placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict,
57
- placed_query_list, aln, qaln, cmdline_args, workdir, pool, lock)
67
+ placed_query_list, aln, qaln, cmdline_args, workdir, pool, lock,
68
+ dry_run=dry_run)
58
69
 
59
70
  # (5) write the output jplace to local
60
- writeOutputJplace(output_jplace)
71
+ writeOutputJplace(output_jplace, dry_run=dry_run)
61
72
 
62
73
  # shutdown pool
63
74
  _LOG.warning('Shutting down ProcessPoolExecutor...')
@@ -73,6 +84,11 @@ def bscampp_pipeline(*args, **kwargs):
73
84
  send = time.perf_counter()
74
85
  _LOG.info('BSCAMPP completed in {} seconds...'.format(send - t0))
75
86
 
87
+ if dry_run:
88
+ return True
89
+ else:
90
+ return False
91
+
76
92
  def clean_temp_files():
77
93
  # all temporary files/directories to remove
78
94
  temp_items = [f'tmp{Configs.tmpfilenbr}']
@@ -86,10 +102,15 @@ def clean_temp_files():
86
102
  continue
87
103
  _LOG.info(f'- Removed {temp}')
88
104
 
89
- def parseArguments():
105
+ def parseArguments(dry_run=False):
90
106
  global _root_dir, main_config_path
107
+
91
108
  parser = _init_parser()
92
109
  cmdline_args = sys.argv[1:]
110
+
111
+ if dry_run:
112
+ cmdline_args = ['-i', 'dummy.info', '-t', 'dummy.tre',
113
+ '-a', 'dummy.fa']
93
114
 
94
115
  # build config
95
116
  buildConfigs(parser, cmdline_args)
@@ -8,6 +8,7 @@ from os.path import expanduser,isfile
8
8
  import random
9
9
  import statistics
10
10
  import copy
11
+ import gzip
11
12
 
12
13
  import argparse
13
14
  # reformat argparse help text formatting
@@ -813,25 +814,25 @@ def newick_edge_tokens_node(node):
813
814
  node_to_str[node] = ''.join(out)
814
815
  return node_to_str[node]
815
816
 
816
- def write_tree_newick_edge_tokens(tree, filename, hide_rooted_prefix=False):
817
- '''
818
- Modified from treeswift tree.write_tree_newick()
819
- Write this ``Tree`` to a Newick file
820
- Args:
821
- ``filename`` (``str``): Path to desired output file (plain-text or gzipped)
822
- '''
823
- if not isinstance(filename, str):
824
- raise TypeError("filename must be a str")
825
- treestr = newick_edge_nbr_string(tree)
826
- if hide_rooted_prefix:
827
- if treestr.startswith('[&R]'):
828
- treestr = treestr[4:].strip()
829
- else:
830
- warn("Specified hide_rooted_prefix, but tree was not rooted")
831
- if filename.lower().endswith('.gz'): # gzipped file
832
- f = gopen(expanduser(filename),'wb',9); f.write(treestr.encode()); f.close()
833
- else: # plain-text file
834
- f = open(expanduser(filename),'w'); f.write(treestr); f.close()
817
+ #def write_tree_newick_edge_tokens(tree, filename, hide_rooted_prefix=False):
818
+ # '''
819
+ # Modified from treeswift tree.write_tree_newick()
820
+ # Write this ``Tree`` to a Newick file
821
+ # Args:
822
+ # ``filename`` (``str``): Path to desired output file (plain-text or gzipped)
823
+ # '''
824
+ # if not isinstance(filename, str):
825
+ # raise TypeError("filename must be a str")
826
+ # treestr = newick_edge_nbr_string(tree)
827
+ # if hide_rooted_prefix:
828
+ # if treestr.startswith('[&R]'):
829
+ # treestr = treestr[4:].strip()
830
+ # else:
831
+ # warn("Specified hide_rooted_prefix, but tree was not rooted")
832
+ # if filename.lower().endswith('.gz'): # gzipped file
833
+ # f = gopen(expanduser(filename),'wb',9); f.write(treestr.encode()); f.close()
834
+ # else: # plain-text file
835
+ # f = open(expanduser(filename),'w'); f.write(treestr); f.close()
835
836
 
836
837
  def read_tree_newick_edge_tokens(newick):
837
838
  '''
@@ -850,7 +851,7 @@ def read_tree_newick_edge_tokens(newick):
850
851
  except:
851
852
  raise TypeError("newick must be a str")
852
853
  if newick.lower().endswith('.gz'): # gzipped file
853
- f = gopen(expanduser(newick)); ts = f.read().decode().strip(); f.close()
854
+ f = gzip.open(expanduser(newick)); ts = f.read().decode().strip(); f.close()
854
855
  elif isfile(expanduser(newick)): # plain-text file
855
856
  f = open(expanduser(newick)); ts = f.read().strip(); f.close()
856
857
  else:
@@ -867,7 +868,7 @@ def read_tree_newick_edge_tokens(newick):
867
868
  # end of Newick string
868
869
  if ts[i] == ';':
869
870
  if i != len(ts)-1 or n != t.root:
870
- raise RuntimeError(INVALID_NEWICK)
871
+ raise RuntimeError("INVALID NEWICK")
871
872
 
872
873
  # go to new child
873
874
  elif ts[i] == '(':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: bscampp
3
- Version: 1.0.1a0
3
+ Version: 1.0.1b0
4
4
  Summary: BSCAMPP - A Scalable Phylogenetic Placement Tool
5
5
  Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
6
6
  License: MIT License
@@ -51,6 +51,11 @@ Requires-Dist: treeswift>=1.1.45
51
51
  Requires-Dist: taxtastic>=0.9.3
52
52
 
53
53
  # BSCAMPP - A Scalable Phylogenetic Placement Method and Framework
54
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/bscampp)](https://pypi.org/project/bscampp/)
55
+ [![PyPI - Version](https://img.shields.io/pypi/v/bscampp?color=blue)](https://pypi.org/project/bscampp/#history)
56
+ [![Build Status](https://img.shields.io/github/actions/workflow/status/ewedell/BSCAMPP/python-package.yml?branch=main&label=build)](https://github.com/ewedell/BSCAMPP/)
57
+ [![PyPI - License](https://img.shields.io/pypi/l/bscampp?color=blue)](https://github.com/ewedell/BSCAMPP/blob/main/LICENSE)
58
+ [![Changelog](https://img.shields.io/badge/CHANGELOG-grey)](https://github.com/ewedell/BSCAMPP/blob/main/CHANGELOG.md)
54
59
 
55
60
  **Table of Contents**
56
61
  1. [Overview](#overview)
@@ -29,4 +29,5 @@ bscampp/tools/hamming_distance/src/fragment_hamming.cpp
29
29
  bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp
30
30
  bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp
31
31
  bscampp/tools/hamming_distance/src/homology.cpp
32
- bscampp/tools/hamming_distance/src/new_hamming.cpp
32
+ bscampp/tools/hamming_distance/src/new_hamming.cpp
33
+ tests/test_dry_run.py
@@ -0,0 +1,11 @@
1
+ # tests/test_dry_run.py
2
+ import pytest, os
3
+ from bscampp.pipeline import bscampp_pipeline
4
+
5
+ def test_bscampp_pipeline():
6
+ res = bscampp_pipeline(dry_run=True)
7
+ assert res == True
8
+
9
+ # remove bscampp_output that's created
10
+ if os.path.isdir('bscampp_output'):
11
+ os.rmdir('bscampp_output')
@@ -1,3 +0,0 @@
1
- # BSCAMPP v1.0.1a
2
- 1. Completed features with both `epa-ng` and `pplacer` support.
3
- 2. Refactorized all codes and worked out the PyPI installation for release.
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes