bscampp 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bscampp/__init__.py CHANGED
@@ -12,7 +12,7 @@ import logging, os
12
12
  # not really needed for BSCAMPP but safe to update here
13
13
  os.sys.setrecursionlimit(1000000)
14
14
 
15
- __version__ = "1.0.5"
15
+ __version__ = "1.0.7"
16
16
  _INSTALL_PATH = __path__[0]
17
17
 
18
18
  # global variables to store all loggers
bscampp/configs.py CHANGED
@@ -22,7 +22,6 @@ _LOG = get_logger(__name__)
22
22
  Configuration defined by users and by default values
23
23
  '''
24
24
  class Configs:
25
- global _root_dir
26
25
 
27
26
  # basic input paths
28
27
  info_path = None # info file for pplacer or EPA-ng
bscampp/functions.py CHANGED
@@ -1,17 +1,84 @@
1
1
  import json, time, os, sys
2
2
  import treeswift
3
3
  from collections import defaultdict, Counter
4
+ import subprocess
4
5
 
5
6
  from bscampp import get_logger, log_exception
6
7
  from bscampp.configs import Configs
7
- from bscampp.jobs import EPAngJob, TaxtasticJob, PplacerTaxtasticJob
8
+ from bscampp.jobs import GenericJob, EPAngJob, TaxtasticJob, PplacerTaxtasticJob
8
9
  from bscampp.utils import write_fasta
9
10
  import bscampp.utils as utils
10
11
 
11
12
  import concurrent.futures
12
13
 
14
+ # suppress UserWarning messages raised when calling suppress_unifurcations on subtrees
15
+ import warnings
16
+ warnings.filterwarnings("ignore", category=UserWarning)
17
+
13
18
  _LOG = get_logger(__name__)
14
19
 
20
+ ############################# helper functions ################################
21
+ '''
22
+ Function to recompile binaries from the given directory.
23
+ Assumption: the directory contains a CMakeLists.txt file
24
+ '''
25
+ def recompileBinariesFromDir(dir):
26
+ _LOG.warning(f"Recompiling binaries with cmake/make at {dir}")
27
+
28
+ # need to recompile the binaries
29
+ cmake_p = subprocess.Popen(['cmake', dir],
30
+ cwd=dir, stdout=subprocess.PIPE,
31
+ stderr=subprocess.PIPE, text=True)
32
+ cmake_stdout, cmake_stderr = cmake_p.communicate()
33
+
34
+ if cmake_p.returncode != 0:
35
+ _LOG.error("cmake failed!")
36
+ exit(cmake_p.returncode)
37
+ else:
38
+ _LOG.warning("cmake succeeded!")
39
+
40
+ # run make
41
+ make_p = subprocess.Popen(['make'],
42
+ cwd=dir, stdout=subprocess.PIPE,
43
+ stderr=subprocess.PIPE, text=True)
44
+ make_stdout, make_stderr = make_p.communicate()
45
+
46
+ if make_p.returncode != 0:
47
+ _LOG.error(f"make failed!")
48
+ exit(make_p.returncode)
49
+ else:
50
+ _LOG.warning("make succeeded!")
51
+ _LOG.warning(f"Successfully recompiled binaries at {dir}!")
52
+
53
+ '''
54
+ Function to check hamming/fragment_hamming/homology binaries are executable,
55
+ since they were compiled using dynamic libraries
56
+ '''
57
+ def ensureBinaryExecutable(binpath):
58
+ dir = os.path.dirname(binpath)
59
+
60
+ # binpath does not exist
61
+ b_recompile = False
62
+ if not os.path.exists(binpath):
63
+ _LOG.warning(f"{binpath} does not exist!")
64
+ b_recompile = True
65
+ else:
66
+ p = subprocess.Popen([binpath], stdout=subprocess.PIPE,
67
+ stderr=subprocess.PIPE)
68
+ stdout, stderr = p.communicate()
69
+ # 255 or -1 indicates that the binaries work
70
+ if p.returncode == 255 or p.returncode == -1:
71
+ pass
72
+ else:
73
+ _LOG.warning(f"{binpath} return code is {p.returncode}!")
74
+ b_recompile = True
75
+
76
+ if b_recompile:
77
+ recompileBinariesFromDir(dir)
78
+ return
79
+
80
+ ########################## end of helper functions ############################
81
+
15
82
  '''
16
83
  Function to read in the placement tree and alignment.
17
84
  If query alignment is provided, will use the provided query instead of
@@ -47,17 +114,6 @@ def readData(workdir, dry_run=False):
47
114
  aln_dict = utils.read_data(Configs.aln_path)
48
115
  ref_dict, q_dict = utils.seperate(aln_dict, leaf_dict)
49
116
 
50
- # after separating queries from the reference alignment, write
51
- # them to TEMP/
52
- # Updated on 3.5.2025 by Chengze Shen
53
- # - regardless of the input choices, write a copy of both reference
54
- # and query alignment to the workdir
55
- qaln_path = os.path.join(workdir, 'qaln.fa')
56
- write_fasta(qaln_path, q_dict)
57
-
58
- aln_path = os.path.join(workdir, 'aln.fa')
59
- write_fasta(aln_path, ref_dict)
60
-
61
117
  # Added on 3.8.2025 by Chengze Shen
62
118
  # - to ensure that any characters from the query has correct names
63
119
  # (e.g., having ":" can cause trouble), have a qname_map that maps
@@ -70,6 +126,22 @@ def readData(workdir, dry_run=False):
70
126
  qname_map[name] = cvt
71
127
  qname_map_rev[cvt] = name
72
128
  qidx += 1
129
+ # modify q_dict as well
130
+ for name, cvt in qname_map.items():
131
+ q_dict[cvt] = q_dict[name]
132
+ q_dict.pop(name)
133
+
134
+ # after separating queries from the reference alignment, write
135
+ # them to TEMP/
136
+ # Updated on 3.5.2025 by Chengze Shen
137
+ # - regardless of the input choices, write a copy of both reference
138
+ # and query alignment to the workdir
139
+ qaln_path = os.path.join(workdir, 'qaln.fa')
140
+ write_fasta(qaln_path, q_dict)
141
+
142
+ aln_path = os.path.join(workdir, 'aln.fa')
143
+ write_fasta(aln_path, ref_dict)
144
+
73
145
 
74
146
  t1 = time.perf_counter()
75
147
  _LOG.info('Time to read in input data: {} seconds'.format(t1 - t0))
@@ -94,18 +166,29 @@ def getClosestLeaves(aln_path, qaln_path, aln, qaln, workdir, dry_run=False):
94
166
  if Configs.subtreetype == "h":
95
167
  Configs.votes = Configs.subtreesize
96
168
 
97
- cmd = []
98
169
  if Configs.similarityflag:
99
- cmd.append(os.path.join(Configs.hamming_distance_dir, 'homology'))
170
+ job_type = 'homology'
100
171
  else:
101
- if Configs.fragmentflag == False:
102
- cmd.append(os.path.join(Configs.hamming_distance_dir, 'hamming'))
172
+ if Configs.fragmentflag:
173
+ job_type = 'fragment_hamming'
103
174
  else:
104
- cmd.append(os.path.join(
105
- Configs.hamming_distance_dir, 'fragment_hamming'))
175
+ job_type = 'hamming'
176
+ binpath = os.path.join(Configs.hamming_distance_dir, job_type)
177
+ cmd = [binpath]
178
+
179
+ # Added @ 3.9.2025 by Chengze Shen
180
+ # - check if binpath is executable, since the compiled files use dynamic
181
+ # libraries.
182
+ # If works: should have return code 255
183
+ # If not: should have return code 1,
184
+ # recompile the binaries using cmake and make
185
+ ensureBinaryExecutable(binpath)
186
+
106
187
  cmd.extend([aln_path, str(len(aln)), qaln_path, str(len(qaln)),
107
188
  tmp_output, str(Configs.votes)])
108
- os.system(' '.join(cmd))
189
+ job = GenericJob(cmd=cmd, job_type=job_type)
190
+ _ = job.run()
191
+ #os.system(' '.join(cmd))
109
192
 
110
193
  # process closest leaves
111
194
  unusable_queries = set()
@@ -326,7 +409,8 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
326
409
  aln, qaln, cmdline_args, workdir, qname_map, qname_map_rev,
327
410
  pool, lock, dry_run=False):
328
411
  t0 = time.perf_counter()
329
- _LOG.info('Performing placement on each subtree...')
412
+ _LOG.info("Performing placement on each subtree with {}...".format(
413
+ Configs.placement_method))
330
414
 
331
415
  if dry_run:
332
416
  return dict()
@@ -366,10 +450,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
366
450
  if '' in tmp_leaf_dict:
367
451
  del tmp_leaf_dict['']
368
452
  tmp_ref_dict = {label : aln[label] for label in tmp_leaf_dict.keys()}
369
- # Changed @ 3.8.2025 by Chengze Shen
370
- # - wrote converted name for query sequences and convert them
371
- # - back when placements are done
372
- tmp_q_dict = {qname_map[name] : qaln[name] for name in query_list}
453
+ tmp_q_dict = {name : qaln[name] for name in query_list}
373
454
  write_fasta(tmp_aln, tmp_ref_dict)
374
455
  write_fasta(tmp_qaln, tmp_q_dict)
375
456
 
@@ -385,7 +466,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
385
466
  job = EPAngJob(path=Configs.epang_path,
386
467
  info_path=Configs.info_path, tree_path=tmp_tree,
387
468
  aln_path=tmp_aln, qaln_path=tmp_qaln,
388
- outdir=subtree_dir, num_cpus=Configs.num_cpus)
469
+ outdir=subtree_dir, num_cpus=Configs.cpus_per_job)
389
470
  jobs.append(job)
390
471
  ## for EPA-ng, ensure that outpath name is changed to the one we want
391
472
  #_outpath = job.run(logging=f'subtree_{final_subtree_count}')
@@ -405,7 +486,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
405
486
  job = PplacerTaxtasticJob(path=Configs.pplacer_path,
406
487
  refpkg_dir=refpkg_dir,
407
488
  #molecule=Configs.molecule, model=Configs.model,
408
- outpath=tmp_output, num_cpus=Configs.num_cpus,
489
+ outpath=tmp_output, num_cpus=Configs.cpus_per_job,
409
490
  qaln_path=tmp_qaln)
410
491
  #tmp_output = job.run(logging=f'subtree_{final_subtree_count}')
411
492
  jobs.append(job)
@@ -445,8 +526,16 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
445
526
 
446
527
  for tmp_place in place_json["placements"]:
447
528
  # convert qname back using qname_map_rev
448
- qname = qname_map_rev[tmp_place[tgt][0]]
449
- tmp_place[tgt][0] = qname
529
+ tmp_name = tmp_place[tgt][0]
530
+
531
+ # >EPA-ng: tgt=="n" --> qname is string
532
+ if isinstance(tmp_name, str):
533
+ qname = qname_map_rev[tmp_name]
534
+ tmp_place[tgt][0] = qname
535
+ # >pplacer: tgt=="nm" --> qname is a list of two fields
536
+ elif isinstance(tmp_name, list):
537
+ qname = qname_map_rev[tmp_name[0]]
538
+ tmp_place[tgt][0][0] = qname
450
539
  placed_query_list.append(qname)
451
540
 
452
541
  #placed_query_list.append(tmp_place[tgt][0])
@@ -505,6 +594,7 @@ def placeQueriesToSubtrees(tree, leaf_dict, new_subtree_dict, placed_query_list,
505
594
  _LOG.info('Time to place queries to subtrees: {} seconds'.format(t1 - t0))
506
595
  return jplace
507
596
 
597
+
508
598
  '''
509
599
  Function to write a given jplace object to local output
510
600
  '''
bscampp/jobs.py CHANGED
@@ -112,7 +112,7 @@ class Job(object):
112
112
  else:
113
113
  _LOG.error(error_msg + '\nSTDOUT: ' + stdout +
114
114
  '\nSTDERR: ' + stderr + logpath)
115
- exit(1)
115
+ exit(self.returncode)
116
116
  except Exception:
117
117
  log_exception(_LOG)
118
118
 
@@ -123,6 +123,18 @@ class Job(object):
123
123
  raise NotImplementedError(
124
124
  'get_invocation() should be implemented by subclasses.')
125
125
 
126
+ '''
127
+ Generic job that runs the given command, represented as a list of strings
128
+ '''
129
+ class GenericJob(Job):
130
+ def __init__(self, cmd=[], job_type='external'):
131
+ Job.__init__(self)
132
+ self.job_type = job_type
133
+ self.cmd = cmd
134
+
135
+ def get_invocation(self):
136
+ return self.cmd, None
137
+
126
138
  '''
127
139
  An EPA-ng job that runs EPA-ng with given parameters
128
140
  '''
bscampp/pipeline.py CHANGED
@@ -180,8 +180,6 @@ def clean_temp_files():
180
180
  _LOG.info(f'- Removed {temp}')
181
181
 
182
182
  def parseArguments(dry_run=False, method="BSCAMPP"):
183
- global _root_dir, main_config_path
184
-
185
183
  default_outdir = f"{method.lower()}_output"
186
184
  default_outname = f"{method.lower()}_result"
187
185
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: bscampp
3
- Version: 1.0.5
3
+ Version: 1.0.7
4
4
  Summary: BSCAMPP and SCAMPP - Scalable Phylogenetic Placement Tools
5
5
  Author-email: Eleanor Wedell <ewedell2@illinois.edu>, Chengze Shen <chengze5@illinois.edu>
6
6
  License: MIT License
@@ -49,12 +49,13 @@ Requires-Dist: ConfigParser>=5.0.0
49
49
  Requires-Dist: numpy>=1.21.6
50
50
  Requires-Dist: treeswift>=1.1.45
51
51
  Requires-Dist: taxtastic>=0.9.3
52
+ Dynamic: license-file
52
53
 
53
54
  # BSCAMPP and SCAMPP - Two Scalable Phylogenetic Placement Methods and Frameworks
55
+ [![PyPI - Version](https://img.shields.io/pypi/v/bscampp)](https://pypi.org/project/bscampp/#history)
54
56
  [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/bscampp)](https://pypi.org/project/bscampp/)
55
- [![PyPI - Version](https://img.shields.io/pypi/v/bscampp?color=blue)](https://pypi.org/project/bscampp/#history)
56
57
  [![Build Status](https://img.shields.io/github/actions/workflow/status/ewedell/BSCAMPP/python-package.yml?branch=main&label=build)](https://github.com/ewedell/BSCAMPP/)
57
- [![PyPI - License](https://img.shields.io/pypi/l/bscampp?color=blue)](https://github.com/ewedell/BSCAMPP/blob/main/LICENSE)
58
+ [![PyPI - License](https://img.shields.io/pypi/l/bscampp)](https://github.com/ewedell/BSCAMPP/blob/main/LICENSE)
58
59
  [![Changelog](https://img.shields.io/badge/CHANGELOG-grey)](https://github.com/ewedell/BSCAMPP/blob/main/CHANGELOG.md)
59
60
 
60
61
  **Table of Contents**
@@ -1,10 +1,10 @@
1
- bscampp/__init__.py,sha256=ER9JtHb4EYnE1qyPBUmkpbsMeRC_4JDHUla46QUoInw,2289
2
- bscampp/configs.py,sha256=perl6u5hto6J3JV1JMbsTQ6tqr2uGOk-Z9jfzflid0s,6122
1
+ bscampp/__init__.py,sha256=iWcT9i33I9mkOxFgfa7aRxjn_tRPYY7yW6zcuhbjTWk,2289
2
+ bscampp/configs.py,sha256=M53nndokxi3WczhpKGWpxOcI2nXIkgpQKhj5gYpee8w,6101
3
3
  bscampp/default.config,sha256=CEfsUHBy--vwJhEcUuJ0btfuGQWb_lKMVWUIP9f5YGw,112
4
- bscampp/functions.py,sha256=qzlxW-bIJi0woStCzraALPb6VEPlO3CdPfCdfQqT2fQ,20119
4
+ bscampp/functions.py,sha256=ywv3-h1l81YayioSTmNiciBZnbi56zyIGc-5Ni-jXG4,23016
5
5
  bscampp/init_configs.py,sha256=EA9sMN5jWj6zj2b-7tN19LhX2Ef61ByQLxQRLHAqLDM,3600
6
- bscampp/jobs.py,sha256=1FdvpSX_5VxNmJCCYAMdBKy8n68O1TjSET4XU1QULq0,7252
7
- bscampp/pipeline.py,sha256=IPZnXZmVxGGfbVUuGCQh5X9oBq48-6pA9QkuvMGPTag,14000
6
+ bscampp/jobs.py,sha256=v7buZJs1AnNoXiILwu-W8fo3QjxAh3i9Mp7xfmlJvAY,7569
7
+ bscampp/pipeline.py,sha256=J-RQH54R27m6fhzIpGX0MJuE3ZFk5rcnsROpwC_n5CE,13960
8
8
  bscampp/utils.py,sha256=-wns6FaWMKD2wVqjxdBQvjTdagTjywBIaGfqb2mupe4,30039
9
9
  bscampp/tools/epa-ng,sha256=f3EVoZAAOXLN6l521qp-TrWDl5J2nqL3tGgjPaQE9WQ,3772096
10
10
  bscampp/tools/pplacer,sha256=p0H4eo9uuiYoWS_kJbPfauOV99i7BXJdZSiwXIuLxTw,7834576
@@ -17,9 +17,9 @@ bscampp/tools/hamming_distance/src/fragment_tree_hamming.cpp,sha256=xCmyAT-OZJOD
17
17
  bscampp/tools/hamming_distance/src/fragment_tree_hamming_new.cpp,sha256=eKxgODRlpf0hU84QjNhigvRhWCT9tiJZjA5oQFQ1bUk,7404
18
18
  bscampp/tools/hamming_distance/src/homology.cpp,sha256=ZE0uXZWQ-cN4U1Wk5kUr_KKHgzsgA6Sno-IViRa4tmI,6053
19
19
  bscampp/tools/hamming_distance/src/new_hamming.cpp,sha256=fBRm99RquBZgZjaLOn9xDI3cH9NchhrxKbL-11j8fmk,5342
20
- bscampp-1.0.5.dist-info/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
21
- bscampp-1.0.5.dist-info/METADATA,sha256=Nz0xmODp6N_e-u2VAgvegoVKW7134rM7UroUYqO7B0Q,12602
22
- bscampp-1.0.5.dist-info/WHEEL,sha256=5U-5D1CS1IlCq2UZGreCPlzbpvhviDLR_iCQyI6CTvY,91
23
- bscampp-1.0.5.dist-info/entry_points.txt,sha256=4Ft83qHc39tNNpMLgSgFXDHM-vuAB99JtmczCQj5pq8,204
24
- bscampp-1.0.5.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
25
- bscampp-1.0.5.dist-info/RECORD,,
20
+ bscampp-1.0.7.dist-info/licenses/LICENSE,sha256=HEa4YQdOR0e2Gz-NiOwr9X6aJcZtY0AGmlJQDmfN0Iw,1064
21
+ bscampp-1.0.7.dist-info/METADATA,sha256=2Tk1eF72pNPIttqSsfwiNclB8H0bShbMhca0aAr8wKo,12602
22
+ bscampp-1.0.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
23
+ bscampp-1.0.7.dist-info/entry_points.txt,sha256=4Ft83qHc39tNNpMLgSgFXDHM-vuAB99JtmczCQj5pq8,204
24
+ bscampp-1.0.7.dist-info/top_level.txt,sha256=1loGRUAft6Tcdq0f3lHbVwWN7W_SW1srfhAVSpg9DWE,8
25
+ bscampp-1.0.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.9.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5