bbmapy 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bbmapy/__init__.py +9 -0
- bbmapy/_version.py +16 -0
- bbmapy/base.py +78 -0
- bbmapy/commands.py +13448 -0
- bbmapy/ex.py +107 -0
- bbmapy/scanner.py +93 -0
- bbmapy-0.0.2.dist-info/LICENSE +1 -0
- bbmapy-0.0.2.dist-info/METADATA +148 -0
- bbmapy-0.0.2.dist-info/RECORD +12 -0
- bbmapy-0.0.2.dist-info/WHEEL +5 -0
- bbmapy-0.0.2.dist-info/entry_points.txt +2 -0
- bbmapy-0.0.2.dist-info/top_level.txt +1 -0
bbmapy/ex.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from bbmapy import bbduk, bbmap, reformat, bbmerge
|
|
3
|
+
|
|
4
|
+
def test_bbduk():
    """Run the bbduk wrapper once against the dummy input files.

    Reads ``input.fastq`` with ``adapters.fa`` as reference and asks for the
    result in ``output_bbduk.fastq``. Nothing is asserted; the call only has
    to complete.
    """
    print("Testing bbduk...")
    # Keyword order is preserved by the dict, so the wrapper receives the
    # options in the same order as a literal keyword call.
    duk_options = {
        "in_file": "input.fastq",
        "out": "output_bbduk.fastq",
        "ref": "adapters.fa",
        "ktrim": "r",
        "k": 23,
        "mink": 11,
        "hdist": 1,
        "tpe": True,
        "tbo": True,
        "Xmx": "1g",
    }
    bbduk(**duk_options)
|
|
18
|
+
|
|
19
|
+
def test_bbmap():
    """Run the bbmap wrapper once against the dummy input files.

    Maps ``input.fastq`` against ``reference.fa`` and asks for the result in
    ``output_bbmap.sam``. Nothing is asserted; the call only has to complete.
    """
    print("Testing bbmap...")
    map_options = {
        "in_file": "input.fastq",
        "out": "output_bbmap.sam",
        "ref": "reference.fa",
        "Xmx": "2g",
        "t": 4,
        "vslow": True,
    }
    bbmap(**map_options)
|
|
29
|
+
|
|
30
|
+
def test_reformat():
    """Run the reformat wrapper once against the dummy FASTQ file.

    Reads ``input.fastq`` and asks for the result in
    ``output_reformat.fasta``. Nothing is asserted; the call only has to
    complete.
    """
    print("Testing reformat...")
    reformat_options = {
        "in_file": "input.fastq",
        "out": "output_reformat.fasta",
        "fastawrap": 80,
        "qin": 33,
        "qout": 64,
    }
    reformat(**reformat_options)
|
|
39
|
+
|
|
40
|
+
def test_bbmerge():
    """Run the bbmerge wrapper once against the paired dummy FASTQ files.

    Reads ``input_1.fastq`` / ``input_2.fastq`` and asks for merged reads in
    ``output_merged.fastq`` and unmerged reads in ``unmerged_1.fastq`` /
    ``unmerged_2.fastq``. Nothing is asserted; the call only has to complete.
    """
    print("Testing bbmerge...")
    merge_options = {
        "in1": "input_1.fastq",
        "in2": "input_2.fastq",
        "out": "output_merged.fastq",
        "outu1": "unmerged_1.fastq",
        "outu2": "unmerged_2.fastq",
        "strict": True,
        "k": 60,
        "extend2": 50,
        "rem": True,
        "Xmx": "1g",
    }
    bbmerge(**merge_options)
|
|
54
|
+
|
|
55
|
+
def test_capture_output():
    """Call bbduk with ``capture_output=True`` and print a preview of each stream.

    At most the first 100 characters of each captured stream are printed,
    followed by ``...``; an empty or missing stream is printed as ``None``.
    """
    print("Testing output capture...")
    captured = bbduk(
        capture_output=True,
        in_file="input.fastq",
        out="output_capture.fastq",
        ref="adapters.fa",
        ktrim="r",
        k=23,
    )
    stdout, stderr = captured
    for label, stream_text in (("Captured stdout:", stdout), ("Captured stderr:", stderr)):
        if stream_text:
            preview = stream_text[:100] + "..."
        else:
            preview = "None"
        print(label, preview)
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":
    # Always run inside ./test. The previous version only changed directory
    # when mkdir failed (bare ``except``), so the very first run wrote its
    # files into the caller's working directory instead.
    os.makedirs("test", exist_ok=True)
    os.chdir("test")

    # Create dummy input files for testing
    dummy_inputs = {
        "input.fastq": "@seq1\nACGT\n+\nIIII\n",
        "input_1.fastq": "@seq1\nACGT\n+\nIIII\n",
        "input_2.fastq": "@seq1\nTGCA\n+\nIIII\n",
        "reference.fa": ">ref1\nACGTACGTACGT\n",
        "adapters.fa": ">adapter1\nACGTACGT\n",
    }
    for dummy_name, dummy_text in dummy_inputs.items():
        with open(dummy_name, "w") as f:
            f.write(dummy_text)

    # Run tests
    test_bbduk()
    test_bbmap()
    # test_reformat()
    test_bbmerge()
    test_capture_output()

    # NOTE(review): phiX174.fasta is not created by this script; presumably
    # it has to exist in the working directory already -- confirm.
    stdout, stderr = bbduk(
        capture_output=True,
        in_file="phiX174.fasta",
        out="output_capture.fastq",
        ref="phix",
        ktrim="r",
        k=23
    )

    # Clean up dummy files (inputs and any outputs the tools produced)
    for file in ["input.fastq", "input_1.fastq", "input_2.fastq", "reference.fa", "adapters.fa",
                 "output_bbduk.fastq", "output_bbmap.sam", "output_reformat.fasta",
                 "output_merged.fastq", "unmerged_1.fastq", "unmerged_2.fastq", "output_capture.fastq"]:
        if os.path.exists(file):
            os.remove(file)

    print("All tests completed.")
|
bbmapy/scanner.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import glob
|
|
3
|
+
import re
|
|
4
|
+
from bbmapy.base import find_bbtools_path
|
|
5
|
+
|
|
6
|
+
def extract_help_message(file_path):
    """Return the usage text embedded in a BBTools shell script.

    The script is searched for a ``usage() { echo "..." }`` function and the
    text of the first double-quoted string after ``echo`` is returned with
    surrounding whitespace stripped.

    The stand-alone option ``in=`` is rewritten to ``in_file=`` so the help
    text matches the keyword used by the Python wrappers. Only the exact
    option name is rewritten: longer names that merely end in ``in=`` (for
    example ``qin=`` or ``min=``) are left untouched.

    Args:
        file_path: Path of the ``.sh`` file to read.

    Returns:
        The extracted help text, or ``"No help message found."`` when the
        script contains no matching usage function.
    """
    with open(file_path, 'r') as f:
        content = f.read()
    usage_match = re.search(r'usage\(\)\s*{\s*echo\s*"(.*?)"', content, re.DOTALL)
    if usage_match is None:
        return "No help message found."
    help_message = usage_match.group(1).strip()
    # \b keeps the rewrite from firing inside longer option names.
    return re.sub(r'\bin=', 'in_file=', help_message)
|
|
15
|
+
|
|
16
|
+
def scan_bbtools():
    """Collect BBTools shell scripts and generate wrappers for them.

    Every ``*.sh`` file directly inside the directory returned by
    ``find_bbtools_path()`` is read; a script is kept when its text mentions
    "bbmap" or "brian" (case-insensitive). The kept ``(tool_name,
    help_message)`` pairs are handed to ``generate_commands_file``.
    """
    script_paths = glob.glob(os.path.join(find_bbtools_path(), "*.sh"))
    discovered = []

    for script_path in script_paths:
        with open(script_path, 'r') as handle:
            lowered = handle.read().lower()
        if not ("bbmap" in lowered or "brian" in lowered):
            continue
        # Drop the trailing ".sh" to get the tool name.
        name = os.path.basename(script_path)[:-3]
        discovered.append((name, extract_help_message(script_path)))

    generate_commands_file(discovered)
|
|
30
|
+
|
|
31
|
+
def generate_commands_file(tools):
    """Write ``commands.py`` next to this module with one wrapper per tool.

    Args:
        tools: Iterable of ``(tool_name, help_message)`` pairs. Dashes in the
            tool name are replaced by underscores to form the Python function
            name; the help message is embedded in the wrapper's docstring.

    The generated module imports its helpers from ``bbmapy.base``, the same
    package this scanner imports from (it previously referenced a ``bbpy``
    package). Backslashes in the help text are doubled so they cannot form
    invalid escape sequences inside the generated docstring.
    """
    output_path = os.path.join(os.path.dirname(__file__), "commands.py")
    with open(output_path, "w") as f:
        f.write("from typing import Union, Tuple\n")
        f.write("from bbmapy.base import _pack_args, _run_command\n\n")

        for tool, help_message in tools:
            method_name = tool.replace('-', '_')
            # The help text lands inside a normal (non-raw) string literal.
            safe_help = help_message.replace("\\", "\\\\")
            f.write(f"""
def {method_name}(capture_output: bool = False, **kwargs) -> Union[None, Tuple[str, str]]:
    \"\"\"
    Wrapper for {tool}.sh

    Help message:
    {safe_help}

    Args:
        capture_output (bool): If True, capture and return the output instead of printing it.
        in_file (str): Input file (replaces 'in=' parameter)
        **kwargs: Other arguments for {tool}.sh

    Returns:
        Union[None, Tuple[str, str]]: If capture_output is True, returns (stdout, stderr), else None.
    \"\"\"
    args = _pack_args(kwargs)
    return _run_command("{tool}.sh", args, capture_output)
""")
|
|
58
|
+
|
|
59
|
+
def main():
    """Run the BBTools scan, printing a message before and after it."""
    print("Scanning BBTools and generating command wrappers...")
    scan_bbtools()
    print("Command wrappers generated successfully.")


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
|
66
|
+
|
|
67
|
+
# # test:
|
|
68
|
+
# from bbmapy import *
|
|
69
|
+
# # # bbtools = BBTools()
|
|
70
|
+
# bbtools.bbduk()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# stdout, stderr = bbtools.bbduk(
|
|
74
|
+
# capture_output=True,
|
|
75
|
+
# in_file="input.fastq",
|
|
76
|
+
# out="output2.fastq",
|
|
77
|
+
# ktrim="r",
|
|
78
|
+
# k="23",
|
|
79
|
+
# mink="11",
|
|
80
|
+
# hdist="1",
|
|
81
|
+
# tbo=True,
|
|
82
|
+
# tpe=True,
|
|
83
|
+
# minlen="45",
|
|
84
|
+
# ref="adapters",
|
|
85
|
+
# ftm="5",
|
|
86
|
+
# maq="6",
|
|
87
|
+
# maxns="1",
|
|
88
|
+
# ordered=True,
|
|
89
|
+
# memory="6g",
|
|
90
|
+
# threads="4",
|
|
91
|
+
# overwrite="t",
|
|
92
|
+
# stats="stats2.txt"
|
|
93
|
+
# )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: bbmapy
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: A Python wrapper for BBTools
|
|
5
|
+
License:
|
|
6
|
+
|
|
7
|
+
Project-URL: Homepage, https://github.com/urineri/bbmapy
|
|
8
|
+
Project-URL: Repository, https://github.com/urineri/bbmapy.git
|
|
9
|
+
Project-URL: BBTools Repository, https://bitbucket.org/berkeleylab/jgi-bbtools/src/master/
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: rich>=10.0.0
|
|
17
|
+
|
|
18
|
+
# bbmapy
|
|
19
|
+
|
|
20
|
+
A Python wrapper for BBTools.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
1. Lazy way - Clone this repository, download bbmap, and install the package:
|
|
24
|
+
```
|
|
25
|
+
git clone https://github.com/urineri/ppby.git
|
|
26
|
+
cd ppby
|
|
27
|
+
cd vendor
|
|
28
|
+
rm bb* -rf
|
|
29
|
+
wget https://sourceforge.net/projects/bbmap/files/latest/download -O bbtools.tar.gz
|
|
30
|
+
tar -xf bbtools.tar.gz
|
|
31
|
+
cd ..
|
|
32
|
+
pip install -e .
|
|
33
|
+
generate-ppby-commands
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
2. Unlazy way - commented out below this line; that is how strongly it is not recommended.
|
|
37
|
+
<!-- Add the BBTools submodule:
|
|
38
|
+
```
|
|
39
|
+
git submodule add https://bitbucket.org/berkeleylab/jgi-bbtools.git vendor/bbtools
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
3. Initialize and update the submodule:
|
|
43
|
+
```
|
|
44
|
+
git submodule init
|
|
45
|
+
git submodule update
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
4. Install the package:
|
|
49
|
+
```
|
|
50
|
+
pip install .
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
5. Generate the commands:
|
|
54
|
+
```
|
|
55
|
+
generate-ppby-commands
|
|
56
|
+
```
|
|
57
|
+
6. Now actually delete the git submodule in vendor and replace it with the sourceforge version (i.e. the lazy way):
|
|
58
|
+
```
|
|
59
|
+
cd vendor/
|
|
60
|
+
rm bb* -rf
|
|
61
|
+
wget https://sourceforge.net/projects/bbmap/files/latest/download -O bbtools.tar.gz
|
|
62
|
+
tar -xf bbtools.tar.gz
|
|
63
|
+
|
|
64
|
+
``` -->
|
|
65
|
+
<!--
|
|
66
|
+
Note: Steps 2 and 3 are only necessary if you're setting up the project for the first time or if the submodule hasn't been added yet. If you're cloning the repository and the submodule has already been added, you can use: -->
|
|
67
|
+
<!--
|
|
68
|
+
```
|
|
69
|
+
git clone --recurse-submodules https://github.com/yourusername/ppby.git
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
This will clone the repository and initialize the submodule in one step... I think? -->
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
## Dependencies
|
|
77
|
+
- Java
|
|
78
|
+
- BBmap (the above steps should download it to the vendor subdirectory...).
|
|
79
|
+
If you would rather use your own BBMap installation, go to the base.py script, comment out line 51, and uncomment line 52.
|
|
80
|
+
- rich (for pretty printing)
|
|
81
|
+
|
|
82
|
+
## Usage
|
|
83
|
+
|
|
84
|
+
After installation, you can use bbmapy in your Python scripts like this:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from bbmapy import bbduk
|
|
88
|
+
|
|
89
|
+
# Basic usage
|
|
90
|
+
bbduk(
|
|
91
|
+
in_file="input.fastq",
|
|
92
|
+
out="output.fastq",
|
|
93
|
+
ktrim="r",
|
|
94
|
+
k="23",
|
|
95
|
+
mink="11",
|
|
96
|
+
hdist="1",
|
|
97
|
+
tbo=True,
|
|
98
|
+
tpe=True,
|
|
99
|
+
minlen="45",
|
|
100
|
+
ref="adapters",
|
|
101
|
+
ftm="5",
|
|
102
|
+
maq="6",
|
|
103
|
+
maxns="1",
|
|
104
|
+
ordered=True,
|
|
105
|
+
threads="4",
|
|
106
|
+
overwrite="t",
|
|
107
|
+
stats="stats.txt"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Using Java flags alongside other arguments
|
|
111
|
+
bbduk(
|
|
112
|
+
Xmx="2g", # Set maximum heap size
|
|
113
|
+
da=True, # Enable assertions
|
|
114
|
+
eoom=True, # Enable out-of-memory termination
|
|
115
|
+
in_file="input.fastq",
|
|
116
|
+
out="output.fastq",
|
|
117
|
+
ktrim="r",
|
|
118
|
+
k="23"
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
# To capture output
|
|
122
|
+
stdout, stderr = bbduk(
|
|
123
|
+
capture_output=True,
|
|
124
|
+
Xmx="2g",
|
|
125
|
+
in_file="input.fastq",
|
|
126
|
+
out="stdout.fastq",
|
|
127
|
+
# ... other parameters ...
|
|
128
|
+
)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
#### Notes:
|
|
132
|
+
* `in` is a reserved keyword in Python, so it is replaced by `in_file` in function calls. `in1` and `in2` are still valid.
|
|
133
|
+
* Java flags (such as `Xmx`, `Xms`, `da`, `ea`, `eoom`) are automatically recognized and handled appropriately. Include them in your function calls just like any other argument.
|
|
134
|
+
* The streams returned with `capture_output=True` might be swapped (stderr content in stdout and vice versa).
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
## License
|
|
139
|
+
|
|
140
|
+
This project is only a wrapper, please see the actual bbtools repository for license etc.
|
|
141
|
+
Neither the developers of bbtools nor of ppby take any responsibility for how you use this code. All accountability is on you.
|
|
142
|
+
|
|
143
|
+
## Acknowledgments
|
|
144
|
+
|
|
145
|
+
This project only (crudely) wraps BBTools (a.k.a bbmap), which is developed by Brian Bushnell.
|
|
146
|
+
If you use ppby and things don't quite work like you'd like, don't expect the developer of bbmap to help you with this whacky python wrapper.
|
|
147
|
+
If
|
|
148
|
+
Please see the [BBTools website](https://jgi.doe.gov/data-and-tools/bbtools/) for more information about the underlying tools.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
bbmapy/__init__.py,sha256=eCGvvQcUCQENPhenw8bHSW0-WQJfF5-K0REg1_w4d-M,259
|
|
2
|
+
bbmapy/_version.py,sha256=NDHlyIcJZjLz8wKlmD1-pr6me5FHBAYwO_ynLG-37N8,411
|
|
3
|
+
bbmapy/base.py,sha256=s2gIFCTEgzwm26Hkmp-Q9SC-PevqPMjHbKbpIiu-iz0,2870
|
|
4
|
+
bbmapy/commands.py,sha256=0qDcvKRcALc9NTer03atkog_4xVnJiP1c6wNvxzm0N4,644265
|
|
5
|
+
bbmapy/ex.py,sha256=mBn2P1-bQ2grIQ8vcr9AhV5nAQQPuVO9HDsXRGo0ni0,2766
|
|
6
|
+
bbmapy/scanner.py,sha256=zxn_dYw2rtjnCjgNqDtjjhcXdA2F7wCzR6bwwlsJTAU,2742
|
|
7
|
+
bbmapy-0.0.2.dist-info/LICENSE,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
8
|
+
bbmapy-0.0.2.dist-info/METADATA,sha256=KemG83QtmnzfWdA3QYl9WVuDhWO1B4jkeLU-rdirpEk,4247
|
|
9
|
+
bbmapy-0.0.2.dist-info/WHEEL,sha256=uCRv0ZEik_232NlR4YDw4Pv3Ajt5bKvMH13NUU7hFuI,91
|
|
10
|
+
bbmapy-0.0.2.dist-info/entry_points.txt,sha256=-ssrOYU13Y7ELfPtUGijcmchb5ivwJw820tfVpzOiRw,65
|
|
11
|
+
bbmapy-0.0.2.dist-info/top_level.txt,sha256=TyfscNPCDLweR-4NeC9WNO6T9wTbqI1J3gZhyWAVlyU,7
|
|
12
|
+
bbmapy-0.0.2.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
bbmapy
|