biopipen 0.31.6__py3-none-any.whl → 0.31.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.31.6"
1
+ __version__ = "0.31.7"
biopipen/ns/bam.py CHANGED
@@ -301,3 +301,31 @@ class BamSampling(Proc):
301
301
  "sort_args": [],
302
302
  }
303
303
  script = "file://../scripts/bam/BamSampling.py"
304
+
305
+
306
class BamSubsetByBed(Proc):
    """Subset bam file by the regions in a bed file

    Input:
        bamfile: The bam file
        bedfile: The bed file with the regions to subset the bam file by

    Output:
        outfile: The output bam file
            Named after the stem of `in.bamfile`, suffixed with `-subset.bam`.

    Envs:
        ncores (type=int): Number of cores to use
        samtools: Path to samtools executable
        tool: The tool to use, currently only "samtools" is supported
        index (flag): Whether to index the output bam file
    """
    input = "bamfile:file, bedfile:file"
    output = "outfile:file:{{in.bamfile | stem}}-subset.bam"
    lang = config.lang.python
    envs = {
        # The script casts this with `| int`, hence `type=int` in the docstring
        "ncores": config.misc.ncores,
        "samtools": config.exe.samtools,
        # The script raises ValueError for anything other than "samtools"
        "tool": "samtools",
        "index": True,
    }
    script = "file://../scripts/bam/BamSubsetByBed.py"
biopipen/ns/bed.py CHANGED
@@ -198,3 +198,43 @@ class BedtoolsIntersect(Proc):
198
198
  "postcmd": None,
199
199
  }
200
200
  script = "file://../scripts/bed/BedtoolsIntersect.py"
201
+
202
+
203
class BedtoolsMakeWindows(Proc):
    """Generate windows from BED intervals or a genome size file.

    This wraps `bedtools makewindows`.

    Input:
        infile: The input BED file or a genome size file
            The type is detected from the number of columns in the file:
            3+ columns means a BED file, fewer means a genome size file.

    Output:
        outfile: The output BED file holding the generated windows

    Envs:
        bedtools: The path to bedtools
        window (type=int): The size of the windows
        step (type=int): The step size of the windows
        nwin (type=int): The number of windows to be generated
            Exclusive with `window` and `step`.
            Either `nwin` or `window` and `step` should be provided.
        reverse (flag): Reverse numbering of windows in the output
        name (choice): How to name the generated windows/regions
            - none: Do not add any name
            - src: Use the source interval's name
            - winnum: Use the window number
            - srcwinnum: Use the source interval's name and window number
    """
    input = "infile:file"
    output = "outfile:file:{{in.infile | stem}}_windows.bed"
    lang = config.lang.python
    # Windowing options default to None so the script can tell which
    # of `nwin` / `window` the user actually supplied.
    envs = dict(
        bedtools=config.exe.bedtools,
        window=None,
        step=None,
        nwin=None,
        reverse=False,
        name="none",
    )
    script = "file://../scripts/bed/BedtoolsMakeWindows.py"
biopipen/scripts/bam/BamSubsetByBed.py ADDED
@@ -0,0 +1,38 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+
4
+ # using:
5
+ # samtools view --subsample 0.1 --subsample-seed 1234 --threads 4 -b -o out.bam in.bam
6
+
7
+ bamfile = {{ in.bamfile | repr }} # pyright: ignore # noqa
8
+ bedfile = {{ in.bedfile | repr }} # pyright: ignore # noqa
9
+ outfile = Path({{ out.outfile | repr }}) # pyright: ignore
10
+ ncores = {{ envs.ncores | int }} # pyright: ignore
11
+ samtools = {{ envs.samtools | repr }} # pyright: ignore
12
+ tool = {{ envs.tool | repr }} # pyright: ignore
13
+ should_index = {{ envs.index | repr }} # pyright: ignore
14
+
15
+ if tool != "samtools":
16
+ raise ValueError(
17
+ f"Tool {tool} is not supported. "
18
+ "Currently only samtools is supported."
19
+ )
20
+
21
+ cmd = [
22
+ samtools,
23
+ "view",
24
+ "--target-file",
25
+ bedfile,
26
+ "-b",
27
+ "--threads",
28
+ ncores,
29
+ "-o",
30
+ outfile,
31
+ bamfile
32
+ ]
33
+ run_command(cmd, fg=True)
34
+
35
+ if should_index:
36
+ logger.info("Indexing the output bam file.")
37
+ cmd = [samtools, "index", "-@", ncores, outfile]
38
+ run_command(cmd, fg=True)
biopipen/scripts/bed/BedtoolsMakeWindows.py ADDED
@@ -0,0 +1,47 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+
4
+ infile = Path({{in.afile | repr}}) # pyright: ignore # noqa: #999
5
+ outfile = Path({{in.bfile | repr}}) # pyright: ignore
6
+ bedtools = {{envs.bedtools | repr}} # pyright: ignore
7
+ window = {{envs.window | repr}} # pyright: ignore
8
+ step = {{envs.step | repr}} # pyright: ignore
9
+ nwin = {{envs.nwin | repr}} # pyright: ignore
10
+ reverse = {{envs.reverse | repr}} # pyright: ignore
11
+ name = {{envs.name | repr}} # pyright: ignore
12
+
13
+ if nwin is None and window is None:
14
+ raise ValueError("Either `nwin` or `window` should be provided.")
15
+
16
+ if nwin is not None and window is not None:
17
+ raise ValueError("Either `nwin` or `window` should be provided, not both.")
18
+
19
+ # detect if infile is a genome size file or a bed file
20
+ with infile.open() as f:
21
+ line = f.readline().strip()
22
+ if len(line.split("\t")) > 2:
23
+ is_bed = True
24
+ else:
25
+ is_bed = False
26
+
27
+ if is_bed:
28
+ logger.info("BED file is detected as input.")
29
+ cmd = [bedtools, "makewindows", "-b", infile]
30
+ else:
31
+ logger.info("Genome size file is detected as input.")
32
+ cmd = [bedtools, "makewindows", "-g", infile]
33
+
34
+ if nwin:
35
+ cmd.extend(["-n", nwin])
36
+ elif step is not None:
37
+ cmd.extend(["-w", window, "-s", step])
38
+ else:
39
+ cmd.extend(["-w", window])
40
+
41
+ if reverse:
42
+ cmd.append("-reverse")
43
+
44
+ if name != "none":
45
+ cmd.extend(["-name", name])
46
+
47
+ run_command(cmd, stdout=outfile)
biopipen-0.31.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.31.6
3
+ Version: 0.31.7
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
biopipen-0.31.7.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- biopipen/__init__.py,sha256=KU7MsdICtcB5jVm5DAaNainBCUqYItaZLSuj12ONgkE,23
1
+ biopipen/__init__.py,sha256=APQVRwZptBFPacKCHqg_tW4g4--qdUtMJoA6GprSuSI,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=7IXvviRicZ2D1h6x3BVgbLJ96nsh-ikvZ0sVlQepqFE,1944
@@ -7,8 +7,8 @@ biopipen/core/filters.py,sha256=5bZsbpdW7DCxqiteRdb2gelmXvfqWPmPsFxrpHdWsoE,1298
7
7
  biopipen/core/proc.py,sha256=60lUP3PcUAaKbDETo9N5PEIoeOYrLgcSmuytmrhcx8g,912
8
8
  biopipen/core/testing.py,sha256=lZ_R5ZbYPO2NPuLHdbzg6HbD_f4j8paVVbyeUqwg6FE,3411
9
9
  biopipen/ns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- biopipen/ns/bam.py,sha256=-xVagotsURyOtwKzv72L-2f9P7467OVzgvP96syfHZc,10628
11
- biopipen/ns/bed.py,sha256=HsTCJge7XNfCZyCBJ4iifNKQ5we4VZSpRx8XL8--y5A,6689
10
+ biopipen/ns/bam.py,sha256=OtvzEadx-zpr98PoZoozbQszcQBvtFDMuAtuwH-3JUw,11394
11
+ biopipen/ns/bed.py,sha256=EqpSa7Hx6GImvJNghtV4uDo2PnPXeUt1Yq9AFWJP9_8,8159
12
12
  biopipen/ns/cellranger.py,sha256=yPBoNzVSY74J7uyVucaob5lqZKKru5-hYSM4f4Nr2OY,5553
13
13
  biopipen/ns/cellranger_pipeline.py,sha256=EWkPJTujamNSMQoRnKfhUiIj6TkMfRmCSUbPfd8Tv8E,4011
14
14
  biopipen/ns/cnv.py,sha256=ssQAHf2MB675avoXVRkYy0vGiqIpRpRExywjhbymmBI,7811
@@ -79,6 +79,7 @@ biopipen/reports/vcf/TruvariConsistency.svelte,sha256=BBvtxi1EPmGH7j5M5zMOcLEhKW
79
79
  biopipen/scripts/bam/BamMerge.py,sha256=Gd5P8V-CSsTAA8ZrUxetR-I49GjJ3VJNjrqu7-EZwXQ,3642
80
80
  biopipen/scripts/bam/BamSampling.py,sha256=Pi6CXAbBFVRGh8-0WrkB-3v3oxinfahQk11H0IdBNmQ,2312
81
81
  biopipen/scripts/bam/BamSplitChroms.py,sha256=b7GS2I4X0dLOhlPg_r9-buoIHTWlq6zHI3Rox94LXR8,4893
82
+ biopipen/scripts/bam/BamSubsetByBed.py,sha256=QpY6WDJfbO3k2FdMyfgstFKgTdtOc1beGoUF5FI5EAc,1027
82
83
  biopipen/scripts/bam/CNAClinic.R,sha256=mQXwtShL54HZXGCPqgPKPrU74_6K_8PqtOtG0mgA-F0,5062
83
84
  biopipen/scripts/bam/CNVpytor.py,sha256=hOUli9BDMOoth0or-tjUYC1AP3yNOuxUS6G3Rhcg99s,18000
84
85
  biopipen/scripts/bam/ControlFREEC.py,sha256=oX6iWsos-CfiT_ViDBrKeMOOIVdCKWrB-_MqzLgEF9s,3267
@@ -86,6 +87,7 @@ biopipen/scripts/bed/Bed2Vcf.py,sha256=u0mp_2Y4UtEA839zq9UENesH6Gyiwd4sZQW9wFnBV
86
87
  biopipen/scripts/bed/BedConsensus.py,sha256=gfAxuIalvCEpS0tiOyAJGPYGgHN0L-hm0K37Iteh5yw,2386
87
88
  biopipen/scripts/bed/BedLiftOver.sh,sha256=Y4gBsz9w4zhE29UmWojO6F4PXMMMWC1uCzjrxa19eOs,256
88
89
  biopipen/scripts/bed/BedtoolsIntersect.py,sha256=sFyXPL3kG59xa5eJwHumcQLw3lfabOXsq2-k8IgIqt4,1722
90
+ biopipen/scripts/bed/BedtoolsMakeWindows.py,sha256=Ip4U0ORXti65g7znZeHEbvw2PGlCxoEfeucZmw4wb1o,1428
89
91
  biopipen/scripts/bed/BedtoolsMerge.py,sha256=7mt307V_wWa_ME0VfuMsVX0HgEwfDcZtY_bDvOPjFiQ,368
90
92
  biopipen/scripts/cellranger/CellRangerCount.py,sha256=b9kkHPpq-bFh-3XCUdxdMKg3SsZmEzZFrG3dbtb4fX8,2875
91
93
  biopipen/scripts/cellranger/CellRangerSummary.R,sha256=mVOCIHngEpJIKVD3tMG5UWqS0OQGGjY6yx6ikRcqQU4,11067
@@ -284,7 +286,7 @@ biopipen/utils/reference.py,sha256=oi5evicLwHxF0KAIPNZohBeHJLJQNWFJH0cr2y5pgcg,5
284
286
  biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
285
287
  biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
286
288
  biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
287
- biopipen-0.31.6.dist-info/METADATA,sha256=2NGpF5QMNq7lG0y8MQIGpfFYyRE9lYz17RpF6dEtq0k,882
288
- biopipen-0.31.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
289
- biopipen-0.31.6.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
290
- biopipen-0.31.6.dist-info/RECORD,,
289
+ biopipen-0.31.7.dist-info/METADATA,sha256=dmsDb7Q7iTWvkZjChqUgVqhb2CiQP4j8OA0jBzRYgGY,882
290
+ biopipen-0.31.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
291
+ biopipen-0.31.7.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
292
+ biopipen-0.31.7.dist-info/RECORD,,