pywombat 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pywombat/cli.py +47 -21
- {pywombat-0.1.0.dist-info → pywombat-0.2.0.dist-info}/METADATA +1 -1
- pywombat-0.2.0.dist-info/RECORD +6 -0
- {pywombat-0.1.0.dist-info → pywombat-0.2.0.dist-info}/WHEEL +1 -1
- pywombat-0.1.0.dist-info/RECORD +0 -6
- {pywombat-0.1.0.dist-info → pywombat-0.2.0.dist-info}/entry_points.txt +0 -0
pywombat/cli.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""CLI for wombat tool."""
|
|
2
2
|
|
|
3
|
+
import gzip
|
|
3
4
|
import re
|
|
4
5
|
import warnings
|
|
5
6
|
from pathlib import Path
|
|
@@ -22,9 +23,9 @@ import yaml
|
|
|
22
23
|
"-f",
|
|
23
24
|
"--format",
|
|
24
25
|
"output_format",
|
|
25
|
-
type=click.Choice(["tsv", "parquet"], case_sensitive=False),
|
|
26
|
+
type=click.Choice(["tsv", "tsv.gz", "parquet"], case_sensitive=False),
|
|
26
27
|
default="tsv",
|
|
27
|
-
help="Output format: tsv (default) or parquet.",
|
|
28
|
+
help="Output format: tsv (default), tsv.gz (compressed), or parquet.",
|
|
28
29
|
)
|
|
29
30
|
@click.option("-v", "--verbose", is_flag=True, help="Enable verbose output.")
|
|
30
31
|
@click.option(
|
|
@@ -73,7 +74,13 @@ def cli(
|
|
|
73
74
|
if verbose:
|
|
74
75
|
click.echo(f"Reading input file: {input_file}", err=True)
|
|
75
76
|
|
|
76
|
-
#
|
|
77
|
+
# Detect if file is gzipped based on extension
|
|
78
|
+
is_gzipped = str(input_file).endswith(".gz")
|
|
79
|
+
|
|
80
|
+
if verbose and is_gzipped:
|
|
81
|
+
click.echo("Detected gzipped file", err=True)
|
|
82
|
+
|
|
83
|
+
# Read the TSV file (handles both plain and gzipped)
|
|
77
84
|
df = pl.read_csv(input_file, separator="\t")
|
|
78
85
|
|
|
79
86
|
if verbose:
|
|
@@ -104,6 +111,23 @@ def cli(
|
|
|
104
111
|
click.echo(f"Reading filter config: {filter_config}", err=True)
|
|
105
112
|
filter_config_data = load_filter_config(filter_config)
|
|
106
113
|
|
|
114
|
+
# Determine output prefix
|
|
115
|
+
if output is None:
|
|
116
|
+
# Generate default output prefix from input filename
|
|
117
|
+
input_stem = input_file.name
|
|
118
|
+
# Remove .tsv.gz or .tsv extension
|
|
119
|
+
if input_stem.endswith(".tsv.gz"):
|
|
120
|
+
input_stem = input_stem[:-7] # Remove .tsv.gz
|
|
121
|
+
elif input_stem.endswith(".tsv"):
|
|
122
|
+
input_stem = input_stem[:-4] # Remove .tsv
|
|
123
|
+
|
|
124
|
+
# Add config name if filter is provided
|
|
125
|
+
if filter_config:
|
|
126
|
+
config_name = filter_config.stem # Get basename without extension
|
|
127
|
+
output = f"{input_stem}.{config_name}"
|
|
128
|
+
else:
|
|
129
|
+
output = input_stem
|
|
130
|
+
|
|
107
131
|
# Apply filters and write output
|
|
108
132
|
if filter_config_data:
|
|
109
133
|
apply_filters_and_write(
|
|
@@ -115,25 +139,19 @@ def cli(
|
|
|
115
139
|
)
|
|
116
140
|
else:
|
|
117
141
|
# No filters - write single output file
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
output_path = Path(f"{output}.{output_format}")
|
|
142
|
+
# Construct output filename with prefix and format
|
|
143
|
+
output_path = Path(f"{output}.{output_format}")
|
|
121
144
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
145
|
+
if output_format == "tsv":
|
|
146
|
+
formatted_df.write_csv(output_path, separator="\t")
|
|
147
|
+
elif output_format == "tsv.gz":
|
|
148
|
+
csv_content = formatted_df.write_csv(separator="\t")
|
|
149
|
+
with gzip.open(output_path, "wt") as f:
|
|
150
|
+
f.write(csv_content)
|
|
151
|
+
elif output_format == "parquet":
|
|
152
|
+
formatted_df.write_parquet(output_path)
|
|
126
153
|
|
|
127
|
-
|
|
128
|
-
else:
|
|
129
|
-
# Write to stdout (only for TSV format)
|
|
130
|
-
if output_format != "tsv":
|
|
131
|
-
click.echo(
|
|
132
|
-
"Error: stdout output only supported for TSV format. Use -o to specify an output prefix for parquet.",
|
|
133
|
-
err=True,
|
|
134
|
-
)
|
|
135
|
-
raise click.Abort()
|
|
136
|
-
click.echo(formatted_df.write_csv(separator="\t"), nl=False)
|
|
154
|
+
click.echo(f"Formatted data written to {output_path}", err=True)
|
|
137
155
|
|
|
138
156
|
except Exception as e:
|
|
139
157
|
click.echo(f"Error: {e}", err=True)
|
|
@@ -555,11 +573,15 @@ def apply_impact_filters(
|
|
|
555
573
|
)
|
|
556
574
|
|
|
557
575
|
# Write to file
|
|
558
|
-
output_filename = f"{output_prefix}
|
|
576
|
+
output_filename = f"{output_prefix}.{name}.{output_format}"
|
|
559
577
|
output_path = Path(output_filename)
|
|
560
578
|
|
|
561
579
|
if output_format == "tsv":
|
|
562
580
|
filtered_df.write_csv(output_path, separator="\t")
|
|
581
|
+
elif output_format == "tsv.gz":
|
|
582
|
+
csv_content = filtered_df.write_csv(separator="\t")
|
|
583
|
+
with gzip.open(output_path, "wt") as f:
|
|
584
|
+
f.write(csv_content)
|
|
563
585
|
elif output_format == "parquet":
|
|
564
586
|
filtered_df.write_parquet(output_path)
|
|
565
587
|
|
|
@@ -599,6 +621,10 @@ def apply_filters_and_write(
|
|
|
599
621
|
|
|
600
622
|
if output_format == "tsv":
|
|
601
623
|
filtered_df.write_csv(output_path, separator="\t")
|
|
624
|
+
elif output_format == "tsv.gz":
|
|
625
|
+
csv_content = filtered_df.write_csv(separator="\t")
|
|
626
|
+
with gzip.open(output_path, "wt") as f:
|
|
627
|
+
f.write(csv_content)
|
|
602
628
|
elif output_format == "parquet":
|
|
603
629
|
filtered_df.write_parquet(output_path)
|
|
604
630
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pywombat
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support
|
|
5
5
|
Project-URL: Homepage, https://github.com/bourgeron-lab/pywombat
|
|
6
6
|
Project-URL: Repository, https://github.com/bourgeron-lab/pywombat
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
pywombat/__init__.py,sha256=iIPN9vJtsIUhl_DiKNnknxCamLinfayodLLFK8y-aJg,54
|
|
2
|
+
pywombat/cli.py,sha256=PZKV6FoqZyGgG7_mMIO2FzyeONdBaCqnhDATYsQJqMo,33899
|
|
3
|
+
pywombat-0.2.0.dist-info/METADATA,sha256=7Qg2XnaTM92pmIewu5fw_vrcQW5JCVkkj2q6mNC9v88,4982
|
|
4
|
+
pywombat-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
5
|
+
pywombat-0.2.0.dist-info/entry_points.txt,sha256=Vt7U2ypbiEgCBlEV71ZPk287H5_HKmPBT4iBu6duEcE,44
|
|
6
|
+
pywombat-0.2.0.dist-info/RECORD,,
|
pywombat-0.1.0.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
pywombat/__init__.py,sha256=iIPN9vJtsIUhl_DiKNnknxCamLinfayodLLFK8y-aJg,54
|
|
2
|
-
pywombat/cli.py,sha256=kUokXfnaSCKLXiCu7jXbYOPlGTtL5wSzocM9gFtPy30,32801
|
|
3
|
-
pywombat-0.1.0.dist-info/METADATA,sha256=3RlA_lLC7hKUxIrhQvvbBKEolYGOl_EVJgyDfDLI0sU,4982
|
|
4
|
-
pywombat-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
5
|
-
pywombat-0.1.0.dist-info/entry_points.txt,sha256=Vt7U2ypbiEgCBlEV71ZPk287H5_HKmPBT4iBu6duEcE,44
|
|
6
|
-
pywombat-0.1.0.dist-info/RECORD,,
|
|
File without changes
|