pywombat 0.1.0__tar.gz → 0.2.0__tar.gz

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pywombat
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support
5
5
  Project-URL: Homepage, https://github.com/bourgeron-lab/pywombat
6
6
  Project-URL: Repository, https://github.com/bourgeron-lab/pywombat
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pywombat"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  description = "A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "Freddy Cliquet", email = "fcliquet@pasteur.fr" }]
@@ -1,5 +1,6 @@
1
1
  """CLI for wombat tool."""
2
2
 
3
+ import gzip
3
4
  import re
4
5
  import warnings
5
6
  from pathlib import Path
@@ -22,9 +23,9 @@ import yaml
22
23
  "-f",
23
24
  "--format",
24
25
  "output_format",
25
- type=click.Choice(["tsv", "parquet"], case_sensitive=False),
26
+ type=click.Choice(["tsv", "tsv.gz", "parquet"], case_sensitive=False),
26
27
  default="tsv",
27
- help="Output format: tsv (default) or parquet.",
28
+ help="Output format: tsv (default), tsv.gz (compressed), or parquet.",
28
29
  )
29
30
  @click.option("-v", "--verbose", is_flag=True, help="Enable verbose output.")
30
31
  @click.option(
@@ -73,7 +74,13 @@ def cli(
73
74
  if verbose:
74
75
  click.echo(f"Reading input file: {input_file}", err=True)
75
76
 
76
- # Read the TSV file
77
+ # Detect if file is gzipped based on extension
78
+ is_gzipped = str(input_file).endswith(".gz")
79
+
80
+ if verbose and is_gzipped:
81
+ click.echo("Detected gzipped file", err=True)
82
+
83
+ # Read the TSV file (handles both plain and gzipped)
77
84
  df = pl.read_csv(input_file, separator="\t")
78
85
 
79
86
  if verbose:
@@ -104,6 +111,23 @@ def cli(
104
111
  click.echo(f"Reading filter config: {filter_config}", err=True)
105
112
  filter_config_data = load_filter_config(filter_config)
106
113
 
114
+ # Determine output prefix
115
+ if output is None:
116
+ # Generate default output prefix from input filename
117
+ input_stem = input_file.name
118
+ # Remove .tsv.gz or .tsv extension
119
+ if input_stem.endswith(".tsv.gz"):
120
+ input_stem = input_stem[:-7] # Remove .tsv.gz
121
+ elif input_stem.endswith(".tsv"):
122
+ input_stem = input_stem[:-4] # Remove .tsv
123
+
124
+ # Add config name if filter is provided
125
+ if filter_config:
126
+ config_name = filter_config.stem # Get basename without extension
127
+ output = f"{input_stem}.{config_name}"
128
+ else:
129
+ output = input_stem
130
+
107
131
  # Apply filters and write output
108
132
  if filter_config_data:
109
133
  apply_filters_and_write(
@@ -115,25 +139,19 @@ def cli(
115
139
  )
116
140
  else:
117
141
  # No filters - write single output file
118
- if output:
119
- # Construct output filename with prefix and format
120
- output_path = Path(f"{output}.{output_format}")
142
+ # Construct output filename with prefix and format
143
+ output_path = Path(f"{output}.{output_format}")
121
144
 
122
- if output_format == "tsv":
123
- formatted_df.write_csv(output_path, separator="\t")
124
- elif output_format == "parquet":
125
- formatted_df.write_parquet(output_path)
145
+ if output_format == "tsv":
146
+ formatted_df.write_csv(output_path, separator="\t")
147
+ elif output_format == "tsv.gz":
148
+ csv_content = formatted_df.write_csv(separator="\t")
149
+ with gzip.open(output_path, "wt") as f:
150
+ f.write(csv_content)
151
+ elif output_format == "parquet":
152
+ formatted_df.write_parquet(output_path)
126
153
 
127
- click.echo(f"Formatted data written to {output_path}", err=True)
128
- else:
129
- # Write to stdout (only for TSV format)
130
- if output_format != "tsv":
131
- click.echo(
132
- "Error: stdout output only supported for TSV format. Use -o to specify an output prefix for parquet.",
133
- err=True,
134
- )
135
- raise click.Abort()
136
- click.echo(formatted_df.write_csv(separator="\t"), nl=False)
154
+ click.echo(f"Formatted data written to {output_path}", err=True)
137
155
 
138
156
  except Exception as e:
139
157
  click.echo(f"Error: {e}", err=True)
@@ -555,11 +573,15 @@ def apply_impact_filters(
555
573
  )
556
574
 
557
575
  # Write to file
558
- output_filename = f"{output_prefix}_{name}.{output_format}"
576
+ output_filename = f"{output_prefix}.{name}.{output_format}"
559
577
  output_path = Path(output_filename)
560
578
 
561
579
  if output_format == "tsv":
562
580
  filtered_df.write_csv(output_path, separator="\t")
581
+ elif output_format == "tsv.gz":
582
+ csv_content = filtered_df.write_csv(separator="\t")
583
+ with gzip.open(output_path, "wt") as f:
584
+ f.write(csv_content)
563
585
  elif output_format == "parquet":
564
586
  filtered_df.write_parquet(output_path)
565
587
 
@@ -599,6 +621,10 @@ def apply_filters_and_write(
599
621
 
600
622
  if output_format == "tsv":
601
623
  filtered_df.write_csv(output_path, separator="\t")
624
+ elif output_format == "tsv.gz":
625
+ csv_content = filtered_df.write_csv(separator="\t")
626
+ with gzip.open(output_path, "wt") as f:
627
+ f.write(csv_content)
602
628
  elif output_format == "parquet":
603
629
  filtered_df.write_parquet(output_path)
604
630
 
File without changes
File without changes
File without changes
File without changes
File without changes