pywombat 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pywombat
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support
5
5
  Project-URL: Homepage, https://github.com/bourgeron-lab/pywombat
6
6
  Project-URL: Repository, https://github.com/bourgeron-lab/pywombat
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pywombat"
3
- version = "0.4.0"
3
+ version = "0.5.0"
4
4
  description = "A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support"
5
5
  readme = "README.md"
6
6
  authors = [{ name = "Freddy Cliquet", email = "fcliquet@pasteur.fr" }]
@@ -127,7 +127,20 @@ def cli(
127
127
  click.echo("Processing with streaming mode...", err=True)
128
128
 
129
129
  # Build lazy query
130
- lazy_df = pl.scan_csv(input_file, separator="\t")
130
+ # Force certain columns to string type
131
+ string_columns = [
132
+ "FID",
133
+ "sample_id",
134
+ "father_id",
135
+ "mother_id",
136
+ "FatherBarcode",
137
+ "MotherBarcode",
138
+ "sample",
139
+ ]
140
+ schema_overrides = {col: pl.Utf8 for col in string_columns}
141
+ lazy_df = pl.scan_csv(
142
+ input_file, separator="\t", schema_overrides=schema_overrides
143
+ )
131
144
 
132
145
  # Apply formatting transformations
133
146
  lazy_df = format_bcftools_tsv_lazy(lazy_df, pedigree_df)
@@ -185,7 +198,18 @@ def debug_variant(
185
198
  click.echo(f"Debug mode: searching for {chrom}:{pos}", err=True)
186
199
 
187
200
  # Read and format the data
188
- df = pl.read_csv(input_file, separator="\t")
201
+ # Force certain columns to string type
202
+ string_columns = [
203
+ "FID",
204
+ "sample_id",
205
+ "father_id",
206
+ "mother_id",
207
+ "FatherBarcode",
208
+ "MotherBarcode",
209
+ "sample",
210
+ ]
211
+ schema_overrides = {col: pl.Utf8 for col in string_columns}
212
+ df = pl.read_csv(input_file, separator="\t", schema_overrides=schema_overrides)
189
213
  formatted_df = format_bcftools_tsv(df, pedigree_df)
190
214
 
191
215
  # Filter to matching rows
@@ -771,7 +795,18 @@ def read_pedigree(pedigree_path: Path) -> pl.DataFrame:
771
795
  DataFrame with columns: sample_id, father_id, mother_id
772
796
  """
773
797
  # Try reading with header first
774
- df = pl.read_csv(pedigree_path, separator="\t")
798
+ # Force certain columns to string type
799
+ string_columns = [
800
+ "FID",
801
+ "sample_id",
802
+ "father_id",
803
+ "mother_id",
804
+ "FatherBarcode",
805
+ "MotherBarcode",
806
+ "sample",
807
+ ]
808
+ schema_overrides = {col: pl.Utf8 for col in string_columns}
809
+ df = pl.read_csv(pedigree_path, separator="\t", schema_overrides=schema_overrides)
775
810
 
776
811
  # Check if first row has 'FID' in first column (indicates header)
777
812
  if df.columns[0] == "FID" or "sample_id" in df.columns:
File without changes
File without changes
File without changes
File without changes
File without changes