pywombat 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pywombat/cli.py +38 -3
- {pywombat-0.4.0.dist-info → pywombat-0.5.0.dist-info}/METADATA +1 -1
- pywombat-0.5.0.dist-info/RECORD +6 -0
- pywombat-0.4.0.dist-info/RECORD +0 -6
- {pywombat-0.4.0.dist-info → pywombat-0.5.0.dist-info}/WHEEL +0 -0
- {pywombat-0.4.0.dist-info → pywombat-0.5.0.dist-info}/entry_points.txt +0 -0
pywombat/cli.py
CHANGED
|
@@ -127,7 +127,20 @@ def cli(
|
|
|
127
127
|
click.echo("Processing with streaming mode...", err=True)
|
|
128
128
|
|
|
129
129
|
# Build lazy query
|
|
130
|
-
|
|
130
|
+
# Force certain columns to string type
|
|
131
|
+
string_columns = [
|
|
132
|
+
"FID",
|
|
133
|
+
"sample_id",
|
|
134
|
+
"father_id",
|
|
135
|
+
"mother_id",
|
|
136
|
+
"FatherBarcode",
|
|
137
|
+
"MotherBarcode",
|
|
138
|
+
"sample",
|
|
139
|
+
]
|
|
140
|
+
schema_overrides = {col: pl.Utf8 for col in string_columns}
|
|
141
|
+
lazy_df = pl.scan_csv(
|
|
142
|
+
input_file, separator="\t", schema_overrides=schema_overrides
|
|
143
|
+
)
|
|
131
144
|
|
|
132
145
|
# Apply formatting transformations
|
|
133
146
|
lazy_df = format_bcftools_tsv_lazy(lazy_df, pedigree_df)
|
|
@@ -185,7 +198,18 @@ def debug_variant(
|
|
|
185
198
|
click.echo(f"Debug mode: searching for {chrom}:{pos}", err=True)
|
|
186
199
|
|
|
187
200
|
# Read and format the data
|
|
188
|
-
|
|
201
|
+
# Force certain columns to string type
|
|
202
|
+
string_columns = [
|
|
203
|
+
"FID",
|
|
204
|
+
"sample_id",
|
|
205
|
+
"father_id",
|
|
206
|
+
"mother_id",
|
|
207
|
+
"FatherBarcode",
|
|
208
|
+
"MotherBarcode",
|
|
209
|
+
"sample",
|
|
210
|
+
]
|
|
211
|
+
schema_overrides = {col: pl.Utf8 for col in string_columns}
|
|
212
|
+
df = pl.read_csv(input_file, separator="\t", schema_overrides=schema_overrides)
|
|
189
213
|
formatted_df = format_bcftools_tsv(df, pedigree_df)
|
|
190
214
|
|
|
191
215
|
# Filter to matching rows
|
|
@@ -771,7 +795,18 @@ def read_pedigree(pedigree_path: Path) -> pl.DataFrame:
|
|
|
771
795
|
DataFrame with columns: sample_id, father_id, mother_id
|
|
772
796
|
"""
|
|
773
797
|
# Try reading with header first
|
|
774
|
-
|
|
798
|
+
# Force certain columns to string type
|
|
799
|
+
string_columns = [
|
|
800
|
+
"FID",
|
|
801
|
+
"sample_id",
|
|
802
|
+
"father_id",
|
|
803
|
+
"mother_id",
|
|
804
|
+
"FatherBarcode",
|
|
805
|
+
"MotherBarcode",
|
|
806
|
+
"sample",
|
|
807
|
+
]
|
|
808
|
+
schema_overrides = {col: pl.Utf8 for col in string_columns}
|
|
809
|
+
df = pl.read_csv(pedigree_path, separator="\t", schema_overrides=schema_overrides)
|
|
775
810
|
|
|
776
811
|
# Check if first row has 'FID' in first column (indicates header)
|
|
777
812
|
if df.columns[0] == "FID" or "sample_id" in df.columns:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pywombat
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support
|
|
5
5
|
Project-URL: Homepage, https://github.com/bourgeron-lab/pywombat
|
|
6
6
|
Project-URL: Repository, https://github.com/bourgeron-lab/pywombat
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
pywombat/__init__.py,sha256=iIPN9vJtsIUhl_DiKNnknxCamLinfayodLLFK8y-aJg,54
|
|
2
|
+
pywombat/cli.py,sha256=0nBlwyRu1Q01a0EHcVyIYtKmgezCWA85pQtEXpnuzL4,44535
|
|
3
|
+
pywombat-0.5.0.dist-info/METADATA,sha256=2Py8xwNxZBD18u4r-tJI_mQezMBg4td3ruWOm61MbdA,4982
|
|
4
|
+
pywombat-0.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
5
|
+
pywombat-0.5.0.dist-info/entry_points.txt,sha256=Vt7U2ypbiEgCBlEV71ZPk287H5_HKmPBT4iBu6duEcE,44
|
|
6
|
+
pywombat-0.5.0.dist-info/RECORD,,
|
pywombat-0.4.0.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
pywombat/__init__.py,sha256=iIPN9vJtsIUhl_DiKNnknxCamLinfayodLLFK8y-aJg,54
|
|
2
|
-
pywombat/cli.py,sha256=dg38E39VpdJhKQt3aGSHwSiLWn1W8JnUkcsy3ZUHD5w,43518
|
|
3
|
-
pywombat-0.4.0.dist-info/METADATA,sha256=ZKPTIp9ud2AIVbcujg4ciq900DX-UkGs5oafa41jxTQ,4982
|
|
4
|
-
pywombat-0.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
5
|
-
pywombat-0.4.0.dist-info/entry_points.txt,sha256=Vt7U2ypbiEgCBlEV71ZPk287H5_HKmPBT4iBu6duEcE,44
|
|
6
|
-
pywombat-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|