pywombat 1.1.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pywombat-1.1.0 → pywombat-1.2.1}/CHANGELOG.md +55 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/PKG-INFO +20 -1
- {pywombat-1.1.0 → pywombat-1.2.1}/README.md +18 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/pyproject.toml +8 -2
- {pywombat-1.1.0 → pywombat-1.2.1}/src/pywombat/cli.py +264 -29
- {pywombat-1.1.0 → pywombat-1.2.1}/uv.lock +46 -1
- {pywombat-1.1.0 → pywombat-1.2.1}/.github/copilot-instructions.md +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/.github/workflows/publish.yml +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/.gitignore +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/.python-version +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/QUICKSTART.md +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/examples/README.md +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/examples/de_novo_mutations.yml +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/examples/rare_variants_high_impact.yml +0 -0
- {pywombat-1.1.0 → pywombat-1.2.1}/src/pywombat/__init__.py +0 -0
|
@@ -5,6 +5,61 @@ All notable changes to PyWombat will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.2.1] - 2026-02-05
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- **Missing Dependency**: Added `pyarrow>=14.0.0` as an explicit dependency
|
|
13
|
+
- Required for Parquet file operations (`scan_parquet`, `write_parquet`)
|
|
14
|
+
- Previously was an implicit dependency through Polars
|
|
15
|
+
- Ensures proper installation on all systems
|
|
16
|
+
|
|
17
|
+
## [1.2.0] - 2026-02-05
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- **Per-Chromosome DNM Processing**: Dramatically reduced memory usage for de novo mutation (DNM) filtering
|
|
22
|
+
- Processes one chromosome at a time instead of loading all variants into memory
|
|
23
|
+
- Reduces peak memory from (total_variants × samples) to (max_chr_variants × samples)
|
|
24
|
+
- Example: 38 samples, 4.2M variants
|
|
25
|
+
- Before: 200GB+ (OOM failure)
|
|
26
|
+
- After: ~24GB (completes successfully in 20 seconds)
|
|
27
|
+
- **88% memory reduction** for DNM workflows
|
|
28
|
+
|
|
29
|
+
- **Early Frequency Filtering for DNM**: Applies population frequency filters BEFORE melting
|
|
30
|
+
- Frequency filters (fafmax_faf95_max_genomes) applied on wide-format data
|
|
31
|
+
- Quality filters (genomes_filters PASS) applied before melting
|
|
32
|
+
- Reduces data expansion by filtering variants early in the pipeline
|
|
33
|
+
|
|
34
|
+
- **New Helper Functions**:
|
|
35
|
+
- `get_unique_chromosomes()`: Discovers and naturally sorts chromosomes from Parquet files
|
|
36
|
+
- `apply_dnm_prefilters()`: Applies variant-level filters before melting
|
|
37
|
+
- `process_dnm_by_chromosome()`: Orchestrates per-chromosome DNM filtering
|
|
38
|
+
|
|
39
|
+
### Changed
|
|
40
|
+
|
|
41
|
+
- **DNM Filter Architecture**: Refactored `apply_de_novo_filter()` to support `skip_prefilters` parameter
|
|
42
|
+
- Allows separation of variant-level filters (applied before melting) from sample-level filters
|
|
43
|
+
- Prevents double-filtering when prefilters already applied
|
|
44
|
+
|
|
45
|
+
- **Filter Command Routing**: Automatically detects DNM mode and routes to per-chromosome processing
|
|
46
|
+
- Transparent to users - no command syntax changes required
|
|
47
|
+
- Optimized memory usage is automatic when using DNM config with Parquet input
|
|
48
|
+
|
|
49
|
+
### Performance
|
|
50
|
+
|
|
51
|
+
- **DNM Memory Usage**: 88% reduction in peak memory (200GB+ → ~24GB)
|
|
52
|
+
- **DNM Processing Time**: 20 seconds for 38-sample, 4.2M variant dataset (previously failed with OOM)
|
|
53
|
+
- **Throughput**: Successfully processes 6,788 DNM variants from 4.2M input variants
|
|
54
|
+
|
|
55
|
+
### Testing
|
|
56
|
+
|
|
57
|
+
- Added 3 new test cases for DNM optimization:
|
|
58
|
+
- `test_get_unique_chromosomes()`: Verifies chromosome discovery and natural sorting
|
|
59
|
+
- `test_apply_dnm_prefilters()`: Validates frequency prefiltering logic
|
|
60
|
+
- `test_dnm_skip_prefilters()`: Ensures skip_prefilters parameter works correctly
|
|
61
|
+
- Total test suite: 25 tests (all passing)
|
|
62
|
+
|
|
8
63
|
## [1.1.0] - 2026-02-05
|
|
9
64
|
|
|
10
65
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pywombat
|
|
3
|
-
Version: 1.1
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Summary: A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support
|
|
5
5
|
Project-URL: Homepage, https://github.com/bourgeron-lab/pywombat
|
|
6
6
|
Project-URL: Repository, https://github.com/bourgeron-lab/pywombat
|
|
@@ -16,6 +16,7 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
|
16
16
|
Requires-Python: >=3.12
|
|
17
17
|
Requires-Dist: click>=8.1.0
|
|
18
18
|
Requires-Dist: polars>=0.19.0
|
|
19
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
19
20
|
Requires-Dist: pyyaml>=6.0
|
|
20
21
|
Requires-Dist: tqdm>=4.67.1
|
|
21
22
|
Provides-Extra: dev
|
|
@@ -598,8 +599,15 @@ Each configuration file is fully documented with:
|
|
|
598
599
|
2. **Parquet format benefits**:
|
|
599
600
|
- Columnar storage enables selective column loading
|
|
600
601
|
- Pre-filtering before melting (expression filters applied before expanding to per-sample rows)
|
|
602
|
+
- **Per-chromosome processing for DNM**: Automatically processes DNM filtering chromosome-by-chromosome
|
|
601
603
|
- 30% smaller file size vs gzipped TSV
|
|
602
604
|
|
|
605
|
+
3. **De Novo Mutation (DNM) filtering optimization**:
|
|
606
|
+
- Automatically uses per-chromosome processing when DNM mode is enabled
|
|
607
|
+
- Processes one chromosome at a time to reduce peak memory
|
|
608
|
+
- Applies frequency filters before melting to reduce data expansion
|
|
609
|
+
- Example: 38-sample family with 4.2M variants completes in 20 seconds with ~24GB RAM (vs 200GB+ OOM failure)
|
|
610
|
+
|
|
603
611
|
### For All Files
|
|
604
612
|
|
|
605
613
|
3. **Pre-filter with bcftools**: Filter by region/gene before PyWombat
|
|
@@ -608,12 +616,23 @@ Each configuration file is fully documented with:
|
|
|
608
616
|
|
|
609
617
|
### Memory Comparison
|
|
610
618
|
|
|
619
|
+
**Expression Filtering** (e.g., VEP_IMPACT filters):
|
|
620
|
+
|
|
611
621
|
| Approach | 38 samples, 4.2M variants | Memory | Time |
|
|
612
622
|
|----------|---------------------------|--------|------|
|
|
613
623
|
| Direct TSV | ❌ OOM (>200GB) | 200+ GB | Failed |
|
|
614
624
|
| TSV with chunking | ⚠️ Slow | ~30GB | ~3 min |
|
|
615
625
|
| **Parquet + pre-filter** | ✅ **Optimal** | **~1.2GB** | **<1 sec** |
|
|
616
626
|
|
|
627
|
+
**De Novo Mutation (DNM) Filtering**:
|
|
628
|
+
|
|
629
|
+
| Approach | 38 samples, 4.2M variants | Memory | Time | Result |
|
|
630
|
+
|----------|---------------------------|--------|------|--------|
|
|
631
|
+
| Without optimization | ❌ OOM (>200GB) | 200+ GB | Failed | N/A |
|
|
632
|
+
| **Parquet + per-chromosome** | ✅ **Success** | **~24GB** | **20 sec** | **6,788 DNM variants** |
|
|
633
|
+
|
|
634
|
+
*DNM filtering requires sample-level data (cannot pre-filter before melting), but per-chromosome processing reduces peak memory by 88%.*
|
|
635
|
+
|
|
617
636
|
---
|
|
618
637
|
|
|
619
638
|
## Development
|
|
@@ -573,8 +573,15 @@ Each configuration file is fully documented with:
|
|
|
573
573
|
2. **Parquet format benefits**:
|
|
574
574
|
- Columnar storage enables selective column loading
|
|
575
575
|
- Pre-filtering before melting (expression filters applied before expanding to per-sample rows)
|
|
576
|
+
- **Per-chromosome processing for DNM**: Automatically processes DNM filtering chromosome-by-chromosome
|
|
576
577
|
- 30% smaller file size vs gzipped TSV
|
|
577
578
|
|
|
579
|
+
3. **De Novo Mutation (DNM) filtering optimization**:
|
|
580
|
+
- Automatically uses per-chromosome processing when DNM mode is enabled
|
|
581
|
+
- Processes one chromosome at a time to reduce peak memory
|
|
582
|
+
- Applies frequency filters before melting to reduce data expansion
|
|
583
|
+
- Example: 38-sample family with 4.2M variants completes in 20 seconds with ~24GB RAM (vs 200GB+ OOM failure)
|
|
584
|
+
|
|
578
585
|
### For All Files
|
|
579
586
|
|
|
580
587
|
3. **Pre-filter with bcftools**: Filter by region/gene before PyWombat
|
|
@@ -583,12 +590,23 @@ Each configuration file is fully documented with:
|
|
|
583
590
|
|
|
584
591
|
### Memory Comparison
|
|
585
592
|
|
|
593
|
+
**Expression Filtering** (e.g., VEP_IMPACT filters):
|
|
594
|
+
|
|
586
595
|
| Approach | 38 samples, 4.2M variants | Memory | Time |
|
|
587
596
|
|----------|---------------------------|--------|------|
|
|
588
597
|
| Direct TSV | ❌ OOM (>200GB) | 200+ GB | Failed |
|
|
589
598
|
| TSV with chunking | ⚠️ Slow | ~30GB | ~3 min |
|
|
590
599
|
| **Parquet + pre-filter** | ✅ **Optimal** | **~1.2GB** | **<1 sec** |
|
|
591
600
|
|
|
601
|
+
**De Novo Mutation (DNM) Filtering**:
|
|
602
|
+
|
|
603
|
+
| Approach | 38 samples, 4.2M variants | Memory | Time | Result |
|
|
604
|
+
|----------|---------------------------|--------|------|--------|
|
|
605
|
+
| Without optimization | ❌ OOM (>200GB) | 200+ GB | Failed | N/A |
|
|
606
|
+
| **Parquet + per-chromosome** | ✅ **Success** | **~24GB** | **20 sec** | **6,788 DNM variants** |
|
|
607
|
+
|
|
608
|
+
*DNM filtering requires sample-level data (cannot pre-filter before melting), but per-chromosome processing reduces peak memory by 88%.*
|
|
609
|
+
|
|
592
610
|
---
|
|
593
611
|
|
|
594
612
|
## Development
|
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "pywombat"
|
|
3
|
-
version = "1.1
|
|
3
|
+
version = "1.2.1"
|
|
4
4
|
description = "A CLI tool for processing and filtering bcftools tabulated TSV files with pedigree support"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [{ name = "Freddy Cliquet", email = "fcliquet@pasteur.fr" }]
|
|
7
7
|
requires-python = ">=3.12"
|
|
8
|
-
dependencies = [
|
|
8
|
+
dependencies = [
|
|
9
|
+
"polars>=0.19.0",
|
|
10
|
+
"pyarrow>=14.0.0",
|
|
11
|
+
"click>=8.1.0",
|
|
12
|
+
"pyyaml>=6.0",
|
|
13
|
+
"tqdm>=4.67.1",
|
|
14
|
+
]
|
|
9
15
|
license = { text = "MIT" }
|
|
10
16
|
keywords = ["vcf", "bioinformatics", "genomics", "pedigree", "variant-calling"]
|
|
11
17
|
classifiers = [
|
|
@@ -264,6 +264,111 @@ def _process_chunk(
|
|
|
264
264
|
return df
|
|
265
265
|
|
|
266
266
|
|
|
267
|
+
def process_dnm_by_chromosome(
|
|
268
|
+
input_file: Path,
|
|
269
|
+
pedigree_df: pl.DataFrame,
|
|
270
|
+
filter_config: dict,
|
|
271
|
+
output_format: str,
|
|
272
|
+
verbose: bool
|
|
273
|
+
) -> pl.DataFrame:
|
|
274
|
+
"""Process DNM filtering chromosome by chromosome to reduce memory usage.
|
|
275
|
+
|
|
276
|
+
Processes each chromosome separately:
|
|
277
|
+
1. Load one chromosome at a time from Parquet
|
|
278
|
+
2. Apply frequency/quality prefilters (before melting)
|
|
279
|
+
3. Melt samples
|
|
280
|
+
4. Apply DNM filters
|
|
281
|
+
5. Combine results from all chromosomes
|
|
282
|
+
|
|
283
|
+
This reduces peak memory from (total_variants × samples) to
|
|
284
|
+
(max_chr_variants × samples).
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
input_file: Path to Parquet file
|
|
288
|
+
pedigree_df: Pedigree DataFrame with sample relationships
|
|
289
|
+
filter_config: Filter configuration dict
|
|
290
|
+
output_format: Output format (tsv, tsv.gz, parquet)
|
|
291
|
+
verbose: Whether to print progress messages
|
|
292
|
+
|
|
293
|
+
Returns:
|
|
294
|
+
Combined DataFrame with DNM-filtered variants from all chromosomes
|
|
295
|
+
"""
|
|
296
|
+
# Get list of chromosomes
|
|
297
|
+
chromosomes = get_unique_chromosomes(input_file)
|
|
298
|
+
|
|
299
|
+
if verbose:
|
|
300
|
+
click.echo(
|
|
301
|
+
f"DNM per-chromosome processing: {len(chromosomes)} chromosomes", err=True
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
results = []
|
|
305
|
+
dnm_cfg = {}
|
|
306
|
+
dnm_cfg.update(filter_config.get("quality", {}))
|
|
307
|
+
dnm_cfg.update(filter_config.get("dnm", {}))
|
|
308
|
+
|
|
309
|
+
for chrom in chromosomes:
|
|
310
|
+
if verbose:
|
|
311
|
+
click.echo(f"Processing chromosome {chrom}...", err=True)
|
|
312
|
+
|
|
313
|
+
# Load only this chromosome
|
|
314
|
+
lazy_df = pl.scan_parquet(input_file).filter(
|
|
315
|
+
pl.col("#CHROM") == chrom
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
# Apply frequency filters BEFORE melting (Optimization 2)
|
|
319
|
+
lazy_df = apply_dnm_prefilters(lazy_df, filter_config, verbose=False)
|
|
320
|
+
|
|
321
|
+
# Count variants after prefiltering
|
|
322
|
+
if verbose:
|
|
323
|
+
pre_count = lazy_df.select(pl.count()).collect().item()
|
|
324
|
+
click.echo(f" Chromosome {chrom}: {pre_count} variants after prefilter", err=True)
|
|
325
|
+
|
|
326
|
+
# Collect, melt, and apply DNM filters
|
|
327
|
+
df = lazy_df.collect()
|
|
328
|
+
|
|
329
|
+
if df.shape[0] == 0:
|
|
330
|
+
if verbose:
|
|
331
|
+
click.echo(f" Chromosome {chrom}: No variants after prefilter, skipping", err=True)
|
|
332
|
+
continue
|
|
333
|
+
|
|
334
|
+
formatted_df = format_bcftools_tsv_minimal(df, pedigree_df)
|
|
335
|
+
|
|
336
|
+
if verbose:
|
|
337
|
+
click.echo(
|
|
338
|
+
f" Chromosome {chrom}: {formatted_df.shape[0]} rows after melting", err=True
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
# Apply DNM filters (skip prefilters since already applied)
|
|
342
|
+
filtered_df = apply_de_novo_filter(
|
|
343
|
+
formatted_df, dnm_cfg, verbose=False, pedigree_df=pedigree_df,
|
|
344
|
+
skip_prefilters=True
|
|
345
|
+
)
|
|
346
|
+
|
|
347
|
+
if verbose:
|
|
348
|
+
click.echo(
|
|
349
|
+
f" Chromosome {chrom}: {filtered_df.shape[0]} variants passed DNM filter", err=True
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
if filtered_df.shape[0] > 0:
|
|
353
|
+
results.append(filtered_df)
|
|
354
|
+
|
|
355
|
+
# Combine results
|
|
356
|
+
if not results:
|
|
357
|
+
if verbose:
|
|
358
|
+
click.echo("No variants passed DNM filters across all chromosomes", err=True)
|
|
359
|
+
# Return empty DataFrame with correct schema
|
|
360
|
+
return pl.DataFrame()
|
|
361
|
+
|
|
362
|
+
final_df = pl.concat(results)
|
|
363
|
+
|
|
364
|
+
if verbose:
|
|
365
|
+
click.echo(
|
|
366
|
+
f"DNM filtering complete: {final_df.shape[0]} total variants", err=True
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
return final_df
|
|
370
|
+
|
|
371
|
+
|
|
267
372
|
@cli.command("filter")
|
|
268
373
|
@click.argument("input_file", type=click.Path(exists=True, path_type=Path))
|
|
269
374
|
@click.option(
|
|
@@ -391,6 +496,42 @@ def filter_cmd(
|
|
|
391
496
|
# Parquet input: INFO fields already expanded by 'wombat prepare'
|
|
392
497
|
lazy_df = pl.scan_parquet(input_file)
|
|
393
498
|
|
|
499
|
+
# Check if DNM mode is enabled - use per-chromosome processing
|
|
500
|
+
if filter_config_data and filter_config_data.get("dnm", {}).get("enabled", False):
|
|
501
|
+
if verbose:
|
|
502
|
+
click.echo("DNM mode: Using per-chromosome processing for memory efficiency", err=True)
|
|
503
|
+
|
|
504
|
+
# DNM requires pedigree
|
|
505
|
+
if pedigree_df is None:
|
|
506
|
+
click.echo("Error: DNM filtering requires a pedigree file (--pedigree option)", err=True)
|
|
507
|
+
raise click.Abort()
|
|
508
|
+
|
|
509
|
+
# Process DNM filtering chromosome by chromosome
|
|
510
|
+
formatted_df = process_dnm_by_chromosome(
|
|
511
|
+
input_file,
|
|
512
|
+
pedigree_df,
|
|
513
|
+
filter_config_data,
|
|
514
|
+
output_format,
|
|
515
|
+
verbose
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
# Write output directly
|
|
519
|
+
output_path = Path(f"{output}.{output_format}")
|
|
520
|
+
|
|
521
|
+
if output_format == "tsv":
|
|
522
|
+
formatted_df.write_csv(output_path, separator="\t")
|
|
523
|
+
elif output_format == "tsv.gz":
|
|
524
|
+
csv_content = formatted_df.write_csv(separator="\t")
|
|
525
|
+
with gzip.open(output_path, "wt") as f:
|
|
526
|
+
f.write(csv_content)
|
|
527
|
+
elif output_format == "parquet":
|
|
528
|
+
formatted_df.write_parquet(output_path)
|
|
529
|
+
|
|
530
|
+
if verbose:
|
|
531
|
+
click.echo(f"DNM variants written to {output_path}", err=True)
|
|
532
|
+
|
|
533
|
+
return
|
|
534
|
+
|
|
394
535
|
# OPTIMIZATION: Apply expression filter BEFORE melting
|
|
395
536
|
# Expression filters (VEP_IMPACT, etc.) don't depend on sample data
|
|
396
537
|
if filter_config_data and "expression" in filter_config_data:
|
|
@@ -800,11 +941,47 @@ def _pos_in_par(chrom: str, pos: int, par_regions: dict) -> bool:
|
|
|
800
941
|
return False
|
|
801
942
|
|
|
802
943
|
|
|
944
|
+
def get_unique_chromosomes(parquet_file: Path) -> list[str]:
|
|
945
|
+
"""Get list of unique chromosomes from Parquet file, sorted naturally.
|
|
946
|
+
|
|
947
|
+
Args:
|
|
948
|
+
parquet_file: Path to Parquet file
|
|
949
|
+
|
|
950
|
+
Returns:
|
|
951
|
+
Sorted list of chromosome names (e.g., ['1', '2', ..., '22', 'X', 'Y', 'MT'])
|
|
952
|
+
"""
|
|
953
|
+
# Read just the #CHROM column to get unique values
|
|
954
|
+
df = pl.scan_parquet(parquet_file).select("#CHROM").unique().collect()
|
|
955
|
+
chroms = df["#CHROM"].to_list()
|
|
956
|
+
|
|
957
|
+
# Sort chromosomes properly (1, 2, ..., 22, X, Y, MT)
|
|
958
|
+
def chrom_sort_key(chrom: str) -> tuple:
|
|
959
|
+
"""Sort key for natural chromosome ordering."""
|
|
960
|
+
chrom_norm = chrom.replace("chr", "").replace("Chr", "").replace("CHR", "").upper()
|
|
961
|
+
|
|
962
|
+
# Try to parse as integer (autosomes)
|
|
963
|
+
try:
|
|
964
|
+
return (0, int(chrom_norm), "")
|
|
965
|
+
except ValueError:
|
|
966
|
+
pass
|
|
967
|
+
|
|
968
|
+
# Sex chromosomes and mitochondrial
|
|
969
|
+
if chrom_norm in ["X", "Y", "MT", "M"]:
|
|
970
|
+
order = {"X": 23, "Y": 24, "MT": 25, "M": 25}
|
|
971
|
+
return (1, order.get(chrom_norm, 99), chrom_norm)
|
|
972
|
+
|
|
973
|
+
# Other chromosomes (e.g., scaffolds)
|
|
974
|
+
return (2, 0, chrom_norm)
|
|
975
|
+
|
|
976
|
+
return sorted(chroms, key=chrom_sort_key)
|
|
977
|
+
|
|
978
|
+
|
|
803
979
|
def apply_de_novo_filter(
|
|
804
980
|
df: pl.DataFrame,
|
|
805
981
|
dnm_config: dict,
|
|
806
982
|
verbose: bool = False,
|
|
807
983
|
pedigree_df: Optional[pl.DataFrame] = None,
|
|
984
|
+
skip_prefilters: bool = False,
|
|
808
985
|
) -> pl.DataFrame:
|
|
809
986
|
"""Apply de novo detection filters to dataframe using vectorized operations.
|
|
810
987
|
|
|
@@ -815,6 +992,13 @@ def apply_de_novo_filter(
|
|
|
815
992
|
|
|
816
993
|
This function will read `sex` from `df` when present; otherwise it will use
|
|
817
994
|
the `pedigree_df` (which should contain `sample_id` and `sex`).
|
|
995
|
+
|
|
996
|
+
Args:
|
|
997
|
+
df: DataFrame with melted samples
|
|
998
|
+
dnm_config: DNM configuration dict
|
|
999
|
+
verbose: Whether to print progress messages
|
|
1000
|
+
pedigree_df: Pedigree DataFrame
|
|
1001
|
+
skip_prefilters: If True, skips frequency/genomes_filters (assumes already applied)
|
|
818
1002
|
"""
|
|
819
1003
|
if not dnm_config:
|
|
820
1004
|
return df
|
|
@@ -979,43 +1163,45 @@ def apply_de_novo_filter(
|
|
|
979
1163
|
err=True,
|
|
980
1164
|
)
|
|
981
1165
|
|
|
982
|
-
# Apply
|
|
983
|
-
if
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
1166
|
+
# Apply frequency/quality prefilters if not already applied
|
|
1167
|
+
if not skip_prefilters:
|
|
1168
|
+
# Apply fafmax_faf95_max_genomes filter if specified
|
|
1169
|
+
if fafmax_max is not None:
|
|
1170
|
+
if "fafmax_faf95_max_genomes" in df.columns:
|
|
1171
|
+
df = df.filter(
|
|
1172
|
+
(
|
|
1173
|
+
pl.col("fafmax_faf95_max_genomes").cast(pl.Float64, strict=False)
|
|
1174
|
+
<= fafmax_max
|
|
1175
|
+
)
|
|
1176
|
+
| pl.col("fafmax_faf95_max_genomes").is_null()
|
|
989
1177
|
)
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
1178
|
+
if verbose:
|
|
1179
|
+
click.echo(
|
|
1180
|
+
f"DNM: {df.shape[0]} variants after fafmax_faf95_max_genomes filter (<={fafmax_max})",
|
|
1181
|
+
err=True,
|
|
1182
|
+
)
|
|
1183
|
+
elif verbose:
|
|
993
1184
|
click.echo(
|
|
994
|
-
|
|
1185
|
+
"DNM: Warning - fafmax_faf95_max_genomes column not found, skipping frequency filter",
|
|
995
1186
|
err=True,
|
|
996
1187
|
)
|
|
997
|
-
elif verbose:
|
|
998
|
-
click.echo(
|
|
999
|
-
"DNM: Warning - fafmax_faf95_max_genomes column not found, skipping frequency filter",
|
|
1000
|
-
err=True,
|
|
1001
|
-
)
|
|
1002
1188
|
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1189
|
+
# Apply genomes_filters filter if specified
|
|
1190
|
+
if genomes_filters_pass_only:
|
|
1191
|
+
if "genomes_filters" in df.columns:
|
|
1192
|
+
df = df.filter(
|
|
1193
|
+
(pl.col("genomes_filters") == ".") | pl.col("genomes_filters").is_null()
|
|
1194
|
+
)
|
|
1195
|
+
if verbose:
|
|
1196
|
+
click.echo(
|
|
1197
|
+
f"DNM: {df.shape[0]} variants after genomes_filters filter (pass only)",
|
|
1198
|
+
err=True,
|
|
1199
|
+
)
|
|
1200
|
+
elif verbose:
|
|
1010
1201
|
click.echo(
|
|
1011
|
-
|
|
1202
|
+
"DNM: Warning - genomes_filters column not found, skipping genomes_filters filter",
|
|
1012
1203
|
err=True,
|
|
1013
1204
|
)
|
|
1014
|
-
elif verbose:
|
|
1015
|
-
click.echo(
|
|
1016
|
-
"DNM: Warning - genomes_filters column not found, skipping genomes_filters filter",
|
|
1017
|
-
err=True,
|
|
1018
|
-
)
|
|
1019
1205
|
|
|
1020
1206
|
# Build parent quality checks (common to all)
|
|
1021
1207
|
father_qual_ok = (pl.col("father_dp").cast(pl.Float64, strict=False) >= p_dp) & (
|
|
@@ -2293,6 +2479,55 @@ def process_with_progress(
|
|
|
2293
2479
|
click.echo("Processing complete.", err=True)
|
|
2294
2480
|
|
|
2295
2481
|
|
|
2482
|
+
def apply_dnm_prefilters(
|
|
2483
|
+
lazy_df: pl.LazyFrame,
|
|
2484
|
+
filter_config: dict,
|
|
2485
|
+
verbose: bool = False
|
|
2486
|
+
) -> pl.LazyFrame:
|
|
2487
|
+
"""Apply variant-level DNM filters before melting.
|
|
2488
|
+
|
|
2489
|
+
These filters don't require sample-level data and can be applied
|
|
2490
|
+
on wide-format data to reduce memory usage.
|
|
2491
|
+
|
|
2492
|
+
Applies:
|
|
2493
|
+
- Population frequency filters (fafmax_faf95_max_genomes_max)
|
|
2494
|
+
- Quality filters (genomes_filters PASS only)
|
|
2495
|
+
|
|
2496
|
+
Args:
|
|
2497
|
+
lazy_df: LazyFrame with wide-format data (not melted)
|
|
2498
|
+
filter_config: Filter configuration dict
|
|
2499
|
+
verbose: Whether to print progress messages
|
|
2500
|
+
|
|
2501
|
+
Returns:
|
|
2502
|
+
Filtered LazyFrame
|
|
2503
|
+
"""
|
|
2504
|
+
dnm_config = filter_config.get("dnm", {})
|
|
2505
|
+
|
|
2506
|
+
# Frequency filter
|
|
2507
|
+
fafmax_max = dnm_config.get("fafmax_faf95_max_genomes_max")
|
|
2508
|
+
if fafmax_max is not None:
|
|
2509
|
+
lazy_df = lazy_df.filter(
|
|
2510
|
+
(pl.col("fafmax_faf95_max_genomes").cast(pl.Float64, strict=False) <= fafmax_max)
|
|
2511
|
+
| pl.col("fafmax_faf95_max_genomes").is_null()
|
|
2512
|
+
)
|
|
2513
|
+
if verbose:
|
|
2514
|
+
click.echo(
|
|
2515
|
+
f"DNM prefilter: Applied frequency filter (fafmax <= {fafmax_max})", err=True
|
|
2516
|
+
)
|
|
2517
|
+
|
|
2518
|
+
# Quality filter (genomes_filters PASS only)
|
|
2519
|
+
if dnm_config.get("genomes_filters_pass_only", False):
|
|
2520
|
+
lazy_df = lazy_df.filter(
|
|
2521
|
+
(pl.col("genomes_filters") == ".") | pl.col("genomes_filters").is_null()
|
|
2522
|
+
)
|
|
2523
|
+
if verbose:
|
|
2524
|
+
click.echo(
|
|
2525
|
+
"DNM prefilter: Applied genomes_filters PASS filter", err=True
|
|
2526
|
+
)
|
|
2527
|
+
|
|
2528
|
+
return lazy_df
|
|
2529
|
+
|
|
2530
|
+
|
|
2296
2531
|
def apply_filters_lazy(
|
|
2297
2532
|
lazy_df: pl.LazyFrame,
|
|
2298
2533
|
filter_config: dict,
|
|
@@ -150,6 +150,49 @@ wheels = [
|
|
|
150
150
|
{ url = "https://files.pythonhosted.org/packages/f4/d1/8d1b28d007da43c750367c8bf5cb0f22758c16b1104b2b73b9acadb2d17a/polars_runtime_32-1.35.2-cp39-abi3-win_arm64.whl", hash = "sha256:6861145aa321a44eda7cc6694fb7751cb7aa0f21026df51b5faa52e64f9dc39b", size = 36955684, upload-time = "2025-11-09T13:19:15.666Z" },
|
|
151
151
|
]
|
|
152
152
|
|
|
153
|
+
[[package]]
|
|
154
|
+
name = "pyarrow"
|
|
155
|
+
version = "23.0.0"
|
|
156
|
+
source = { registry = "https://pypi.org/simple" }
|
|
157
|
+
sdist = { url = "https://files.pythonhosted.org/packages/01/33/ffd9c3eb087fa41dd79c3cf20c4c0ae3cdb877c4f8e1107a446006344924/pyarrow-23.0.0.tar.gz", hash = "sha256:180e3150e7edfcd182d3d9afba72f7cf19839a497cc76555a8dce998a8f67615", size = 1167185, upload-time = "2026-01-18T16:19:42.218Z" }
|
|
158
|
+
wheels = [
|
|
159
|
+
{ url = "https://files.pythonhosted.org/packages/3d/bd/c861d020831ee57609b73ea721a617985ece817684dc82415b0bc3e03ac3/pyarrow-23.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5961a9f646c232697c24f54d3419e69b4261ba8a8b66b0ac54a1851faffcbab8", size = 34189116, upload-time = "2026-01-18T16:15:28.054Z" },
|
|
160
|
+
{ url = "https://files.pythonhosted.org/packages/8c/23/7725ad6cdcbaf6346221391e7b3eecd113684c805b0a95f32014e6fa0736/pyarrow-23.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:632b3e7c3d232f41d64e1a4a043fb82d44f8a349f339a1188c6a0dd9d2d47d8a", size = 35803831, upload-time = "2026-01-18T16:15:33.798Z" },
|
|
161
|
+
{ url = "https://files.pythonhosted.org/packages/57/06/684a421543455cdc2944d6a0c2cc3425b028a4c6b90e34b35580c4899743/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:76242c846db1411f1d6c2cc3823be6b86b40567ee24493344f8226ba34a81333", size = 44436452, upload-time = "2026-01-18T16:15:41.598Z" },
|
|
162
|
+
{ url = "https://files.pythonhosted.org/packages/c6/6f/8f9eb40c2328d66e8b097777ddcf38494115ff9f1b5bc9754ba46991191e/pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b73519f8b52ae28127000986bf228fda781e81d3095cd2d3ece76eb5cf760e1b", size = 47557396, upload-time = "2026-01-18T16:15:51.252Z" },
|
|
163
|
+
{ url = "https://files.pythonhosted.org/packages/10/6e/f08075f1472e5159553501fde2cc7bc6700944bdabe49a03f8a035ee6ccd/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:068701f6823449b1b6469120f399a1239766b117d211c5d2519d4ed5861f75de", size = 48147129, upload-time = "2026-01-18T16:16:00.299Z" },
|
|
164
|
+
{ url = "https://files.pythonhosted.org/packages/7d/82/d5a680cd507deed62d141cc7f07f7944a6766fc51019f7f118e4d8ad0fb8/pyarrow-23.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1801ba947015d10e23bca9dd6ef5d0e9064a81569a89b6e9a63b59224fd060df", size = 50596642, upload-time = "2026-01-18T16:16:08.502Z" },
|
|
165
|
+
{ url = "https://files.pythonhosted.org/packages/a9/26/4f29c61b3dce9fa7780303b86895ec6a0917c9af927101daaaf118fbe462/pyarrow-23.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:52265266201ec25b6839bf6bd4ea918ca6d50f31d13e1cf200b4261cd11dc25c", size = 27660628, upload-time = "2026-01-18T16:16:15.28Z" },
|
|
166
|
+
{ url = "https://files.pythonhosted.org/packages/66/34/564db447d083ec7ff93e0a883a597d2f214e552823bfc178a2d0b1f2c257/pyarrow-23.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:ad96a597547af7827342ffb3c503c8316e5043bb09b47a84885ce39394c96e00", size = 34184630, upload-time = "2026-01-18T16:16:22.141Z" },
|
|
167
|
+
{ url = "https://files.pythonhosted.org/packages/aa/3a/3999daebcb5e6119690c92a621c4d78eef2ffba7a0a1b56386d2875fcd77/pyarrow-23.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b9edf990df77c2901e79608f08c13fbde60202334a4fcadb15c1f57bf7afee43", size = 35796820, upload-time = "2026-01-18T16:16:29.441Z" },
|
|
168
|
+
{ url = "https://files.pythonhosted.org/packages/ec/ee/39195233056c6a8d0976d7d1ac1cd4fe21fb0ec534eca76bc23ef3f60e11/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:36d1b5bc6ddcaff0083ceec7e2561ed61a51f49cce8be079ee8ed406acb6fdef", size = 44438735, upload-time = "2026-01-18T16:16:38.79Z" },
|
|
169
|
+
{ url = "https://files.pythonhosted.org/packages/2c/41/6a7328ee493527e7afc0c88d105ecca69a3580e29f2faaeac29308369fd7/pyarrow-23.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4292b889cd224f403304ddda8b63a36e60f92911f89927ec8d98021845ea21be", size = 47557263, upload-time = "2026-01-18T16:16:46.248Z" },
|
|
170
|
+
{ url = "https://files.pythonhosted.org/packages/c6/ee/34e95b21ee84db494eae60083ddb4383477b31fb1fd19fd866d794881696/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dfd9e133e60eaa847fd80530a1b89a052f09f695d0b9c34c235ea6b2e0924cf7", size = 48153529, upload-time = "2026-01-18T16:16:53.412Z" },
|
|
171
|
+
{ url = "https://files.pythonhosted.org/packages/52/88/8a8d83cea30f4563efa1b7bf51d241331ee5cd1b185a7e063f5634eca415/pyarrow-23.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832141cc09fac6aab1cd3719951d23301396968de87080c57c9a7634e0ecd068", size = 50598851, upload-time = "2026-01-18T16:17:01.133Z" },
|
|
172
|
+
{ url = "https://files.pythonhosted.org/packages/c6/4c/2929c4be88723ba025e7b3453047dc67e491c9422965c141d24bab6b5962/pyarrow-23.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:7a7d067c9a88faca655c71bcc30ee2782038d59c802d57950826a07f60d83c4c", size = 27577747, upload-time = "2026-01-18T16:18:02.413Z" },
|
|
173
|
+
{ url = "https://files.pythonhosted.org/packages/64/52/564a61b0b82d72bd68ec3aef1adda1e3eba776f89134b9ebcb5af4b13cb6/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ce9486e0535a843cf85d990e2ec5820a47918235183a5c7b8b97ed7e92c2d47d", size = 34446038, upload-time = "2026-01-18T16:17:07.861Z" },
|
|
174
|
+
{ url = "https://files.pythonhosted.org/packages/cc/c9/232d4f9855fd1de0067c8a7808a363230d223c83aeee75e0fe6eab851ba9/pyarrow-23.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:075c29aeaa685fd1182992a9ed2499c66f084ee54eea47da3eb76e125e06064c", size = 35921142, upload-time = "2026-01-18T16:17:15.401Z" },
|
|
175
|
+
{ url = "https://files.pythonhosted.org/packages/96/f2/60af606a3748367b906bb82d41f0032e059f075444445d47e32a7ff1df62/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:799965a5379589510d888be3094c2296efd186a17ca1cef5b77703d4d5121f53", size = 44490374, upload-time = "2026-01-18T16:17:23.93Z" },
|
|
176
|
+
{ url = "https://files.pythonhosted.org/packages/ff/2d/7731543050a678ea3a413955a2d5d80d2a642f270aa57a3cb7d5a86e3f46/pyarrow-23.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ef7cac8fe6fccd8b9e7617bfac785b0371a7fe26af59463074e4882747145d40", size = 47527896, upload-time = "2026-01-18T16:17:33.393Z" },
|
|
177
|
+
{ url = "https://files.pythonhosted.org/packages/5a/90/f3342553b7ac9879413aed46500f1637296f3c8222107523a43a1c08b42a/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15a414f710dc927132dd67c361f78c194447479555af57317066ee5116b90e9e", size = 48210401, upload-time = "2026-01-18T16:17:42.012Z" },
|
|
178
|
+
{ url = "https://files.pythonhosted.org/packages/f3/da/9862ade205ecc46c172b6ce5038a74b5151c7401e36255f15975a45878b2/pyarrow-23.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3e0d2e6915eca7d786be6a77bf227fbc06d825a75b5b5fe9bcbef121dec32685", size = 50579677, upload-time = "2026-01-18T16:17:50.241Z" },
|
|
179
|
+
{ url = "https://files.pythonhosted.org/packages/c2/4c/f11f371f5d4740a5dafc2e11c76bcf42d03dfdb2d68696da97de420b6963/pyarrow-23.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4b317ea6e800b5704e5e5929acb6e2dc13e9276b708ea97a39eb8b345aa2658b", size = 27631889, upload-time = "2026-01-18T16:17:56.55Z" },
|
|
180
|
+
{ url = "https://files.pythonhosted.org/packages/97/bb/15aec78bcf43a0c004067bd33eb5352836a29a49db8581fc56f2b6ca88b7/pyarrow-23.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:20b187ed9550d233a872074159f765f52f9d92973191cd4b93f293a19efbe377", size = 34213265, upload-time = "2026-01-18T16:18:07.904Z" },
|
|
181
|
+
{ url = "https://files.pythonhosted.org/packages/f6/6c/deb2c594bbba41c37c5d9aa82f510376998352aa69dfcb886cb4b18ad80f/pyarrow-23.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:18ec84e839b493c3886b9b5e06861962ab4adfaeb79b81c76afbd8d84c7d5fda", size = 35819211, upload-time = "2026-01-18T16:18:13.94Z" },
|
|
182
|
+
{ url = "https://files.pythonhosted.org/packages/e0/e5/ee82af693cb7b5b2b74f6524cdfede0e6ace779d7720ebca24d68b57c36b/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e438dd3f33894e34fd02b26bd12a32d30d006f5852315f611aa4add6c7fab4bc", size = 44502313, upload-time = "2026-01-18T16:18:20.367Z" },
|
|
183
|
+
{ url = "https://files.pythonhosted.org/packages/9c/86/95c61ad82236495f3c31987e85135926ba3ec7f3819296b70a68d8066b49/pyarrow-23.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:a244279f240c81f135631be91146d7fa0e9e840e1dfed2aba8483eba25cd98e6", size = 47585886, upload-time = "2026-01-18T16:18:27.544Z" },
|
|
184
|
+
{ url = "https://files.pythonhosted.org/packages/bb/6e/a72d901f305201802f016d015de1e05def7706fff68a1dedefef5dc7eff7/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c4692e83e42438dba512a570c6eaa42be2f8b6c0f492aea27dec54bdc495103a", size = 48207055, upload-time = "2026-01-18T16:18:35.425Z" },
|
|
185
|
+
{ url = "https://files.pythonhosted.org/packages/f9/e5/5de029c537630ca18828db45c30e2a78da03675a70ac6c3528203c416fe3/pyarrow-23.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ae7f30f898dfe44ea69654a35c93e8da4cef6606dc4c72394068fd95f8e9f54a", size = 50619812, upload-time = "2026-01-18T16:18:43.553Z" },
|
|
186
|
+
{ url = "https://files.pythonhosted.org/packages/59/8d/2af846cd2412e67a087f5bda4a8e23dfd4ebd570f777db2e8686615dafc1/pyarrow-23.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:5b86bb649e4112fb0614294b7d0a175c7513738876b89655605ebb87c804f861", size = 28263851, upload-time = "2026-01-18T16:19:38.567Z" },
|
|
187
|
+
{ url = "https://files.pythonhosted.org/packages/7b/7f/caab863e587041156f6786c52e64151b7386742c8c27140f637176e9230e/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ebc017d765d71d80a3f8584ca0566b53e40464586585ac64176115baa0ada7d3", size = 34463240, upload-time = "2026-01-18T16:18:49.755Z" },
|
|
188
|
+
{ url = "https://files.pythonhosted.org/packages/c9/fa/3a5b8c86c958e83622b40865e11af0857c48ec763c11d472c87cd518283d/pyarrow-23.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:0800cc58a6d17d159df823f87ad66cefebf105b982493d4bad03ee7fab84b993", size = 35935712, upload-time = "2026-01-18T16:18:55.626Z" },
|
|
189
|
+
{ url = "https://files.pythonhosted.org/packages/c5/08/17a62078fc1a53decb34a9aa79cf9009efc74d63d2422e5ade9fed2f99e3/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3a7c68c722da9bb5b0f8c10e3eae71d9825a4b429b40b32709df5d1fa55beb3d", size = 44503523, upload-time = "2026-01-18T16:19:03.958Z" },
|
|
190
|
+
{ url = "https://files.pythonhosted.org/packages/cc/70/84d45c74341e798aae0323d33b7c39194e23b1abc439ceaf60a68a7a969a/pyarrow-23.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:bd5556c24622df90551063ea41f559b714aa63ca953db884cfb958559087a14e", size = 47542490, upload-time = "2026-01-18T16:19:11.208Z" },
|
|
191
|
+
{ url = "https://files.pythonhosted.org/packages/61/d9/d1274b0e6f19e235de17441e53224f4716574b2ca837022d55702f24d71d/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54810f6e6afc4ffee7c2e0051b61722fbea9a4961b46192dcfae8ea12fa09059", size = 48233605, upload-time = "2026-01-18T16:19:19.544Z" },
|
|
192
|
+
{ url = "https://files.pythonhosted.org/packages/39/07/e4e2d568cb57543d84482f61e510732820cddb0f47c4bb7df629abfed852/pyarrow-23.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:14de7d48052cf4b0ed174533eafa3cfe0711b8076ad70bede32cf59f744f0d7c", size = 50603979, upload-time = "2026-01-18T16:19:26.717Z" },
|
|
193
|
+
{ url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" },
|
|
194
|
+
]
|
|
195
|
+
|
|
153
196
|
[[package]]
|
|
154
197
|
name = "pygments"
|
|
155
198
|
version = "2.19.2"
|
|
@@ -191,11 +234,12 @@ wheels = [
|
|
|
191
234
|
|
|
192
235
|
[[package]]
|
|
193
236
|
name = "pywombat"
|
|
194
|
-
version = "1.0
|
|
237
|
+
version = "1.2.0"
|
|
195
238
|
source = { editable = "." }
|
|
196
239
|
dependencies = [
|
|
197
240
|
{ name = "click" },
|
|
198
241
|
{ name = "polars" },
|
|
242
|
+
{ name = "pyarrow" },
|
|
199
243
|
{ name = "pyyaml" },
|
|
200
244
|
{ name = "tqdm" },
|
|
201
245
|
]
|
|
@@ -210,6 +254,7 @@ dev = [
|
|
|
210
254
|
requires-dist = [
|
|
211
255
|
{ name = "click", specifier = ">=8.1.0" },
|
|
212
256
|
{ name = "polars", specifier = ">=0.19.0" },
|
|
257
|
+
{ name = "pyarrow", specifier = ">=14.0.0" },
|
|
213
258
|
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" },
|
|
214
259
|
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" },
|
|
215
260
|
{ name = "pyyaml", specifier = ">=6.0" },
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|