polars-bio 0.13.1__tar.gz → 0.14.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {polars_bio-0.13.1 → polars_bio-0.14.0}/Cargo.lock +50 -28
- {polars_bio-0.13.1 → polars_bio-0.14.0}/Cargo.toml +8 -8
- {polars_bio-0.13.1 → polars_bio-0.14.0}/PKG-INFO +2 -1
- polars_bio-0.14.0/docs/blog/index.md +2 -0
- polars_bio-0.14.0/docs/blog/posts/benchmark-operations-2025-09.md +114 -0
- polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/all_operations_walltime_comparison.png +0 -0
- polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/bench-20250-all_operations_speedup_comparison.png +0 -0
- polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/benchmark_comparison_genomicranges_vs_polars_bio.png +0 -0
- polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/benchmark_speedup_comparison_genomicranges_vs_polars_bio.png +0 -0
- polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/combined_benchmark_visualization.png +0 -0
- polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/combined_multi_testcase.png +0 -0
- polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/star-history-202595.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/faq.md +4 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/supplement.md +1 -1
- {polars_bio-0.13.1 → polars_bio-0.14.0}/mkdocs.yml +15 -2
- {polars_bio-0.13.1 → polars_bio-0.14.0}/poetry.lock +1264 -909
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/__init__.py +1 -1
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/io.py +338 -34
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/range_op.py +36 -3
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/range_op_helpers.py +10 -1
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/range_op_io.py +43 -10
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/sql.py +27 -12
- polars_bio-0.14.0/polars_bio/utils.py +124 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/pyproject.toml +4 -2
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/lib.rs +6 -4
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/operation.rs +2 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/option.rs +8 -3
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/query.rs +95 -43
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/scan.rs +72 -13
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/streaming.rs +3 -4
- polars_bio-0.14.0/tests/test_ensembl_vcf_parsing.py +201 -0
- polars_bio-0.14.0/tests/test_execution_plan_validation.py +145 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_io.py +140 -10
- polars_bio-0.14.0/tests/test_polars_bio_projection_validation.py +259 -0
- polars_bio-0.14.0/tests/test_projection_performance.py +259 -0
- polars_bio-0.14.0/tests/test_projection_pushdown.py +470 -0
- polars_bio-0.14.0/tests/test_vcf_projection_pushdown.py +158 -0
- polars_bio-0.13.1/polars_bio/utils.py +0 -46
- polars_bio-0.13.1/tests/test_ensembl_parsing.py +0 -193
- polars_bio-0.13.1/tests/test_ensembl_vcf_parsing.py +0 -193
- {polars_bio-0.13.1 → polars_bio-0.14.0}/.github/workflows/publish_documentation.yml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/.github/workflows/publish_to_pypi.yml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/.github/workflows/release.yml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/.gitignore +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/.pre-commit-config.yaml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/.readthedocs.yaml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/LICENSE +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/Makefile +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/README.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/api.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/count-overlaps-parallel.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/count-overlaps-single.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/coverage-parallel.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/coverage-single.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/logo-large.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/logo.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/bioframe.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/bioframe_sink.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/dataframes/polars-bio-overlap-mem.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/dataframes/polars-bio-overlap-pd-mem.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/dataframes/polars-bio-overlap-pl-mem.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/polars-bio.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/polars-bio_sink.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/polars-bio_stream_sink.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/pyranges0.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/pyranges0_sink.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/pyranges1.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/memory/pyranges1_sink.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/nearest-parallel.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/nearest-single.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/overlap-parallel.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/overlap-single.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/results-nearest-0.1.1.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/assets/results-overlap-0.1.1.png +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/contact.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/features.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/index.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/example.bam +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/example.bed.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/example.fastq.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/example.gff3.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/example.vcf +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/exons/.part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/exons/.part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/exons/_SUCCESS +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/exons/part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/exons/part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/fBrain-DS14718/.part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/fBrain-DS14718/.part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/fBrain-DS14718/_SUCCESS +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/fBrain-DS14718/part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/data/fBrain-DS14718/part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/notebooks/tutorial.ipynb +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/performance.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/quickstart.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/requirements.txt +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/docs/versions.json +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/README.md +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/bin/.env +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/bin/start.sh +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/bin/stop.sh +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/data/policy-anonymous.json +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/data/policy-priv.json +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/data/test.fasta +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/data/vep.vcf +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/data/vep.vcf.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/docker-compose.yml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/it_ensembl_vcf_bgz.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/it/it_object_storage_io.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars-bio.iml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/constants.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/context.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/interval_op_helpers.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/logging.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/operations.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/polars_ext.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/polars_bio/range_utils.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/requirements.txt +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/rust-toolchain.toml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/rustfmt.toml +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/context.rs +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/udtf.rs +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/src/utils.rs +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/_expected.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/count_overlaps/reads.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/count_overlaps/targets.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/coverage/reads.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/coverage/targets.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/exons/.part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/exons/.part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/exons/_SUCCESS +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/exons/part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/exons/part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/fBrain-DS14718/.part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/fBrain-DS14718/.part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/fBrain-DS14718/_SUCCESS +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/fBrain-DS14718/part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/fBrain-DS14718/part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/bam/test.bam +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/bam/test.bam.bai +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/bed/ENCFF001XKR.bed.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/bed/chr16_fragile_site.bed +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/bed/chr16_fragile_site.bed.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/bed/test.bed +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fasta/test.fasta +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/example.fastq +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/example.fastq.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/example.fastq.bgz.gzi +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/example.fastq.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/sample_parallel.fastq.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/sample_parallel.fastq.bgz.gzi +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/test.fastq +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/fastq/wrong_extension.fastq.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/gff/gencode.v38.annotation.gff3 +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/gff/gencode.v38.annotation.gff3.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/gff/gencode.v38.annotation.gff3.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/gff/wrong_extension.gff3.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/vcf/ensembl-2.vcf +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/vcf/ensembl.vcf +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/vcf/vep.vcf +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/vcf/vep.vcf.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/vcf/vep.vcf.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/vcf/wrong_extension.vcf.bgz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/io/vcf/wrong_extension.vcf.gz +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/merge/input.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/nearest/reads.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/nearest/targets.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/overlap/reads.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/data/overlap/targets.csv +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_bioframe.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_native.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_overlap_algorithms.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_pandas.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_parallel_io.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_polars.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_polars_ext.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_streaming.py +0 -0
- {polars_bio-0.13.1 → polars_bio-0.14.0}/tests/test_warnings.py +0 -0
@@ -1404,7 +1404,7 @@ dependencies = [
|
|
1404
1404
|
[[package]]
|
1405
1405
|
name = "datafusion-bio-format-bam"
|
1406
1406
|
version = "0.1.0"
|
1407
|
-
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=
|
1407
|
+
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
|
1408
1408
|
dependencies = [
|
1409
1409
|
"async-stream",
|
1410
1410
|
"async-trait",
|
@@ -1425,7 +1425,7 @@ dependencies = [
|
|
1425
1425
|
[[package]]
|
1426
1426
|
name = "datafusion-bio-format-bed"
|
1427
1427
|
version = "0.1.0"
|
1428
|
-
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=
|
1428
|
+
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
|
1429
1429
|
dependencies = [
|
1430
1430
|
"async-compression",
|
1431
1431
|
"async-stream",
|
@@ -1449,7 +1449,7 @@ dependencies = [
|
|
1449
1449
|
[[package]]
|
1450
1450
|
name = "datafusion-bio-format-core"
|
1451
1451
|
version = "0.1.0"
|
1452
|
-
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=
|
1452
|
+
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
|
1453
1453
|
dependencies = [
|
1454
1454
|
"async-compression",
|
1455
1455
|
"bytes",
|
@@ -1468,7 +1468,7 @@ dependencies = [
|
|
1468
1468
|
[[package]]
|
1469
1469
|
name = "datafusion-bio-format-fasta"
|
1470
1470
|
version = "0.1.0"
|
1471
|
-
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=
|
1471
|
+
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
|
1472
1472
|
dependencies = [
|
1473
1473
|
"async-compression",
|
1474
1474
|
"async-stream",
|
@@ -1490,7 +1490,7 @@ dependencies = [
|
|
1490
1490
|
[[package]]
|
1491
1491
|
name = "datafusion-bio-format-fastq"
|
1492
1492
|
version = "0.1.0"
|
1493
|
-
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=
|
1493
|
+
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
|
1494
1494
|
dependencies = [
|
1495
1495
|
"async-compression",
|
1496
1496
|
"async-stream",
|
@@ -1514,7 +1514,7 @@ dependencies = [
|
|
1514
1514
|
[[package]]
|
1515
1515
|
name = "datafusion-bio-format-gff"
|
1516
1516
|
version = "0.1.0"
|
1517
|
-
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=
|
1517
|
+
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
|
1518
1518
|
dependencies = [
|
1519
1519
|
"async-compression",
|
1520
1520
|
"async-stream",
|
@@ -1523,20 +1523,23 @@ dependencies = [
|
|
1523
1523
|
"datafusion",
|
1524
1524
|
"datafusion-bio-format-core",
|
1525
1525
|
"env_logger",
|
1526
|
+
"flate2",
|
1526
1527
|
"futures",
|
1527
1528
|
"futures-util",
|
1528
1529
|
"log",
|
1529
1530
|
"noodles 0.93.0",
|
1530
|
-
"noodles-
|
1531
|
+
"noodles-bgzf 0.36.0",
|
1532
|
+
"noodles-gff 0.51.0",
|
1531
1533
|
"opendal",
|
1532
1534
|
"tokio",
|
1533
1535
|
"tokio-util",
|
1536
|
+
"tracing",
|
1534
1537
|
]
|
1535
1538
|
|
1536
1539
|
[[package]]
|
1537
1540
|
name = "datafusion-bio-format-vcf"
|
1538
1541
|
version = "0.1.0"
|
1539
|
-
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=
|
1542
|
+
source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
|
1540
1543
|
dependencies = [
|
1541
1544
|
"async-compression",
|
1542
1545
|
"async-stream",
|
@@ -3627,9 +3630,9 @@ dependencies = [
|
|
3627
3630
|
[[package]]
|
3628
3631
|
name = "noodles"
|
3629
3632
|
version = "0.100.0"
|
3630
|
-
source = "git+https://github.com/biodatageeks/noodles.git?rev=
|
3633
|
+
source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
|
3631
3634
|
dependencies = [
|
3632
|
-
"noodles-bgzf 0.42.0",
|
3635
|
+
"noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
3633
3636
|
"noodles-vcf 0.80.0",
|
3634
3637
|
]
|
3635
3638
|
|
@@ -3741,6 +3744,16 @@ dependencies = [
|
|
3741
3744
|
name = "noodles-bgzf"
|
3742
3745
|
version = "0.42.0"
|
3743
3746
|
source = "git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c#289ef32e7d43d142914fb3f02335044ae293871c"
|
3747
|
+
dependencies = [
|
3748
|
+
"bytes",
|
3749
|
+
"crossbeam-channel",
|
3750
|
+
"flate2",
|
3751
|
+
]
|
3752
|
+
|
3753
|
+
[[package]]
|
3754
|
+
name = "noodles-bgzf"
|
3755
|
+
version = "0.42.0"
|
3756
|
+
source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
|
3744
3757
|
dependencies = [
|
3745
3758
|
"bytes",
|
3746
3759
|
"crossbeam-channel",
|
@@ -3777,6 +3790,14 @@ dependencies = [
|
|
3777
3790
|
"bstr",
|
3778
3791
|
]
|
3779
3792
|
|
3793
|
+
[[package]]
|
3794
|
+
name = "noodles-core"
|
3795
|
+
version = "0.18.0"
|
3796
|
+
source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
|
3797
|
+
dependencies = [
|
3798
|
+
"bstr",
|
3799
|
+
]
|
3800
|
+
|
3780
3801
|
[[package]]
|
3781
3802
|
name = "noodles-cram"
|
3782
3803
|
version = "0.79.0"
|
@@ -3833,13 +3854,13 @@ dependencies = [
|
|
3833
3854
|
[[package]]
|
3834
3855
|
name = "noodles-csi"
|
3835
3856
|
version = "0.50.0"
|
3836
|
-
source = "git+https://github.com/biodatageeks/noodles.git?rev=
|
3857
|
+
source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
|
3837
3858
|
dependencies = [
|
3838
3859
|
"bit-vec",
|
3839
3860
|
"bstr",
|
3840
3861
|
"indexmap",
|
3841
|
-
"noodles-bgzf 0.42.0",
|
3842
|
-
"noodles-core 0.18.0",
|
3862
|
+
"noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
3863
|
+
"noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
3843
3864
|
]
|
3844
3865
|
|
3845
3866
|
[[package]]
|
@@ -3865,8 +3886,8 @@ dependencies = [
|
|
3865
3886
|
"bytes",
|
3866
3887
|
"futures",
|
3867
3888
|
"memchr",
|
3868
|
-
"noodles-bgzf 0.42.0",
|
3869
|
-
"noodles-core 0.18.0",
|
3889
|
+
"noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c)",
|
3890
|
+
"noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c)",
|
3870
3891
|
"tokio",
|
3871
3892
|
]
|
3872
3893
|
|
@@ -3911,17 +3932,18 @@ dependencies = [
|
|
3911
3932
|
|
3912
3933
|
[[package]]
|
3913
3934
|
name = "noodles-gff"
|
3914
|
-
version = "0.
|
3915
|
-
source = "
|
3916
|
-
checksum = "c689769bddd0464a1db695aef1824754d910b81f3f1c4c74ff418c89426052a8"
|
3935
|
+
version = "0.51.0"
|
3936
|
+
source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
|
3917
3937
|
dependencies = [
|
3918
3938
|
"bstr",
|
3919
3939
|
"futures",
|
3920
3940
|
"indexmap",
|
3921
3941
|
"lexical-core",
|
3922
|
-
"
|
3923
|
-
"
|
3924
|
-
"noodles-
|
3942
|
+
"log",
|
3943
|
+
"memchr",
|
3944
|
+
"noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
3945
|
+
"noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
3946
|
+
"noodles-csi 0.50.0",
|
3925
3947
|
"percent-encoding",
|
3926
3948
|
"tokio",
|
3927
3949
|
]
|
@@ -3991,12 +4013,12 @@ dependencies = [
|
|
3991
4013
|
[[package]]
|
3992
4014
|
name = "noodles-tabix"
|
3993
4015
|
version = "0.56.0"
|
3994
|
-
source = "git+https://github.com/biodatageeks/noodles.git?rev=
|
4016
|
+
source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
|
3995
4017
|
dependencies = [
|
3996
4018
|
"bstr",
|
3997
4019
|
"indexmap",
|
3998
|
-
"noodles-bgzf 0.42.0",
|
3999
|
-
"noodles-core 0.18.0",
|
4020
|
+
"noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
4021
|
+
"noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
4000
4022
|
"noodles-csi 0.50.0",
|
4001
4023
|
"tokio",
|
4002
4024
|
]
|
@@ -4022,13 +4044,13 @@ dependencies = [
|
|
4022
4044
|
[[package]]
|
4023
4045
|
name = "noodles-vcf"
|
4024
4046
|
version = "0.80.0"
|
4025
|
-
source = "git+https://github.com/biodatageeks/noodles.git?rev=
|
4047
|
+
source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
|
4026
4048
|
dependencies = [
|
4027
4049
|
"futures",
|
4028
4050
|
"indexmap",
|
4029
4051
|
"memchr",
|
4030
|
-
"noodles-bgzf 0.42.0",
|
4031
|
-
"noodles-core 0.18.0",
|
4052
|
+
"noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
4053
|
+
"noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
|
4032
4054
|
"noodles-csi 0.50.0",
|
4033
4055
|
"noodles-tabix 0.56.0",
|
4034
4056
|
"percent-encoding",
|
@@ -5203,7 +5225,7 @@ dependencies = [
|
|
5203
5225
|
|
5204
5226
|
[[package]]
|
5205
5227
|
name = "polars_bio"
|
5206
|
-
version = "0.
|
5228
|
+
version = "0.14.0"
|
5207
5229
|
dependencies = [
|
5208
5230
|
"arrow",
|
5209
5231
|
"arrow-array",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars_bio"
|
3
|
-
version = "0.
|
3
|
+
version = "0.14.0"
|
4
4
|
edition = "2021"
|
5
5
|
readme = "README.md"
|
6
6
|
|
@@ -36,13 +36,13 @@ polars-arrow = { git = "https://github.com/mwiewior/polars.git" , rev = "da42ae
|
|
36
36
|
polars-python = { git = "https://github.com/mwiewior/polars.git" , rev = "da42ae21ca9c25bc14562e36e07cf02eafd620ee"}
|
37
37
|
|
38
38
|
|
39
|
-
datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "
|
40
|
-
datafusion-bio-format-core = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "
|
41
|
-
datafusion-bio-format-gff = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "
|
42
|
-
datafusion-bio-format-fastq = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "
|
43
|
-
datafusion-bio-format-bam = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "
|
44
|
-
datafusion-bio-format-bed = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "
|
45
|
-
datafusion-bio-format-fasta = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "
|
39
|
+
datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
|
40
|
+
datafusion-bio-format-core = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
|
41
|
+
datafusion-bio-format-gff = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
|
42
|
+
datafusion-bio-format-fastq = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
|
43
|
+
datafusion-bio-format-bam = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
|
44
|
+
datafusion-bio-format-bed = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
|
45
|
+
datafusion-bio-format-fasta = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
|
46
46
|
|
47
47
|
|
48
48
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: polars-bio
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.14.0
|
4
4
|
Classifier: Programming Language :: Rust
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
@@ -9,6 +9,7 @@ Requires-Dist: pyarrow~=21.0.0
|
|
9
9
|
Requires-Dist: datafusion~=48.0.0
|
10
10
|
Requires-Dist: tqdm~=4.67.1
|
11
11
|
Requires-Dist: typing-extensions~=4.14.0
|
12
|
+
Requires-Dist: mkdocs-glightbox>=0.5.1,<0.6.0
|
12
13
|
Requires-Dist: pandas ; extra == 'pandas'
|
13
14
|
Requires-Dist: bioframe ; extra == 'viz'
|
14
15
|
Requires-Dist: matplotlib ; extra == 'viz'
|
@@ -0,0 +1,114 @@
|
|
1
|
+
---
|
2
|
+
draft: false
|
3
|
+
date:
|
4
|
+
created: 2025-09-05
|
5
|
+
updated: 2025-09-05
|
6
|
+
categories:
|
7
|
+
- performance
|
8
|
+
- benchmarks
|
9
|
+
|
10
|
+
---
|
11
|
+
|
12
|
+
# Interval operations benchmark — update September 2025
|
13
|
+
|
14
|
+
## Introduction
|
15
|
+
Benchmarking isn’t a one-and-done exercise—it’s a moving target. As tools evolve, new versions can shift performance profiles in meaningful ways, so keeping results current is just as important as the first round of measurements.
|
16
|
+
|
17
|
+
Recently, three novel libraries that have started to gain traction: [pyranges1](https://github.com/pyranges/pyranges_1.x), [GenomicRanges](https://github.com/BiocPy/GenomicRanges) and [polars-bio](https://github.com/biodatageeks/polars-bio)
|
18
|
+

|
19
|
+
|
20
|
+
shipped major updates:
|
21
|
+
|
22
|
+
* [pyranges1](https://github.com/pyranges/pyranges_1.x) adopted a new Rust backend ([ruranges](https://github.com/pyranges/ruranges)),
|
23
|
+
* [GenomicRanges](https://github.com/BiocPy/GenomicRanges) switched its interval core to a Nested Containment List ([NCLS](https://github.com/pyranges/ncls)) and added multithreaded execution,
|
24
|
+
* polars-bio migrated to the new Polars streaming engine and added support for new interval data structures. As of version `0.12.0` it supports:
|
25
|
+
* [COITrees](https://github.com/dcjones/coitrees)
|
26
|
+
* [IITree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/array_backed_interval_tree.rs)
|
27
|
+
* [AVL-tree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/avl_interval_tree.rs)
|
28
|
+
* [rust-lapper](https://github.com/sstadick/rust-lapper)
|
29
|
+
* [superintervals](https://github.com/kcleal/superintervals/)
|
30
|
+
|
31
|
+
Each of these changes has the potential to meaningfully alter performance and memory characteristics for common genomic interval tasks.
|
32
|
+
|
33
|
+
In this post, we revisit our benchmarks with those releases in mind. We focus on three everyday operations:
|
34
|
+
|
35
|
+
* overlap detection,
|
36
|
+
* nearest feature queries,
|
37
|
+
* overlap counting.
|
38
|
+
|
39
|
+
For comparability, we use the same [AIList](/polars-bio/supplement/#real-dataset) dataset from our previous write-up, so you can see exactly how the new backends and data structures change the picture. Let’s dive in and see what’s faster, what’s leaner, and where the trade-offs now live.
|
40
|
+
|
41
|
+
## Setup
|
42
|
+
|
43
|
+
### Benchmark test cases
|
44
|
+
|
45
|
+
| Dataset pairs | Size | # of overlaps (1-based) |
|
46
|
+
|---------------|--------|-------------------------|
|
47
|
+
| 1-2 & 2-1 | Small | 54,246 |
|
48
|
+
| 7-3 & 3-7 | Medium | 4,408,383 |
|
49
|
+
| 8-7 & 7-8 | Large | 307,184,634 |
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
### Software versions
|
54
|
+
|
55
|
+
| Library | Version |
|
56
|
+
|--------------------|------------|
|
57
|
+
| polars_bio | 0.13.1 |
|
58
|
+
| pyranges | 0.1.14 |
|
59
|
+
| genomicranges | 0.7.2 |
|
60
|
+
|
61
|
+
## Results
|
62
|
+
|
63
|
+
### polars-bio interval data structures performance comparison
|
64
|
+
{.glightbox}
|
65
|
+
|
66
|
+
Key takeaways:
|
67
|
+
|
68
|
+
- **Superintervals** seems to be the best default. Across all three test cases, it is consistently the fastest or tied for fastest, delivering 1.25–1.44x speedups over the **polars-bio default (COITrees)** and avoiding worst‑case behavior.
|
69
|
+
- Lapper caveat: performs well on 1‑2 and 8‑7, but collapses on 7‑3 (≈25x slower than default), so it’s risky as a general‑purpose algorithm.
|
70
|
+
- Intervaltree/Arrayintervaltree: reliable but slower. They trail superintervals by 20–70% depending on the case.
|
71
|
+
|
72
|
+
|
73
|
+
### All operations comparison
|
74
|
+
{.glightbox}
|
75
|
+
|
76
|
+
{.glightbox}
|
77
|
+
|
78
|
+
Key takeaways:
|
79
|
+
|
80
|
+
- *Overlap*: **GenomicRanges** wins on small inputs (1‑2, 2‑1) by ~2.1–2.3x, but polars‑bio takes over from medium size onward and dominates on large (7‑8, 8‑7), where PyRanges falls far behind. An interesting case is *7-8* vs *8-7*, where swapping the inputs can significantly affect the performance of GenomicRanges.
|
81
|
+
- *Nearest*: **polars‑bio** leads decisively at every size; speedups over the others grow with input size (orders of magnitude on large datasets).
|
82
|
+
- *Count overlaps*: **GenomicRanges** edges out polars‑bio on the smallest inputs, while **polars‑bio** is faster on medium and substantially faster on large inputs.
|
83
|
+
|
84
|
+
### All operations parallel execution
|
85
|
+
{.glightbox}
|
86
|
+
|
87
|
+
{.glightbox}
|
88
|
+
|
89
|
+
Key takeaways:
|
90
|
+
|
91
|
+
- Thread scaling: **both** libraries (GenomicRanges and polars-bio) benefit from additional threads, but the absolute gap favors **polars‑bio** for medium/large datasets across overlap, nearest, and count overlaps.
|
92
|
+
- Small overlaps: **GenomicRanges** remains >2x faster at 1–8 threads; on medium/large pairs its relative speed drops below 1x.
|
93
|
+
- Nearest: **polars‑bio** stays on the 1x reference line; **GenomicRanges** is typically 10–100x slower (log scale) even with more threads.
|
94
|
+
- Count overlaps: small inputs slightly favor **GenomicRanges**; for larger inputs **polars‑bio** maintains 2–10x advantage with stable scaling.
|
95
|
+
|
96
|
+
### End-to-end data processing
|
97
|
+
|
98
|
+
Here we compare end-to-end performance including data loading, overlap operation, and saving results to CSV.
|
99
|
+
|
100
|
+
!!! info
|
101
|
+
1. `POLARS_MAX_THREADS=1` was set to ensure fair comparison with single-threaded PyRanges.
|
102
|
+
2. Since GenomicRanges supports Polars DataFrames as input and output, we used them instead of Pandas to again ensure fair comparison with polars-bio.
|
103
|
+
3. The GenomicRanges [find_overlaps](https://biocpy.github.io/GenomicRanges/api/genomicranges.html#genomicranges.GenomicRanges.GenomicRanges.find_overlaps) method returns a hits-only table (indices of genomic intervals instead of genomic coordinates), so we also benchmarked an extended version with an additional lookup of intervals (`full rows`, [code](https://github.com/biodatageeks/polars-bio-bench/blob/master/src/utils.py#L99)) for fair comparison.
|
104
|
+
|
105
|
+
{.glightbox}
|
106
|
+
|
107
|
+
Key takeaways:
|
108
|
+
|
109
|
+
- Wall time: **GenomicRanges (hits‑only)** is the fastest end‑to‑end here (~1.16x vs polars_bio) by avoiding full materialization of genomic intervals (unlike PyRanges and polars-bio that return pairs of genomic interval coordinates for each overlap); **PyRanges** is far slower; **GenomicRanges** (full rows, so with the output comparable with PyRanges and polars-bio) is much slower.
|
110
|
+
- Memory: **polars-bio (streaming)** minimizes peak RAM (~0.7 GB) while keeping speed comparable to **polars-bio**. **GenomicRanges** (full rows) peaks at ~40 GB; hits‑only sits in the middle (~8.2 GB) as it only returns a DataFrame with pairs of indices, not full genomic coordinates.
|
111
|
+
|
112
|
+
## Summary
|
113
|
+
|
114
|
+
For small and medium datasets, all tools perform well; at large scale, **polars-bio** excels with better scalability and memory efficiency, achieving an ultra‑low footprint in streaming mode.
|
polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/all_operations_walltime_comparison.png
ADDED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
polars_bio-0.14.0/docs/blog/posts/figures/benchmark-sep-2025/combined_benchmark_visualization.png
ADDED
Binary file
|
Binary file
|
Binary file
|
@@ -18,6 +18,10 @@ polars-bio-intel kernel: [ 1611.175045] traps: python[8844] trap invalid opcode
|
|
18
18
|
```bash
|
19
19
|
MKDOCS_EXPORTER_PDF=false JUPYTER_PLATFORM_DIRS=1 mkdocs serve -w polars_bio
|
20
20
|
```
|
21
|
+
Some pages of the documentation take a while to build—to speed up the process, you can disable dynamic content rendering:
|
22
|
+
```bash
|
23
|
+
MKDOCS_EXPORTER_PDF=false ENABLE_MD_EXEC=false ENABLE_MKDOCSTRINGS=false ENABLE_JUPYTER=false JUPYTER_PLATFORM_DIRS=1 mkdocs serve
|
24
|
+
```
|
21
25
|
|
22
26
|
4. How to build the source code and install it in the current virtual environment?
|
23
27
|
```bash
|
@@ -51,7 +51,7 @@ The basic concept is that each operation consists of two sides: the **probe** si
|
|
51
51
|
* [IITree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/array_backed_interval_tree.rs)
|
52
52
|
* [AVL-tree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/avl_interval_tree.rs)
|
53
53
|
* [rust-lapper](https://github.com/sstadick/rust-lapper)
|
54
|
-
* [superintervals](https://github.com/kcleal/superintervals/) -
|
54
|
+
* [superintervals](https://github.com/kcleal/superintervals/) - available since `polars-bio` version `0.12.0`
|
55
55
|
Once the **build** side data structure is ready, records from the **probe** side, organized as record batches, are processed against the search structure. Each record batch can be processed independently. Search structure nodes contain identifiers of the rows from the **build** side, which are then used to construct a new record that is returned as a result of the operation.
|
56
56
|
|
57
57
|
### Out-of-core (streaming) processing
|
@@ -14,6 +14,7 @@ nav:
|
|
14
14
|
- 🔬 Supplementary material: supplement.md
|
15
15
|
- 📡 Contact: contact.md
|
16
16
|
|
17
|
+
|
17
18
|
- Quick start: quickstart.md
|
18
19
|
- Features: features.md
|
19
20
|
- Tutorial: notebooks/tutorial.ipynb
|
@@ -22,20 +23,29 @@ nav:
|
|
22
23
|
- FAQ: faq.md
|
23
24
|
- 🔬 Supplementary material: supplement.md
|
24
25
|
- Contact: contact.md
|
26
|
+
- 📖 Blog:
|
27
|
+
- blog/index.md
|
25
28
|
|
26
29
|
plugins:
|
30
|
+
- blog:
|
31
|
+
post_date_format: full
|
32
|
+
archive: false
|
33
|
+
post_readtime: true
|
27
34
|
- social
|
28
35
|
- search
|
29
36
|
- autorefs
|
30
37
|
- mkdocs-jupyter:
|
38
|
+
enabled: !ENV [ENABLE_JUPYTER, true]
|
31
39
|
execute: false
|
32
40
|
allow_errors: false
|
33
41
|
- mkdocstrings:
|
42
|
+
enabled: !ENV [ENABLE_MKDOCSTRINGS, true]
|
34
43
|
default_handler: python
|
35
44
|
handlers:
|
36
45
|
options:
|
37
46
|
docstring_style: google
|
38
|
-
- markdown-exec
|
47
|
+
- markdown-exec:
|
48
|
+
enabled: !ENV [ENABLE_MD_EXEC, true]
|
39
49
|
- table-reader
|
40
50
|
- mkdocs_matplotlib
|
41
51
|
- exporter:
|
@@ -52,6 +62,8 @@ plugins:
|
|
52
62
|
enabled: true
|
53
63
|
output: .well-known/site.pdf
|
54
64
|
covers: all
|
65
|
+
# Place glightbox last so it runs after other plugins
|
66
|
+
- glightbox
|
55
67
|
theme:
|
56
68
|
name: material
|
57
69
|
logo: assets/logo.png
|
@@ -91,6 +103,7 @@ extra:
|
|
91
103
|
|
92
104
|
markdown_extensions:
|
93
105
|
- admonition
|
106
|
+
- codehilite
|
94
107
|
- footnotes
|
95
108
|
- pymdownx.critic
|
96
109
|
- pymdownx.caret
|
@@ -123,4 +136,4 @@ markdown_extensions:
|
|
123
136
|
format: !!python/name:pymdownx.superfences.fence_code_format
|
124
137
|
- markdown.extensions.toc:
|
125
138
|
baselevel: 1
|
126
|
-
permalink: ""
|
139
|
+
permalink: ""
|