polars-bio 0.13.1__tar.gz → 0.14.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. {polars_bio-0.13.1 → polars_bio-0.14.1}/Cargo.lock +50 -28
  2. {polars_bio-0.13.1 → polars_bio-0.14.1}/Cargo.toml +8 -8
  3. {polars_bio-0.13.1 → polars_bio-0.14.1}/PKG-INFO +2 -1
  4. polars_bio-0.14.1/docs/blog/index.md +2 -0
  5. polars_bio-0.14.1/docs/blog/posts/benchmark-operations-2025-09.md +114 -0
  6. polars_bio-0.14.1/docs/blog/posts/figures/benchmark-sep-2025/all_operations_walltime_comparison.png +0 -0
  7. polars_bio-0.14.1/docs/blog/posts/figures/benchmark-sep-2025/bench-20250-all_operations_speedup_comparison.png +0 -0
  8. polars_bio-0.14.1/docs/blog/posts/figures/benchmark-sep-2025/benchmark_comparison_genomicranges_vs_polars_bio.png +0 -0
  9. polars_bio-0.14.1/docs/blog/posts/figures/benchmark-sep-2025/benchmark_speedup_comparison_genomicranges_vs_polars_bio.png +0 -0
  10. polars_bio-0.14.1/docs/blog/posts/figures/benchmark-sep-2025/combined_benchmark_visualization.png +0 -0
  11. polars_bio-0.14.1/docs/blog/posts/figures/benchmark-sep-2025/combined_multi_testcase.png +0 -0
  12. polars_bio-0.14.1/docs/blog/posts/figures/benchmark-sep-2025/star-history-202595.png +0 -0
  13. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/faq.md +4 -0
  14. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/supplement.md +1 -1
  15. {polars_bio-0.13.1 → polars_bio-0.14.1}/mkdocs.yml +15 -2
  16. {polars_bio-0.13.1 → polars_bio-0.14.1}/poetry.lock +1264 -909
  17. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/__init__.py +1 -1
  18. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/io.py +342 -34
  19. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/range_op.py +36 -3
  20. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/range_op_helpers.py +10 -1
  21. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/range_op_io.py +43 -10
  22. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/sql.py +27 -12
  23. polars_bio-0.14.1/polars_bio/utils.py +124 -0
  24. {polars_bio-0.13.1 → polars_bio-0.14.1}/pyproject.toml +4 -2
  25. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/lib.rs +6 -4
  26. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/operation.rs +2 -0
  27. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/option.rs +8 -3
  28. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/query.rs +95 -43
  29. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/scan.rs +72 -13
  30. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/streaming.rs +3 -4
  31. polars_bio-0.14.1/tests/test_ensembl_vcf_parsing.py +201 -0
  32. polars_bio-0.14.1/tests/test_execution_plan_validation.py +145 -0
  33. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_io.py +140 -10
  34. polars_bio-0.14.1/tests/test_polars_bio_projection_validation.py +259 -0
  35. polars_bio-0.14.1/tests/test_projection_performance.py +259 -0
  36. polars_bio-0.14.1/tests/test_projection_pushdown.py +470 -0
  37. polars_bio-0.14.1/tests/test_vcf_projection_pushdown.py +158 -0
  38. polars_bio-0.13.1/polars_bio/utils.py +0 -46
  39. polars_bio-0.13.1/tests/test_ensembl_parsing.py +0 -193
  40. polars_bio-0.13.1/tests/test_ensembl_vcf_parsing.py +0 -193
  41. {polars_bio-0.13.1 → polars_bio-0.14.1}/.github/workflows/publish_documentation.yml +0 -0
  42. {polars_bio-0.13.1 → polars_bio-0.14.1}/.github/workflows/publish_to_pypi.yml +0 -0
  43. {polars_bio-0.13.1 → polars_bio-0.14.1}/.github/workflows/release.yml +0 -0
  44. {polars_bio-0.13.1 → polars_bio-0.14.1}/.gitignore +0 -0
  45. {polars_bio-0.13.1 → polars_bio-0.14.1}/.pre-commit-config.yaml +0 -0
  46. {polars_bio-0.13.1 → polars_bio-0.14.1}/.readthedocs.yaml +0 -0
  47. {polars_bio-0.13.1 → polars_bio-0.14.1}/LICENSE +0 -0
  48. {polars_bio-0.13.1 → polars_bio-0.14.1}/Makefile +0 -0
  49. {polars_bio-0.13.1 → polars_bio-0.14.1}/README.md +0 -0
  50. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/api.md +0 -0
  51. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/count-overlaps-parallel.png +0 -0
  52. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/count-overlaps-single.png +0 -0
  53. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/coverage-parallel.png +0 -0
  54. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/coverage-single.png +0 -0
  55. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/logo-large.png +0 -0
  56. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/logo.png +0 -0
  57. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/bioframe.png +0 -0
  58. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/bioframe_sink.png +0 -0
  59. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/dataframes/polars-bio-overlap-mem.png +0 -0
  60. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/dataframes/polars-bio-overlap-pd-mem.png +0 -0
  61. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/dataframes/polars-bio-overlap-pl-mem.png +0 -0
  62. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/polars-bio.png +0 -0
  63. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/polars-bio_sink.png +0 -0
  64. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/polars-bio_stream_sink.png +0 -0
  65. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/pyranges0.png +0 -0
  66. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/pyranges0_sink.png +0 -0
  67. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/pyranges1.png +0 -0
  68. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/memory/pyranges1_sink.png +0 -0
  69. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/nearest-parallel.png +0 -0
  70. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/nearest-single.png +0 -0
  71. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/overlap-parallel.png +0 -0
  72. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/overlap-single.png +0 -0
  73. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/results-nearest-0.1.1.png +0 -0
  74. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/assets/results-overlap-0.1.1.png +0 -0
  75. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/contact.md +0 -0
  76. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/features.md +0 -0
  77. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/index.md +0 -0
  78. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/example.bam +0 -0
  79. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/example.bed.bgz +0 -0
  80. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/example.fastq.gz +0 -0
  81. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/example.gff3.bgz +0 -0
  82. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/example.vcf +0 -0
  83. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/exons/.part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  84. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/exons/.part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  85. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/exons/_SUCCESS +0 -0
  86. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/exons/part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  87. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/exons/part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  88. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/fBrain-DS14718/.part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  89. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/fBrain-DS14718/.part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  90. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/fBrain-DS14718/_SUCCESS +0 -0
  91. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/fBrain-DS14718/part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  92. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/data/fBrain-DS14718/part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  93. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/notebooks/tutorial.ipynb +0 -0
  94. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/performance.md +0 -0
  95. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/quickstart.md +0 -0
  96. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/requirements.txt +0 -0
  97. {polars_bio-0.13.1 → polars_bio-0.14.1}/docs/versions.json +0 -0
  98. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/README.md +0 -0
  99. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/bin/.env +0 -0
  100. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/bin/start.sh +0 -0
  101. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/bin/stop.sh +0 -0
  102. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/data/policy-anonymous.json +0 -0
  103. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/data/policy-priv.json +0 -0
  104. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/data/test.fasta +0 -0
  105. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/data/vep.vcf +0 -0
  106. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/data/vep.vcf.bgz +0 -0
  107. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/docker-compose.yml +0 -0
  108. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/it_ensembl_vcf_bgz.py +0 -0
  109. {polars_bio-0.13.1 → polars_bio-0.14.1}/it/it_object_storage_io.py +0 -0
  110. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars-bio.iml +0 -0
  111. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/constants.py +0 -0
  112. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/context.py +0 -0
  113. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/interval_op_helpers.py +0 -0
  114. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/logging.py +0 -0
  115. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/operations.py +0 -0
  116. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/polars_ext.py +0 -0
  117. {polars_bio-0.13.1 → polars_bio-0.14.1}/polars_bio/range_utils.py +0 -0
  118. {polars_bio-0.13.1 → polars_bio-0.14.1}/requirements.txt +0 -0
  119. {polars_bio-0.13.1 → polars_bio-0.14.1}/rust-toolchain.toml +0 -0
  120. {polars_bio-0.13.1 → polars_bio-0.14.1}/rustfmt.toml +0 -0
  121. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/context.rs +0 -0
  122. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/udtf.rs +0 -0
  123. {polars_bio-0.13.1 → polars_bio-0.14.1}/src/utils.rs +0 -0
  124. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/_expected.py +0 -0
  125. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/count_overlaps/reads.csv +0 -0
  126. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/count_overlaps/targets.csv +0 -0
  127. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/coverage/reads.csv +0 -0
  128. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/coverage/targets.csv +0 -0
  129. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/exons/.part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  130. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/exons/.part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  131. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/exons/_SUCCESS +0 -0
  132. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/exons/part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  133. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/exons/part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  134. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/fBrain-DS14718/.part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  135. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/fBrain-DS14718/.part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  136. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/fBrain-DS14718/_SUCCESS +0 -0
  137. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/fBrain-DS14718/part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  138. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/fBrain-DS14718/part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  139. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/bam/test.bam +0 -0
  140. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/bam/test.bam.bai +0 -0
  141. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/bed/ENCFF001XKR.bed.gz +0 -0
  142. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/bed/chr16_fragile_site.bed +0 -0
  143. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/bed/chr16_fragile_site.bed.bgz +0 -0
  144. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/bed/test.bed +0 -0
  145. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fasta/test.fasta +0 -0
  146. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/example.fastq +0 -0
  147. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/example.fastq.bgz +0 -0
  148. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/example.fastq.bgz.gzi +0 -0
  149. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/example.fastq.gz +0 -0
  150. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/sample_parallel.fastq.bgz +0 -0
  151. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/sample_parallel.fastq.bgz.gzi +0 -0
  152. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/test.fastq +0 -0
  153. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/fastq/wrong_extension.fastq.gz +0 -0
  154. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/gff/gencode.v38.annotation.gff3 +0 -0
  155. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/gff/gencode.v38.annotation.gff3.bgz +0 -0
  156. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/gff/gencode.v38.annotation.gff3.gz +0 -0
  157. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/gff/wrong_extension.gff3.gz +0 -0
  158. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/vcf/ensembl-2.vcf +0 -0
  159. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/vcf/ensembl.vcf +0 -0
  160. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/vcf/vep.vcf +0 -0
  161. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/vcf/vep.vcf.bgz +0 -0
  162. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/vcf/vep.vcf.gz +0 -0
  163. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/vcf/wrong_extension.vcf.bgz +0 -0
  164. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/io/vcf/wrong_extension.vcf.gz +0 -0
  165. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/merge/input.csv +0 -0
  166. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/nearest/reads.csv +0 -0
  167. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/nearest/targets.csv +0 -0
  168. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/overlap/reads.csv +0 -0
  169. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/data/overlap/targets.csv +0 -0
  170. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_bioframe.py +0 -0
  171. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_native.py +0 -0
  172. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_overlap_algorithms.py +0 -0
  173. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_pandas.py +0 -0
  174. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_parallel_io.py +0 -0
  175. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_polars.py +0 -0
  176. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_polars_ext.py +0 -0
  177. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_streaming.py +0 -0
  178. {polars_bio-0.13.1 → polars_bio-0.14.1}/tests/test_warnings.py +0 -0
@@ -1404,7 +1404,7 @@ dependencies = [
1404
1404
  [[package]]
1405
1405
  name = "datafusion-bio-format-bam"
1406
1406
  version = "0.1.0"
1407
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1407
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
1408
1408
  dependencies = [
1409
1409
  "async-stream",
1410
1410
  "async-trait",
@@ -1425,7 +1425,7 @@ dependencies = [
1425
1425
  [[package]]
1426
1426
  name = "datafusion-bio-format-bed"
1427
1427
  version = "0.1.0"
1428
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1428
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
1429
1429
  dependencies = [
1430
1430
  "async-compression",
1431
1431
  "async-stream",
@@ -1449,7 +1449,7 @@ dependencies = [
1449
1449
  [[package]]
1450
1450
  name = "datafusion-bio-format-core"
1451
1451
  version = "0.1.0"
1452
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1452
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
1453
1453
  dependencies = [
1454
1454
  "async-compression",
1455
1455
  "bytes",
@@ -1468,7 +1468,7 @@ dependencies = [
1468
1468
  [[package]]
1469
1469
  name = "datafusion-bio-format-fasta"
1470
1470
  version = "0.1.0"
1471
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1471
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
1472
1472
  dependencies = [
1473
1473
  "async-compression",
1474
1474
  "async-stream",
@@ -1490,7 +1490,7 @@ dependencies = [
1490
1490
  [[package]]
1491
1491
  name = "datafusion-bio-format-fastq"
1492
1492
  version = "0.1.0"
1493
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1493
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
1494
1494
  dependencies = [
1495
1495
  "async-compression",
1496
1496
  "async-stream",
@@ -1514,7 +1514,7 @@ dependencies = [
1514
1514
  [[package]]
1515
1515
  name = "datafusion-bio-format-gff"
1516
1516
  version = "0.1.0"
1517
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1517
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
1518
1518
  dependencies = [
1519
1519
  "async-compression",
1520
1520
  "async-stream",
@@ -1523,20 +1523,23 @@ dependencies = [
1523
1523
  "datafusion",
1524
1524
  "datafusion-bio-format-core",
1525
1525
  "env_logger",
1526
+ "flate2",
1526
1527
  "futures",
1527
1528
  "futures-util",
1528
1529
  "log",
1529
1530
  "noodles 0.93.0",
1530
- "noodles-gff 0.50.0",
1531
+ "noodles-bgzf 0.36.0",
1532
+ "noodles-gff 0.51.0",
1531
1533
  "opendal",
1532
1534
  "tokio",
1533
1535
  "tokio-util",
1536
+ "tracing",
1534
1537
  ]
1535
1538
 
1536
1539
  [[package]]
1537
1540
  name = "datafusion-bio-format-vcf"
1538
1541
  version = "0.1.0"
1539
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1542
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=aecd7b316c1c498bff14c2cbb4ac0bd04d21612f#aecd7b316c1c498bff14c2cbb4ac0bd04d21612f"
1540
1543
  dependencies = [
1541
1544
  "async-compression",
1542
1545
  "async-stream",
@@ -3627,9 +3630,9 @@ dependencies = [
3627
3630
  [[package]]
3628
3631
  name = "noodles"
3629
3632
  version = "0.100.0"
3630
- source = "git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c#289ef32e7d43d142914fb3f02335044ae293871c"
3633
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
3631
3634
  dependencies = [
3632
- "noodles-bgzf 0.42.0",
3635
+ "noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
3633
3636
  "noodles-vcf 0.80.0",
3634
3637
  ]
3635
3638
 
@@ -3741,6 +3744,16 @@ dependencies = [
3741
3744
  name = "noodles-bgzf"
3742
3745
  version = "0.42.0"
3743
3746
  source = "git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c#289ef32e7d43d142914fb3f02335044ae293871c"
3747
+ dependencies = [
3748
+ "bytes",
3749
+ "crossbeam-channel",
3750
+ "flate2",
3751
+ ]
3752
+
3753
+ [[package]]
3754
+ name = "noodles-bgzf"
3755
+ version = "0.42.0"
3756
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
3744
3757
  dependencies = [
3745
3758
  "bytes",
3746
3759
  "crossbeam-channel",
@@ -3777,6 +3790,14 @@ dependencies = [
3777
3790
  "bstr",
3778
3791
  ]
3779
3792
 
3793
+ [[package]]
3794
+ name = "noodles-core"
3795
+ version = "0.18.0"
3796
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
3797
+ dependencies = [
3798
+ "bstr",
3799
+ ]
3800
+
3780
3801
  [[package]]
3781
3802
  name = "noodles-cram"
3782
3803
  version = "0.79.0"
@@ -3833,13 +3854,13 @@ dependencies = [
3833
3854
  [[package]]
3834
3855
  name = "noodles-csi"
3835
3856
  version = "0.50.0"
3836
- source = "git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c#289ef32e7d43d142914fb3f02335044ae293871c"
3857
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
3837
3858
  dependencies = [
3838
3859
  "bit-vec",
3839
3860
  "bstr",
3840
3861
  "indexmap",
3841
- "noodles-bgzf 0.42.0",
3842
- "noodles-core 0.18.0",
3862
+ "noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
3863
+ "noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
3843
3864
  ]
3844
3865
 
3845
3866
  [[package]]
@@ -3865,8 +3886,8 @@ dependencies = [
3865
3886
  "bytes",
3866
3887
  "futures",
3867
3888
  "memchr",
3868
- "noodles-bgzf 0.42.0",
3869
- "noodles-core 0.18.0",
3889
+ "noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c)",
3890
+ "noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c)",
3870
3891
  "tokio",
3871
3892
  ]
3872
3893
 
@@ -3911,17 +3932,18 @@ dependencies = [
3911
3932
 
3912
3933
  [[package]]
3913
3934
  name = "noodles-gff"
3914
- version = "0.50.0"
3915
- source = "registry+https://github.com/rust-lang/crates.io-index"
3916
- checksum = "c689769bddd0464a1db695aef1824754d910b81f3f1c4c74ff418c89426052a8"
3935
+ version = "0.51.0"
3936
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
3917
3937
  dependencies = [
3918
3938
  "bstr",
3919
3939
  "futures",
3920
3940
  "indexmap",
3921
3941
  "lexical-core",
3922
- "noodles-bgzf 0.41.0",
3923
- "noodles-core 0.17.0",
3924
- "noodles-csi 0.49.0",
3942
+ "log",
3943
+ "memchr",
3944
+ "noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
3945
+ "noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
3946
+ "noodles-csi 0.50.0",
3925
3947
  "percent-encoding",
3926
3948
  "tokio",
3927
3949
  ]
@@ -3991,12 +4013,12 @@ dependencies = [
3991
4013
  [[package]]
3992
4014
  name = "noodles-tabix"
3993
4015
  version = "0.56.0"
3994
- source = "git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c#289ef32e7d43d142914fb3f02335044ae293871c"
4016
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
3995
4017
  dependencies = [
3996
4018
  "bstr",
3997
4019
  "indexmap",
3998
- "noodles-bgzf 0.42.0",
3999
- "noodles-core 0.18.0",
4020
+ "noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
4021
+ "noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
4000
4022
  "noodles-csi 0.50.0",
4001
4023
  "tokio",
4002
4024
  ]
@@ -4022,13 +4044,13 @@ dependencies = [
4022
4044
  [[package]]
4023
4045
  name = "noodles-vcf"
4024
4046
  version = "0.80.0"
4025
- source = "git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c#289ef32e7d43d142914fb3f02335044ae293871c"
4047
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c#9b7b2c5b6531373918302d4c07410e583f1b5b5c"
4026
4048
  dependencies = [
4027
4049
  "futures",
4028
4050
  "indexmap",
4029
4051
  "memchr",
4030
- "noodles-bgzf 0.42.0",
4031
- "noodles-core 0.18.0",
4052
+ "noodles-bgzf 0.42.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
4053
+ "noodles-core 0.18.0 (git+https://github.com/biodatageeks/noodles.git?rev=9b7b2c5b6531373918302d4c07410e583f1b5b5c)",
4032
4054
  "noodles-csi 0.50.0",
4033
4055
  "noodles-tabix 0.56.0",
4034
4056
  "percent-encoding",
@@ -5203,7 +5225,7 @@ dependencies = [
5203
5225
 
5204
5226
  [[package]]
5205
5227
  name = "polars_bio"
5206
- version = "0.13.1"
5228
+ version = "0.14.1"
5207
5229
  dependencies = [
5208
5230
  "arrow",
5209
5231
  "arrow-array",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars_bio"
3
- version = "0.13.1"
3
+ version = "0.14.1"
4
4
  edition = "2021"
5
5
  readme = "README.md"
6
6
 
@@ -36,13 +36,13 @@ polars-arrow = { git = "https://github.com/mwiewior/polars.git" , rev = "da42ae
36
36
  polars-python = { git = "https://github.com/mwiewior/polars.git" , rev = "da42ae21ca9c25bc14562e36e07cf02eafd620ee"}
37
37
 
38
38
 
39
- datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
40
- datafusion-bio-format-core = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
41
- datafusion-bio-format-gff = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
42
- datafusion-bio-format-fastq = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
43
- datafusion-bio-format-bam = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
44
- datafusion-bio-format-bed = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
45
- datafusion-bio-format-fasta = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
39
+ datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
40
+ datafusion-bio-format-core = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
41
+ datafusion-bio-format-gff = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
42
+ datafusion-bio-format-fastq = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
43
+ datafusion-bio-format-bam = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
44
+ datafusion-bio-format-bed = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
45
+ datafusion-bio-format-fasta = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "aecd7b316c1c498bff14c2cbb4ac0bd04d21612f" }
46
46
 
47
47
 
48
48
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polars-bio
3
- Version: 0.13.1
3
+ Version: 0.14.1
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
@@ -9,6 +9,7 @@ Requires-Dist: pyarrow~=21.0.0
9
9
  Requires-Dist: datafusion~=48.0.0
10
10
  Requires-Dist: tqdm~=4.67.1
11
11
  Requires-Dist: typing-extensions~=4.14.0
12
+ Requires-Dist: mkdocs-glightbox>=0.5.1,<0.6.0
12
13
  Requires-Dist: pandas ; extra == 'pandas'
13
14
  Requires-Dist: bioframe ; extra == 'viz'
14
15
  Requires-Dist: matplotlib ; extra == 'viz'
@@ -0,0 +1,2 @@
1
+ # Blog
2
+
@@ -0,0 +1,114 @@
1
+ ---
2
+ draft: false
3
+ date:
4
+ created: 2025-09-05
5
+ updated: 2025-09-05
6
+ categories:
7
+ - performance
8
+ - benchmarks
9
+
10
+ ---
11
+
12
+ # Interval operations benchmark — update September 2025
13
+
14
+ ## Introduction
15
+ Benchmarking isn’t a one-and-done exercise—it’s a moving target. As tools evolve, new versions can shift performance profiles in meaningful ways, so keeping results current is just as important as the first round of measurements.
16
+
17
+ Three novel libraries have recently started to gain traction: [pyranges1](https://github.com/pyranges/pyranges_1.x), [GenomicRanges](https://github.com/BiocPy/GenomicRanges) and [polars-bio](https://github.com/biodatageeks/polars-bio).
18
+ ![star-history-202595.png](figures/benchmark-sep-2025/star-history-202595.png)
19
+
20
+ All three have shipped major updates:
21
+
22
+ * [pyranges1](https://github.com/pyranges/pyranges_1.x) adopted a new Rust backend ([ruranges](https://github.com/pyranges/ruranges)),
23
+ * [GenomicRanges](https://github.com/BiocPy/GenomicRanges) switched its interval core to a Nested Containment List ([NCLS](https://github.com/pyranges/ncls)) and added multithreaded execution,
24
+ * polars-bio migrated to the new Polars streaming engine and added support for new interval data structures. As of version `0.12.0` it supports:
25
+ * [COITrees](https://github.com/dcjones/coitrees)
26
+ * [IITree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/array_backed_interval_tree.rs)
27
+ * [AVL-tree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/avl_interval_tree.rs)
28
+ * [rust-lapper](https://github.com/sstadick/rust-lapper)
29
+ * [superintervals](https://github.com/kcleal/superintervals/)
30
+
31
+ Each of these changes has the potential to meaningfully alter performance and memory characteristics for common genomic interval tasks.
32
+
33
+ In this post, we revisit our benchmarks with those releases in mind. We focus on three everyday operations:
34
+
35
+ * overlap detection,
36
+ * nearest feature queries,
37
+ * overlap counting.
38
+
39
+ For comparability, we use the same [AIList](/polars-bio/supplement/#real-dataset) dataset from our previous write-up, so you can see exactly how the new backends and data structures change the picture. Let’s dive in and see what’s faster, what’s leaner, and where the trade-offs now live.
40
+
41
+ ## Setup
42
+
43
+ ### Benchmark test cases
44
+
45
+ | Dataset pairs | Size | # of overlaps (1-based) |
46
+ |---------------|--------|-------------------------|
47
+ | 1-2 & 2-1 | Small | 54,246 |
48
+ | 7-3 & 3-7 | Medium | 4,408,383 |
49
+ | 8-7 & 7-8 | Large | 307,184,634 |
50
+
51
+
52
+
53
+ ### Software versions
54
+
55
+ | Library | Version |
56
+ |--------------------|------------|
57
+ | polars_bio | 0.13.1 |
58
+ | pyranges | 0.1.14 |
59
+ | genomicranges | 0.7.2 |
60
+
61
+ ## Results
62
+
63
+ ### polars-bio interval data structures performance comparison
64
+ ![combined_multi_testcase.png](figures/benchmark-sep-2025/combined_multi_testcase.png){.glightbox}
65
+
66
+ Key takeaways:
67
+
68
+ - **Superintervals** seems to be the best default. Across all three test cases, it is consistently the fastest or tied for fastest, delivering 1.25–1.44x speedups over the **polars-bio default (COITrees)** and avoiding worst‑case behavior.
69
+ - Lapper caveat: performs well on 1‑2 and 8‑7, but collapses on 7‑3 (≈25x slower than default), so it’s risky as a general‑purpose algorithm.
70
+ - Intervaltree/Arrayintervaltree: reliable but slower. They trail superintervals by 20–70% depending on the case.
71
+
72
+
73
+ ### All operations comparison
74
+ ![all_operations_walltime_comparison.png](figures/benchmark-sep-2025/all_operations_walltime_comparison.png){.glightbox}
75
+
76
+ ![bench-20250-all_operations_speedup_comparison.png](figures/benchmark-sep-2025/bench-20250-all_operations_speedup_comparison.png){.glightbox}
77
+
78
+ Key takeaways:
79
+
80
+ - *Overlap*: **GenomicRanges** wins on small inputs (1‑2, 2‑1) by ~2.1–2.3x, but polars‑bio takes over from medium size onward and dominates on large (7‑8, 8‑7), where PyRanges falls far behind. An interesting case is *7-8* vs *8-7*, where swapping the inputs significantly affects the performance of GenomicRanges.
81
+ - *Nearest*: **polars‑bio** leads decisively at every size; speedups over the others grow with input size (orders of magnitude on large datasets).
82
+ - *Count overlaps*: **GenomicRanges** edges out polars‑bio on the smallest inputs, while **polars‑bio** is faster on medium and substantially faster on large inputs.
83
+
84
+ ### All operations parallel execution
85
+ ![benchmark_comparison_genomicranges_vs_polars_bio.png](figures/benchmark-sep-2025/benchmark_comparison_genomicranges_vs_polars_bio.png){.glightbox}
86
+
87
+ ![benchmark_speedup_comparison_genomicranges_vs_polars_bio.png](figures/benchmark-sep-2025/benchmark_speedup_comparison_genomicranges_vs_polars_bio.png){.glightbox}
88
+
89
+ Key takeaways:
90
+
91
+ - Thread scaling: **both** libraries (GenomicRanges and polars-bio) benefit from additional threads, but the absolute gap favors **polars‑bio** for medium/large datasets across overlap, nearest, and count overlaps.
92
+ - Small overlaps: **GenomicRanges** remains >2x faster at 1–8 threads; on medium/large pairs its relative speed drops below 1x.
93
+ - Nearest: **polars‑bio** stays on the 1x reference line; **GenomicRanges** is typically 10–100x slower (log scale) even with more threads.
94
+ - Count overlaps: small inputs slightly favor **GenomicRanges**; for larger inputs **polars‑bio** maintains 2–10x advantage with stable scaling.
95
+
96
+ ### End-to-end data processing
97
+
98
+ Here we compare end-to-end performance including data loading, overlap operation, and saving results to CSV.
99
+
100
+ !!! info
101
+ 1. `POLARS_MAX_THREADS=1` was set to ensure fair comparison with single-threaded PyRanges.
102
+ 2. Since GenomicRanges supports Polars DataFrames as input and output, we used them instead of Pandas to again ensure fair comparison with polars-bio.
103
+ 3. The GenomicRanges [find_overlaps](https://biocpy.github.io/GenomicRanges/api/genomicranges.html#genomicranges.GenomicRanges.GenomicRanges.find_overlaps) method returns a hits-only table (indices of genomic intervals instead of genomic coordinates), so we also benchmarked an extended version with an additional lookup of intervals (`full rows`, [code](https://github.com/biodatageeks/polars-bio-bench/blob/master/src/utils.py#L99)) for a fair comparison.
104
+
105
+ ![combined_benchmark_visualization.png](figures/benchmark-sep-2025/combined_benchmark_visualization.png){.glightbox}
106
+
107
+ Key takeaways:
108
+
109
+ - Wall time: **GenomicRanges (hits‑only)** is the fastest end‑to‑end here (~1.16x vs polars_bio) by avoiding full materialization of genomic intervals (unlike PyRanges and polars-bio that return pairs of genomic interval coordinates for each overlap); **PyRanges** is far slower; **GenomicRanges** (full rows, so with the output comparable with PyRanges and polars-bio) is much slower.
110
+ - Memory: **polars-bio (streaming)** minimizes peak RAM (~0.7 GB) while keeping speed comparable to **polars-bio**. **GenomicRanges** (full rows) peaks at ~40 GB; hits‑only sits in the middle (~8.2 GB), as it only returns a DataFrame with pairs of indices, not full genomic coordinates.
111
+
112
+ ## Summary
113
+
114
+ For small and medium datasets, all tools perform well; at large scale, **polars-bio** excels with better scalability and memory efficiency, achieving an ultra‑low footprint in streaming mode.
@@ -18,6 +18,10 @@ polars-bio-intel kernel: [ 1611.175045] traps: python[8844] trap invalid opcode
18
18
  ```bash
19
19
  MKDOCS_EXPORTER_PDF=false JUPYTER_PLATFORM_DIRS=1 mkdocs serve -w polars_bio
20
20
  ```
21
+ Some pages of the documentation take a while to build—to speed up the process, you can disable dynamic content rendering:
22
+ ```bash
23
+ MKDOCS_EXPORTER_PDF=false ENABLE_MD_EXEC=false ENABLE_MKDOCSTRINGS=false ENABLE_JUPYTER=false JUPYTER_PLATFORM_DIRS=1 mkdocs serve
24
+ ```
21
25
 
22
26
  4. How to build the source code and install in the current virtual environment?
23
27
  ```bash
@@ -51,7 +51,7 @@ The basic concept is that each operation consists of two sides: the **probe** si
51
51
  * [IITree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/array_backed_interval_tree.rs)
52
52
  * [AVL-tree](https://github.com/rust-bio/rust-bio/blob/master/src/data_structures/interval_tree/avl_interval_tree.rs)
53
53
  * [rust-lapper](https://github.com/sstadick/rust-lapper)
54
- * [superintervals](https://github.com/kcleal/superintervals/) - on the roadmap, see [issue](https://github.com/biodatageeks/polars-bio/issues/126)
54
+ * [superintervals](https://github.com/kcleal/superintervals/) - available since `polars-bio` version `0.12.0`
55
55
  Once the **build** side data structure is ready, then records from the **probe** side are processed against the search structure organized as record batches. Each record batch can be processed independently. Search structure nodes contains identifiers of the rows from the **build** side that are then used to construct a new record that is returned as a result of the operation.
56
56
 
57
57
  ### Out-of-core (streaming) processing
@@ -14,6 +14,7 @@ nav:
14
14
  - 🔬 Supplementary material: supplement.md
15
15
  - 📡 Contact: contact.md
16
16
 
17
+
17
18
  - Quick start: quickstart.md
18
19
  - Features: features.md
19
20
  - Tutorial: notebooks/tutorial.ipynb
@@ -22,20 +23,29 @@ nav:
22
23
  - FAQ: faq.md
23
24
  - 🔬 Supplementary material: supplement.md
24
25
  - Contact: contact.md
26
+ - 📖 Blog:
27
+ - blog/index.md
25
28
 
26
29
  plugins:
30
+ - blog:
31
+ post_date_format: full
32
+ archive: false
33
+ post_readtime: true
27
34
  - social
28
35
  - search
29
36
  - autorefs
30
37
  - mkdocs-jupyter:
38
+ enabled: !ENV [ENABLE_JUPYTER, true]
31
39
  execute: false
32
40
  allow_errors: false
33
41
  - mkdocstrings:
42
+ enabled: !ENV [ENABLE_MKDOCSTRINGS, true]
34
43
  default_handler: python
35
44
  handlers:
36
45
  options:
37
46
  docstring_style: google
38
- - markdown-exec
47
+ - markdown-exec:
48
+ enabled: !ENV [ENABLE_MD_EXEC, true]
39
49
  - table-reader
40
50
  - mkdocs_matplotlib
41
51
  - exporter:
@@ -52,6 +62,8 @@ plugins:
52
62
  enabled: true
53
63
  output: .well-known/site.pdf
54
64
  covers: all
65
+ # Place glightbox last so it runs after other plugins
66
+ - glightbox
55
67
  theme:
56
68
  name: material
57
69
  logo: assets/logo.png
@@ -91,6 +103,7 @@ extra:
91
103
 
92
104
  markdown_extensions:
93
105
  - admonition
106
+ - codehilite
94
107
  - footnotes
95
108
  - pymdownx.critic
96
109
  - pymdownx.caret
@@ -123,4 +136,4 @@ markdown_extensions:
123
136
  format: !!python/name:pymdownx.superfences.fence_code_format
124
137
  - markdown.extensions.toc:
125
138
  baselevel: 1
126
- permalink: ""
139
+ permalink: ""