polars-bio 0.11.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. {polars_bio-0.11.0 → polars_bio-0.12.0}/Cargo.lock +70 -17
  2. {polars_bio-0.11.0 → polars_bio-0.12.0}/Cargo.toml +9 -9
  3. {polars_bio-0.11.0 → polars_bio-0.12.0}/PKG-INFO +1 -1
  4. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/faq.md +2 -2
  5. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/quickstart.md +1 -1
  6. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/__init__.py +1 -1
  7. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/range_op.py +1 -1
  8. {polars_bio-0.11.0 → polars_bio-0.12.0}/pyproject.toml +1 -1
  9. polars_bio-0.12.0/tests/data/io/fasta/test.fasta +4 -0
  10. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_overlap_algorithms.py +38 -0
  11. {polars_bio-0.11.0 → polars_bio-0.12.0}/.github/workflows/publish_documentation.yml +0 -0
  12. {polars_bio-0.11.0 → polars_bio-0.12.0}/.github/workflows/publish_to_pypi.yml +0 -0
  13. {polars_bio-0.11.0 → polars_bio-0.12.0}/.github/workflows/release.yml +0 -0
  14. {polars_bio-0.11.0 → polars_bio-0.12.0}/.gitignore +0 -0
  15. {polars_bio-0.11.0 → polars_bio-0.12.0}/.pre-commit-config.yaml +0 -0
  16. {polars_bio-0.11.0 → polars_bio-0.12.0}/.readthedocs.yaml +0 -0
  17. {polars_bio-0.11.0 → polars_bio-0.12.0}/LICENSE +0 -0
  18. {polars_bio-0.11.0 → polars_bio-0.12.0}/Makefile +0 -0
  19. {polars_bio-0.11.0 → polars_bio-0.12.0}/README.md +0 -0
  20. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/api.md +0 -0
  21. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/count-overlaps-parallel.png +0 -0
  22. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/count-overlaps-single.png +0 -0
  23. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/coverage-parallel.png +0 -0
  24. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/coverage-single.png +0 -0
  25. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/logo-large.png +0 -0
  26. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/logo.png +0 -0
  27. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/bioframe.png +0 -0
  28. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/bioframe_sink.png +0 -0
  29. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/dataframes/polars-bio-overlap-mem.png +0 -0
  30. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/dataframes/polars-bio-overlap-pd-mem.png +0 -0
  31. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/dataframes/polars-bio-overlap-pl-mem.png +0 -0
  32. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/polars-bio.png +0 -0
  33. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/polars-bio_sink.png +0 -0
  34. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/polars-bio_stream_sink.png +0 -0
  35. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/pyranges0.png +0 -0
  36. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/pyranges0_sink.png +0 -0
  37. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/pyranges1.png +0 -0
  38. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/memory/pyranges1_sink.png +0 -0
  39. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/nearest-parallel.png +0 -0
  40. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/nearest-single.png +0 -0
  41. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/overlap-parallel.png +0 -0
  42. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/overlap-single.png +0 -0
  43. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/results-nearest-0.1.1.png +0 -0
  44. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/assets/results-overlap-0.1.1.png +0 -0
  45. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/contact.md +0 -0
  46. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/features.md +0 -0
  47. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/index.md +0 -0
  48. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/example.bam +0 -0
  49. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/example.bed.bgz +0 -0
  50. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/example.fastq.gz +0 -0
  51. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/example.gff3.bgz +0 -0
  52. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/example.vcf +0 -0
  53. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/exons/.part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  54. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/exons/.part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  55. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/exons/_SUCCESS +0 -0
  56. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/exons/part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  57. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/exons/part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  58. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/fBrain-DS14718/.part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  59. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/fBrain-DS14718/.part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  60. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/fBrain-DS14718/_SUCCESS +0 -0
  61. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/fBrain-DS14718/part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  62. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/data/fBrain-DS14718/part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  63. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/notebooks/tutorial.ipynb +0 -0
  64. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/performance.md +0 -0
  65. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/requirements.txt +0 -0
  66. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/supplement.md +0 -0
  67. {polars_bio-0.11.0 → polars_bio-0.12.0}/docs/versions.json +0 -0
  68. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/README.md +0 -0
  69. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/bin/.env +0 -0
  70. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/bin/start.sh +0 -0
  71. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/bin/stop.sh +0 -0
  72. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/data/policy-anonymous.json +0 -0
  73. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/data/policy-priv.json +0 -0
  74. {polars_bio-0.11.0/tests/data/io/fasta → polars_bio-0.12.0/it/data}/test.fasta +0 -0
  75. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/data/vep.vcf +0 -0
  76. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/data/vep.vcf.bgz +0 -0
  77. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/docker-compose.yml +0 -0
  78. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/it_ensembl_vcf_bgz.py +0 -0
  79. {polars_bio-0.11.0 → polars_bio-0.12.0}/it/it_object_storage_io.py +0 -0
  80. {polars_bio-0.11.0 → polars_bio-0.12.0}/mkdocs.yml +0 -0
  81. {polars_bio-0.11.0 → polars_bio-0.12.0}/poetry.lock +0 -0
  82. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars-bio.iml +0 -0
  83. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/constants.py +0 -0
  84. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/context.py +0 -0
  85. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/interval_op_helpers.py +0 -0
  86. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/io.py +0 -0
  87. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/logging.py +0 -0
  88. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/operations.py +0 -0
  89. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/polars_ext.py +0 -0
  90. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/range_op_helpers.py +0 -0
  91. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/range_op_io.py +0 -0
  92. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/range_utils.py +0 -0
  93. {polars_bio-0.11.0 → polars_bio-0.12.0}/polars_bio/sql.py +0 -0
  94. {polars_bio-0.11.0 → polars_bio-0.12.0}/requirements.txt +0 -0
  95. {polars_bio-0.11.0 → polars_bio-0.12.0}/rust-toolchain.toml +0 -0
  96. {polars_bio-0.11.0 → polars_bio-0.12.0}/rustfmt.toml +0 -0
  97. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/context.rs +0 -0
  98. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/lib.rs +0 -0
  99. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/operation.rs +0 -0
  100. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/option.rs +0 -0
  101. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/query.rs +0 -0
  102. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/scan.rs +0 -0
  103. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/streaming.rs +0 -0
  104. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/udtf.rs +0 -0
  105. {polars_bio-0.11.0 → polars_bio-0.12.0}/src/utils.rs +0 -0
  106. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/_expected.py +0 -0
  107. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/count_overlaps/reads.csv +0 -0
  108. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/count_overlaps/targets.csv +0 -0
  109. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/coverage/reads.csv +0 -0
  110. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/coverage/targets.csv +0 -0
  111. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/exons/.part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  112. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/exons/.part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet.crc +0 -0
  113. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/exons/_SUCCESS +0 -0
  114. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/exons/part-00000-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  115. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/exons/part-00001-47fafbb5-1cab-410c-9461-d10effacf760-c000.snappy.parquet +0 -0
  116. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/fBrain-DS14718/.part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  117. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/fBrain-DS14718/.part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet.crc +0 -0
  118. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/fBrain-DS14718/_SUCCESS +0 -0
  119. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/fBrain-DS14718/part-00000-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  120. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/fBrain-DS14718/part-00001-a0d75244-2d87-41eb-a3eb-a18847c7cb87-c000.snappy.parquet +0 -0
  121. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/bam/test.bam +0 -0
  122. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/bam/test.bam.bai +0 -0
  123. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/bed/ENCFF001XKR.bed.gz +0 -0
  124. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/bed/chr16_fragile_site.bed +0 -0
  125. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/bed/chr16_fragile_site.bed.bgz +0 -0
  126. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/bed/test.bed +0 -0
  127. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/example.fastq +0 -0
  128. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/example.fastq.bgz +0 -0
  129. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/example.fastq.bgz.gzi +0 -0
  130. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/example.fastq.gz +0 -0
  131. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/sample_parallel.fastq.bgz +0 -0
  132. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/sample_parallel.fastq.bgz.gzi +0 -0
  133. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/test.fastq +0 -0
  134. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/fastq/wrong_extension.fastq.gz +0 -0
  135. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/gff/gencode.v38.annotation.gff3 +0 -0
  136. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/gff/gencode.v38.annotation.gff3.bgz +0 -0
  137. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/gff/gencode.v38.annotation.gff3.gz +0 -0
  138. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/gff/wrong_extension.gff3.gz +0 -0
  139. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/vcf/ensembl-2.vcf +0 -0
  140. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/vcf/ensembl.vcf +0 -0
  141. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/vcf/vep.vcf +0 -0
  142. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/vcf/vep.vcf.bgz +0 -0
  143. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/vcf/vep.vcf.gz +0 -0
  144. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/vcf/wrong_extension.vcf.bgz +0 -0
  145. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/io/vcf/wrong_extension.vcf.gz +0 -0
  146. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/merge/input.csv +0 -0
  147. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/nearest/reads.csv +0 -0
  148. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/nearest/targets.csv +0 -0
  149. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/overlap/reads.csv +0 -0
  150. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/data/overlap/targets.csv +0 -0
  151. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_bioframe.py +0 -0
  152. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_ensembl_parsing.py +0 -0
  153. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_ensembl_vcf_parsing.py +0 -0
  154. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_io.py +0 -0
  155. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_native.py +0 -0
  156. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_pandas.py +0 -0
  157. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_parallel_io.py +0 -0
  158. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_polars.py +0 -0
  159. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_polars_ext.py +0 -0
  160. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_streaming.py +0 -0
  161. {polars_bio-0.11.0 → polars_bio-0.12.0}/tests/test_warnings.py +0 -0
@@ -105,6 +105,16 @@ dependencies = [
105
105
  "memchr",
106
106
  ]
107
107
 
108
+ [[package]]
109
+ name = "aligned-vec"
110
+ version = "0.6.4"
111
+ source = "registry+https://github.com/rust-lang/crates.io-index"
112
+ checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
113
+ dependencies = [
114
+ "equator",
115
+ "serde",
116
+ ]
117
+
108
118
  [[package]]
109
119
  name = "alloc-no-stdlib"
110
120
  version = "2.0.4"
@@ -1373,7 +1383,7 @@ dependencies = [
1373
1383
  [[package]]
1374
1384
  name = "datafusion-bio-format-bam"
1375
1385
  version = "0.1.0"
1376
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=89648086098023a3544f2735fb3395c83118c7e3#89648086098023a3544f2735fb3395c83118c7e3"
1386
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1377
1387
  dependencies = [
1378
1388
  "async-stream",
1379
1389
  "async-trait",
@@ -1394,7 +1404,7 @@ dependencies = [
1394
1404
  [[package]]
1395
1405
  name = "datafusion-bio-format-bed"
1396
1406
  version = "0.1.0"
1397
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=89648086098023a3544f2735fb3395c83118c7e3#89648086098023a3544f2735fb3395c83118c7e3"
1407
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1398
1408
  dependencies = [
1399
1409
  "async-compression",
1400
1410
  "async-stream",
@@ -1407,7 +1417,7 @@ dependencies = [
1407
1417
  "futures",
1408
1418
  "futures-util",
1409
1419
  "log",
1410
- "noodles",
1420
+ "noodles 0.93.0",
1411
1421
  "noodles-bed",
1412
1422
  "noodles-bgzf 0.36.0",
1413
1423
  "opendal",
@@ -1418,14 +1428,15 @@ dependencies = [
1418
1428
  [[package]]
1419
1429
  name = "datafusion-bio-format-core"
1420
1430
  version = "0.1.0"
1421
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=89648086098023a3544f2735fb3395c83118c7e3#89648086098023a3544f2735fb3395c83118c7e3"
1431
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1422
1432
  dependencies = [
1423
1433
  "async-compression",
1424
1434
  "bytes",
1425
1435
  "datafusion",
1426
1436
  "datafusion-execution",
1437
+ "futures",
1427
1438
  "log",
1428
- "noodles",
1439
+ "noodles 0.93.0",
1429
1440
  "noodles-bgzf 0.36.0",
1430
1441
  "opendal",
1431
1442
  "tokio",
@@ -1436,7 +1447,7 @@ dependencies = [
1436
1447
  [[package]]
1437
1448
  name = "datafusion-bio-format-fasta"
1438
1449
  version = "0.1.0"
1439
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=89648086098023a3544f2735fb3395c83118c7e3#89648086098023a3544f2735fb3395c83118c7e3"
1450
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1440
1451
  dependencies = [
1441
1452
  "async-compression",
1442
1453
  "async-stream",
@@ -1447,7 +1458,7 @@ dependencies = [
1447
1458
  "futures",
1448
1459
  "futures-util",
1449
1460
  "log",
1450
- "noodles",
1461
+ "noodles 0.93.0",
1451
1462
  "noodles-bgzf 0.36.0",
1452
1463
  "noodles-fasta 0.55.0",
1453
1464
  "opendal",
@@ -1458,7 +1469,7 @@ dependencies = [
1458
1469
  [[package]]
1459
1470
  name = "datafusion-bio-format-fastq"
1460
1471
  version = "0.1.0"
1461
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=89648086098023a3544f2735fb3395c83118c7e3#89648086098023a3544f2735fb3395c83118c7e3"
1472
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1462
1473
  dependencies = [
1463
1474
  "async-compression",
1464
1475
  "async-stream",
@@ -1469,7 +1480,7 @@ dependencies = [
1469
1480
  "futures",
1470
1481
  "futures-util",
1471
1482
  "log",
1472
- "noodles",
1483
+ "noodles 0.93.0",
1473
1484
  "noodles-bgzf 0.36.0",
1474
1485
  "noodles-fastq 0.19.0",
1475
1486
  "opendal",
@@ -1482,7 +1493,7 @@ dependencies = [
1482
1493
  [[package]]
1483
1494
  name = "datafusion-bio-format-gff"
1484
1495
  version = "0.1.0"
1485
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=89648086098023a3544f2735fb3395c83118c7e3#89648086098023a3544f2735fb3395c83118c7e3"
1496
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1486
1497
  dependencies = [
1487
1498
  "async-compression",
1488
1499
  "async-stream",
@@ -1494,7 +1505,7 @@ dependencies = [
1494
1505
  "futures",
1495
1506
  "futures-util",
1496
1507
  "log",
1497
- "noodles",
1508
+ "noodles 0.93.0",
1498
1509
  "noodles-gff 0.50.0",
1499
1510
  "opendal",
1500
1511
  "tokio",
@@ -1504,7 +1515,7 @@ dependencies = [
1504
1515
  [[package]]
1505
1516
  name = "datafusion-bio-format-vcf"
1506
1517
  version = "0.1.0"
1507
- source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=89648086098023a3544f2735fb3395c83118c7e3#89648086098023a3544f2735fb3395c83118c7e3"
1518
+ source = "git+https://github.com/biodatageeks/datafusion-bio-formats.git?rev=0b9746c77aeb0e3a29f6460cb29f48aa17625c1d#0b9746c77aeb0e3a29f6460cb29f48aa17625c1d"
1508
1519
  dependencies = [
1509
1520
  "async-compression",
1510
1521
  "async-stream",
@@ -1516,6 +1527,7 @@ dependencies = [
1516
1527
  "env_logger",
1517
1528
  "futures",
1518
1529
  "log",
1530
+ "noodles 0.100.0",
1519
1531
  "noodles-bgzf 0.36.0",
1520
1532
  "noodles-vcf 0.80.0",
1521
1533
  "opendal",
@@ -2332,6 +2344,26 @@ dependencies = [
2332
2344
  "log",
2333
2345
  ]
2334
2346
 
2347
+ [[package]]
2348
+ name = "equator"
2349
+ version = "0.4.2"
2350
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2351
+ checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
2352
+ dependencies = [
2353
+ "equator-macro",
2354
+ ]
2355
+
2356
+ [[package]]
2357
+ name = "equator-macro"
2358
+ version = "0.4.2"
2359
+ source = "registry+https://github.com/rust-lang/crates.io-index"
2360
+ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
2361
+ dependencies = [
2362
+ "proc-macro2",
2363
+ "quote",
2364
+ "syn 2.0.106",
2365
+ ]
2366
+
2335
2367
  [[package]]
2336
2368
  name = "equivalent"
2337
2369
  version = "1.0.2"
@@ -2772,19 +2804,21 @@ checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
2772
2804
 
2773
2805
  [[package]]
2774
2806
  name = "hyper"
2775
- version = "1.6.0"
2807
+ version = "1.7.0"
2776
2808
  source = "registry+https://github.com/rust-lang/crates.io-index"
2777
- checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
2809
+ checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e"
2778
2810
  dependencies = [
2811
+ "atomic-waker",
2779
2812
  "bytes",
2780
2813
  "futures-channel",
2781
- "futures-util",
2814
+ "futures-core",
2782
2815
  "h2",
2783
2816
  "http",
2784
2817
  "http-body",
2785
2818
  "httparse",
2786
2819
  "itoa",
2787
2820
  "pin-project-lite",
2821
+ "pin-utils",
2788
2822
  "smallvec",
2789
2823
  "tokio",
2790
2824
  "want",
@@ -3557,6 +3591,15 @@ dependencies = [
3557
3591
  "noodles-vcf 0.74.0",
3558
3592
  ]
3559
3593
 
3594
+ [[package]]
3595
+ name = "noodles"
3596
+ version = "0.100.0"
3597
+ source = "git+https://github.com/biodatageeks/noodles.git?rev=289ef32e7d43d142914fb3f02335044ae293871c#289ef32e7d43d142914fb3f02335044ae293871c"
3598
+ dependencies = [
3599
+ "noodles-bgzf 0.42.0",
3600
+ "noodles-vcf 0.80.0",
3601
+ ]
3602
+
3560
3603
  [[package]]
3561
3604
  name = "noodles-bam"
3562
3605
  version = "0.76.0"
@@ -5150,7 +5193,7 @@ dependencies = [
5150
5193
 
5151
5194
  [[package]]
5152
5195
  name = "polars_bio"
5153
- version = "0.11.0"
5196
+ version = "0.12.0"
5154
5197
  dependencies = [
5155
5198
  "arrow",
5156
5199
  "arrow-array",
@@ -6027,7 +6070,7 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
6027
6070
  [[package]]
6028
6071
  name = "sequila-core"
6029
6072
  version = "0.1.0"
6030
- source = "git+https://github.com/biodatageeks/sequila-native.git?rev=21eb221d65fe2514b6c83d386adb33540828d22d#21eb221d65fe2514b6c83d386adb33540828d22d"
6073
+ source = "git+https://github.com/biodatageeks/sequila-native.git?rev=745d40f77da7ced5d540f9285eb5123ba12682ff#745d40f77da7ced5d540f9285eb5123ba12682ff"
6031
6074
  dependencies = [
6032
6075
  "ahash",
6033
6076
  "async-trait",
@@ -6042,6 +6085,7 @@ dependencies = [
6042
6085
  "parking_lot",
6043
6086
  "rand 0.8.5",
6044
6087
  "rust-lapper",
6088
+ "superintervals",
6045
6089
  "tokio",
6046
6090
  ]
6047
6091
 
@@ -6407,6 +6451,15 @@ version = "2.6.1"
6407
6451
  source = "registry+https://github.com/rust-lang/crates.io-index"
6408
6452
  checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
6409
6453
 
6454
+ [[package]]
6455
+ name = "superintervals"
6456
+ version = "0.3.6"
6457
+ source = "git+https://github.com/biodatageeks/sequila-native.git?rev=745d40f77da7ced5d540f9285eb5123ba12682ff#745d40f77da7ced5d540f9285eb5123ba12682ff"
6458
+ dependencies = [
6459
+ "aligned-vec",
6460
+ "serde",
6461
+ ]
6462
+
6410
6463
  [[package]]
6411
6464
  name = "syn"
6412
6465
  version = "1.0.109"
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars_bio"
3
- version = "0.11.0"
3
+ version = "0.12.0"
4
4
  edition = "2021"
5
5
  readme = "README.md"
6
6
 
@@ -15,7 +15,7 @@ crate-type= ["cdylib"]
15
15
  datafusion-python = "48.0.0"
16
16
  pyo3 = { version = "0.24.2", features = ["extension-module", "abi3"] }
17
17
  pyo3-log = "0.12.4"
18
- sequila-core = { git = "https://github.com/biodatageeks/sequila-native.git", rev = "21eb221d65fe2514b6c83d386adb33540828d22d" }
18
+ sequila-core = { git = "https://github.com/biodatageeks/sequila-native.git", rev = "745d40f77da7ced5d540f9285eb5123ba12682ff" }
19
19
 
20
20
  datafusion = { version = "48.0.1"}
21
21
  arrow = "55.0.0"
@@ -36,13 +36,13 @@ polars-arrow = { git = "https://github.com/mwiewior/polars.git" , rev = "49142b
36
36
  polars-python = { git = "https://github.com/mwiewior/polars.git" , rev = "49142b2e89dbc93888154dccfb7c6a4e4233c7b0"}
37
37
 
38
38
 
39
- datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "89648086098023a3544f2735fb3395c83118c7e3" }
40
- datafusion-bio-format-core = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "89648086098023a3544f2735fb3395c83118c7e3" }
41
- datafusion-bio-format-gff = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "89648086098023a3544f2735fb3395c83118c7e3" }
42
- datafusion-bio-format-fastq = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "89648086098023a3544f2735fb3395c83118c7e3" }
43
- datafusion-bio-format-bam = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "89648086098023a3544f2735fb3395c83118c7e3" }
44
- datafusion-bio-format-bed = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "89648086098023a3544f2735fb3395c83118c7e3" }
45
- datafusion-bio-format-fasta = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "89648086098023a3544f2735fb3395c83118c7e3" }
39
+ datafusion-bio-format-vcf = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
40
+ datafusion-bio-format-core = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
41
+ datafusion-bio-format-gff = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
42
+ datafusion-bio-format-fastq = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
43
+ datafusion-bio-format-bam = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
44
+ datafusion-bio-format-bed = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
45
+ datafusion-bio-format-fasta = { git = "https://github.com/biodatageeks/datafusion-bio-formats.git", rev = "0b9746c77aeb0e3a29f6460cb29f48aa17625c1d" }
46
46
 
47
47
 
48
48
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polars-bio
3
- Version: 0.11.0
3
+ Version: 0.12.0
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: Implementation :: CPython
6
6
  Classifier: Programming Language :: Python :: Implementation :: PyPy
@@ -1,8 +1,8 @@
1
1
  1. What versions of Polars are supported?
2
2
 
3
- Short answer: Polars <= **1.17.1** is supported.
3
+ Short answer: Polars <= **1.21.0** is supported.
4
4
 
5
- Longer answer: Since Polars has recently [upgraded PyO3 to 0.23.x](https://github.com/pola-rs/polars/pull/20111) and many other dependencies still rely on 0.22.x, we are currently limited to Polars <= 1.17.1. We are working on upgrading to the latest version of Polars.
5
+ Long answer: We recommend handling most of the heavy lifting on the DataFusion side (e.g., using SQL and views) and relying on Polars’ streaming capabilities primarily for sinking results. This means we are not making use of the latest Polars features, and we are not in a hurry to upgrade to the newest version. However, we do plan to adopt the new Polars streaming engine in the near future. [Reference](https://github.com/pola-rs/polars/issues/20947).
6
6
 
7
7
  2. What to do if I get `Illegal instruction (core dumped)` when using polars-bio?
8
8
  This error is likely due to the fact that the ABI of the polars-bio wheel package does not match the ABI of the Python interpreter.
@@ -32,7 +32,7 @@ pip install /Users/mwiewior/research/git/polars-bio/target/wheels/polars_bio-0.1
32
32
  !!! tip
33
33
  Required dependencies:
34
34
 
35
- * Python>=3.9<3.13 (3.12 is recommended),
35
+ * Python>=3.9<3.14 (3.12 is recommended),
36
36
  * [poetry](https://python-poetry.org/)
37
37
  * cmake,
38
38
  * Rust compiler
@@ -59,7 +59,7 @@ except ImportError:
59
59
  POLARS_BIO_MAX_THREADS = "datafusion.execution.target_partitions"
60
60
 
61
61
 
62
- __version__ = "0.11.0"
62
+ __version__ = "0.12.0"
63
63
  __all__ = [
64
64
  "ctx",
65
65
  "FilterOp",
@@ -64,7 +64,7 @@ class IntervalOperations:
64
64
  genomic intervals, provided separately for each set.
65
65
  suffixes: Suffixes for the columns of the two overlapped sets.
66
66
  on_cols: List of additional column names to join on. default is None.
67
- algorithm: The algorithm to use for the overlap operation. Available options: Coitrees, IntervalTree, ArrayIntervalTree, Lapper
67
+ algorithm: The algorithm to use for the overlap operation. Available options: Coitrees, IntervalTree, ArrayIntervalTree, Lapper, SuperIntervals
68
68
  output_type: Type of the output. default is "polars.LazyFrame", "polars.DataFrame", or "pandas.DataFrame" or "datafusion.DataFrame" are also supported.
69
69
  streaming: **EXPERIMENTAL** If True, use Polars [streaming](features.md#streaming) engine.
70
70
  read_options1: Additional options for reading the input files.
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "polars-bio"
7
- version = "0.11.0"
7
+ version = "0.12.0"
8
8
  description = "Blazing fast genomic operations on large Python dataframes"
9
9
  authors = []
10
10
  requires-python = ">=3.9"
@@ -0,0 +1,4 @@
1
+ >seq1 First sequence
2
+ ACTG
3
+ >seq2 Second sequence
4
+ GATTACA
@@ -106,6 +106,27 @@ class TestOverlapAlgorithms:
106
106
  algorithm="ArrayIntervalTree",
107
107
  )
108
108
 
109
+ result_overlap_superintervals = pb.overlap(
110
+ BIO_PD_DF1,
111
+ BIO_PD_DF2,
112
+ cols1=("contig", "pos_start", "pos_end"),
113
+ cols2=("contig", "pos_start", "pos_end"),
114
+ output_type="pandas.DataFrame",
115
+ suffixes=("_1", "_3"),
116
+ use_zero_based=True,
117
+ algorithm="SuperIntervals",
118
+ )
119
+
120
+ result_overlap_superintervals_log = pb.overlap(
121
+ BIO_PD_DF1,
122
+ BIO_PD_DF2,
123
+ cols1=("contig", "pos_start", "pos_end"),
124
+ cols2=("contig", "pos_start", "pos_end"),
125
+ suffixes=("_1", "_3"),
126
+ use_zero_based=True,
127
+ algorithm="SuperIntervals",
128
+ )
129
+
109
130
  expected = result_bio_overlap.sort_values(
110
131
  by=list(result_bio_overlap.columns)
111
132
  ).reset_index(drop=True)
@@ -122,6 +143,9 @@ class TestOverlapAlgorithms:
122
143
  def test_overlap_count_ait(self):
123
144
  assert len(self.result_overlap_ait) == len(self.result_bio_overlap)
124
145
 
146
+ def test_overlap_count_superintervals(self):
147
+ assert len(self.result_overlap_superintervals) == len(self.result_bio_overlap)
148
+
125
149
  def test_overlap_schema_rows_coitrees(self):
126
150
  result = self.result_overlap_coitrees.sort_values(
127
151
  by=list(self.result_overlap_coitrees.columns)
@@ -146,6 +170,12 @@ class TestOverlapAlgorithms:
146
170
  ).reset_index(drop=True)
147
171
  pd.testing.assert_frame_equal(result_ait, self.expected)
148
172
 
173
+ def test_overlap_schema_rows_superintervals(self):
174
+ result_superintervals = self.result_overlap_superintervals.sort_values(
175
+ by=list(self.result_overlap_superintervals.columns)
176
+ ).reset_index(drop=True)
177
+ pd.testing.assert_frame_equal(result_superintervals, self.expected)
178
+
149
179
  def test_overlap_schema_rows_it_log(self, caplog):
150
180
  caplog.set_level("INFO")
151
181
  self.result_overlap_it_log.count().collect()
@@ -173,3 +203,11 @@ class TestOverlapAlgorithms:
173
203
  caplog.set_level("INFO")
174
204
  self.result_overlap_lapper_log.count().collect()
175
205
  assert "Optimizing into IntervalJoinExec using Lapper algorithm" in caplog.text
206
+
207
+ def test_overlap_schema_rows_superintervals_log(self, caplog):
208
+ caplog.set_level("INFO")
209
+ self.result_overlap_superintervals_log.count().collect()
210
+ assert (
211
+ "Optimizing into IntervalJoinExec using SuperIntervals algorithm"
212
+ in caplog.text
213
+ )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes