bio2zarr 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bio2zarr might be problematic. Click here for more details.
- bio2zarr/__init__.py +1 -1
- bio2zarr/__main__.py +2 -0
- bio2zarr/_version.py +2 -2
- bio2zarr/cli.py +129 -32
- bio2zarr/core.py +18 -9
- bio2zarr/plink.py +6 -8
- bio2zarr/typing.py +1 -1
- bio2zarr/vcf.py +642 -386
- bio2zarr/vcf_utils.py +26 -8
- {bio2zarr-0.0.4.dist-info → bio2zarr-0.0.6.dist-info}/METADATA +1 -1
- bio2zarr-0.0.6.dist-info/RECORD +16 -0
- bio2zarr-0.0.4.dist-info/RECORD +0 -16
- {bio2zarr-0.0.4.dist-info → bio2zarr-0.0.6.dist-info}/LICENSE +0 -0
- {bio2zarr-0.0.4.dist-info → bio2zarr-0.0.6.dist-info}/WHEEL +0 -0
- {bio2zarr-0.0.4.dist-info → bio2zarr-0.0.6.dist-info}/entry_points.txt +0 -0
- {bio2zarr-0.0.4.dist-info → bio2zarr-0.0.6.dist-info}/top_level.txt +0 -0
bio2zarr/vcf_utils.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
from typing import IO, Any, Dict, Optional, Sequence, Union
|
|
2
1
|
import contextlib
|
|
3
|
-
import struct
|
|
4
|
-
import pathlib
|
|
5
2
|
import gzip
|
|
6
|
-
from dataclasses import dataclass
|
|
7
3
|
import os
|
|
4
|
+
import pathlib
|
|
5
|
+
import struct
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import IO, Any, Dict, Optional, Sequence, Union
|
|
8
8
|
|
|
9
|
-
import numpy as np
|
|
10
9
|
import cyvcf2
|
|
11
10
|
import humanfriendly
|
|
11
|
+
import numpy as np
|
|
12
12
|
|
|
13
13
|
from bio2zarr.typing import PathType
|
|
14
14
|
|
|
@@ -38,7 +38,8 @@ def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Optional[Any] = None) -> A
|
|
|
38
38
|
fmt : str
|
|
39
39
|
A Python `struct` format string.
|
|
40
40
|
nodata : Optional[Any], optional
|
|
41
|
-
The value to return in case there is no further data in the stream,
|
|
41
|
+
The value to return in case there is no further data in the stream,
|
|
42
|
+
by default None
|
|
42
43
|
|
|
43
44
|
Returns
|
|
44
45
|
-------
|
|
@@ -277,7 +278,8 @@ class TabixIndex:
|
|
|
277
278
|
# Create file offsets for each element in the linear index
|
|
278
279
|
file_offsets = np.array([get_file_offset(vfp) for vfp in linear_index])
|
|
279
280
|
|
|
280
|
-
# Calculate corresponding contigs and positions or each element in
|
|
281
|
+
# Calculate corresponding contigs and positions of each element in
|
|
282
|
+
# the linear index
|
|
281
283
|
contig_indexes = np.hstack(
|
|
282
284
|
[np.full(len(li), i) for (i, li) in enumerate(linear_indexes)]
|
|
283
285
|
)
|
|
@@ -433,6 +435,22 @@ class IndexedVcf(contextlib.AbstractContextManager):
|
|
|
433
435
|
if var.POS >= start:
|
|
434
436
|
yield var
|
|
435
437
|
|
|
438
|
+
def _filter_empty(self, regions):
|
|
439
|
+
"""
|
|
440
|
+
Return all regions in the specified list that have one or more records.
|
|
441
|
+
|
|
442
|
+
Sometimes with Tabix indexes such empty regions seem to crop up:
|
|
443
|
+
|
|
444
|
+
- https://github.com/sgkit-dev/bio2zarr/issues/45
|
|
445
|
+
- https://github.com/sgkit-dev/bio2zarr/issues/120
|
|
446
|
+
"""
|
|
447
|
+
ret = []
|
|
448
|
+
for region in regions:
|
|
449
|
+
variants = self.variants(region)
|
|
450
|
+
if next(variants, None) is not None:
|
|
451
|
+
ret.append(region)
|
|
452
|
+
return ret
|
|
453
|
+
|
|
436
454
|
def partition_into_regions(
|
|
437
455
|
self,
|
|
438
456
|
num_parts: Optional[int] = None,
|
|
@@ -509,4 +527,4 @@ class IndexedVcf(contextlib.AbstractContextManager):
|
|
|
509
527
|
if self.index.record_counts[ri] > 0:
|
|
510
528
|
regions.append(Region(self.sequence_names[ri]))
|
|
511
529
|
|
|
512
|
-
return regions
|
|
530
|
+
return self._filter_empty(regions)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
bio2zarr/__init__.py,sha256=KiUGyya-9RHNcBldB8Lc1g3rP3CRjaL-5Olben0_6qA,49
|
|
2
|
+
bio2zarr/__main__.py,sha256=hO4vV-kPFgsYq0NQwG2r-WkserPL27oqae_tUvNB7yE,527
|
|
3
|
+
bio2zarr/_version.py,sha256=c6ZQWSJeXXzGZ3WoZWjkA-MiNkBFXMIRV9kZPo4MQ_M,411
|
|
4
|
+
bio2zarr/cli.py,sha256=iQIT3c_mos97st9bTLiBuxLL70MbNOtlTegUti6q2-g,14397
|
|
5
|
+
bio2zarr/core.py,sha256=E8MF5PUVulYtq5gaxaMWN7g5YeWPDueqlGCck3DJCuM,8516
|
|
6
|
+
bio2zarr/plink.py,sha256=huXMlxQ5C3gPmOYCavA-QW7PzaV48I2lo80cQqHT1wY,6768
|
|
7
|
+
bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
|
|
8
|
+
bio2zarr/typing.py,sha256=BYxhL16sKRoNxa6amf6AYxvt5Ke9qzv2np_kOT_zPJo,79
|
|
9
|
+
bio2zarr/vcf.py,sha256=dJcaRCdlDlUxYE_Z3hQaqSy7EivfpihP78XSRMzlWbU,83330
|
|
10
|
+
bio2zarr/vcf_utils.py,sha256=r3NQXxWK1SYU7CcwDzSWXdX5Q8Ixk7gdCTEiFPzfUAk,17307
|
|
11
|
+
bio2zarr-0.0.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
12
|
+
bio2zarr-0.0.6.dist-info/METADATA,sha256=siwhDelRAqr4Zo4EAPqZ4c9SkMOsU9ixRtgALAROP9A,1077
|
|
13
|
+
bio2zarr-0.0.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
14
|
+
bio2zarr-0.0.6.dist-info/entry_points.txt,sha256=pklStOdATE5hHJm4qiIvmhHkcn21Si_XAu6MC7ieNrk,131
|
|
15
|
+
bio2zarr-0.0.6.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
|
|
16
|
+
bio2zarr-0.0.6.dist-info/RECORD,,
|
bio2zarr-0.0.4.dist-info/RECORD
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
bio2zarr/__init__.py,sha256=yIJYx4GyKtOLOtODOX0kGCeGPYgQ-TBbsRdT1NwBpQQ,37
|
|
2
|
-
bio2zarr/__main__.py,sha256=3cgaQ4x8YKXt-9xC2GLrHnS6UA38y1GXqttwZiBZJg4,525
|
|
3
|
-
bio2zarr/_version.py,sha256=yBVOKdXLEcTVc7YV7ZPqRXhRDRt-pKrfXxcgHkgPY5g,411
|
|
4
|
-
bio2zarr/cli.py,sha256=QE0DfoZHbBbxq9K_im9y4tJ49_Wss0zzavSjjz-85Xw,11484
|
|
5
|
-
bio2zarr/core.py,sha256=tZb9exfFmuzbA8tUpPY8avSm9YvfH31-vUCTM4fpj78,8128
|
|
6
|
-
bio2zarr/plink.py,sha256=llhfP-v44BVPvgCcwXktk0YrKaJSII63U_PTtpHlGtM,6755
|
|
7
|
-
bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
|
|
8
|
-
bio2zarr/typing.py,sha256=wZ99Zzp5BD9Nqpd-S5bn38fSdPzfj6Z9IHPBfZqt9Gs,78
|
|
9
|
-
bio2zarr/vcf.py,sha256=MEskVTDq4QntzoawPz0sfmInV0aPkIPLXXNv7GmVcmY,73870
|
|
10
|
-
bio2zarr/vcf_utils.py,sha256=_kMZdpye15HGpniv8wwISw0L6NEEi54ZFaTcM83wLGs,16751
|
|
11
|
-
bio2zarr-0.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
12
|
-
bio2zarr-0.0.4.dist-info/METADATA,sha256=DISckjzZ0b6FpBTfBvpmJmEe00SIdTHyB3UTsTR8rws,1077
|
|
13
|
-
bio2zarr-0.0.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
14
|
-
bio2zarr-0.0.4.dist-info/entry_points.txt,sha256=pklStOdATE5hHJm4qiIvmhHkcn21Si_XAu6MC7ieNrk,131
|
|
15
|
-
bio2zarr-0.0.4.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
|
|
16
|
-
bio2zarr-0.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|