bio2zarr 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bio2zarr might be problematic. Click here for more details.
- bio2zarr/__main__.py +2 -1
- bio2zarr/_version.py +2 -2
- bio2zarr/cli.py +89 -22
- bio2zarr/core.py +43 -22
- bio2zarr/plink.py +314 -189
- bio2zarr/tskit.py +301 -0
- bio2zarr/typing.py +1 -2
- bio2zarr/{vcf2zarr/icf.py → vcf.py} +594 -112
- bio2zarr/vcf_utils.py +12 -11
- bio2zarr/{vcf2zarr/vcz.py → vcz.py} +544 -708
- bio2zarr/{vcf2zarr/verification.py → vcz_verification.py} +5 -2
- {bio2zarr-0.1.5.dist-info → bio2zarr-0.1.6.dist-info}/METADATA +17 -6
- bio2zarr-0.1.6.dist-info/RECORD +21 -0
- {bio2zarr-0.1.5.dist-info → bio2zarr-0.1.6.dist-info}/WHEEL +1 -1
- {bio2zarr-0.1.5.dist-info → bio2zarr-0.1.6.dist-info}/entry_points.txt +2 -0
- bio2zarr/vcf2zarr/__init__.py +0 -38
- bio2zarr-0.1.5.dist-info/RECORD +0 -21
- {bio2zarr-0.1.5.dist-info → bio2zarr-0.1.6.dist-info}/licenses/LICENSE +0 -0
- {bio2zarr-0.1.5.dist-info → bio2zarr-0.1.6.dist-info}/top_level.txt +0 -0
bio2zarr/vcf_utils.py
CHANGED
|
@@ -7,12 +7,12 @@ import struct
|
|
|
7
7
|
from collections.abc import Sequence
|
|
8
8
|
from dataclasses import dataclass
|
|
9
9
|
from enum import Enum
|
|
10
|
-
from typing import IO, Any, Optional, Union
|
|
10
|
+
from typing import IO, Any
|
|
11
11
|
|
|
12
|
-
import cyvcf2
|
|
13
12
|
import humanfriendly
|
|
14
13
|
import numpy as np
|
|
15
14
|
|
|
15
|
+
from bio2zarr import core
|
|
16
16
|
from bio2zarr.typing import PathType
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
@@ -33,7 +33,7 @@ def get_file_offset(vfp: int) -> int:
|
|
|
33
33
|
return vfp >> 16 & address_mask
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Optional[Any] = None) -> Any:
|
|
36
|
+
def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Any | None = None) -> Any:
|
|
37
37
|
"""Read bytes using a `struct` format string and return the unpacked data value.
|
|
38
38
|
|
|
39
39
|
Parameters
|
|
@@ -85,8 +85,8 @@ class Region:
|
|
|
85
85
|
"""
|
|
86
86
|
|
|
87
87
|
contig: str
|
|
88
|
-
start: Optional[int] = None
|
|
89
|
-
end: Optional[int] = None
|
|
88
|
+
start: int | None = None
|
|
89
|
+
end: int | None = None
|
|
90
90
|
|
|
91
91
|
def __post_init__(self):
|
|
92
92
|
assert self.contig is not None
|
|
@@ -197,9 +197,7 @@ def get_first_locus_in_bin(csi: CSIIndex, bin: int) -> int:
|
|
|
197
197
|
return (bin - first_bin_on_level) * (max_span // level_size) + 1
|
|
198
198
|
|
|
199
199
|
|
|
200
|
-
def read_csi(
|
|
201
|
-
file: PathType, storage_options: Optional[dict[str, str]] = None
|
|
202
|
-
) -> CSIIndex:
|
|
200
|
+
def read_csi(file: PathType, storage_options: dict[str, str] | None = None) -> CSIIndex:
|
|
203
201
|
"""Parse a CSI file into a `CSIIndex` object.
|
|
204
202
|
|
|
205
203
|
Parameters
|
|
@@ -314,7 +312,7 @@ class TabixIndex:
|
|
|
314
312
|
|
|
315
313
|
|
|
316
314
|
def read_tabix(
|
|
317
|
-
file: PathType, storage_options: Optional[dict[str, str]] = None
|
|
315
|
+
file: PathType, storage_options: dict[str, str] | None = None
|
|
318
316
|
) -> TabixIndex:
|
|
319
317
|
"""Parse a tabix file into a `TabixIndex` object.
|
|
320
318
|
|
|
@@ -397,7 +395,10 @@ class VcfIndexType(Enum):
|
|
|
397
395
|
|
|
398
396
|
|
|
399
397
|
class VcfFile(contextlib.AbstractContextManager):
|
|
398
|
+
@core.requires_optional_dependency("cyvcf2", "vcf")
|
|
400
399
|
def __init__(self, vcf_path, index_path=None):
|
|
400
|
+
import cyvcf2
|
|
401
|
+
|
|
401
402
|
self.vcf = None
|
|
402
403
|
self.file_type = None
|
|
403
404
|
self.index_type = None
|
|
@@ -512,8 +513,8 @@ class VcfFile(contextlib.AbstractContextManager):
|
|
|
512
513
|
|
|
513
514
|
def partition_into_regions(
|
|
514
515
|
self,
|
|
515
|
-
num_parts: Optional[int] = None,
|
|
516
|
-
target_part_size: Union[None, int, str] = None,
|
|
516
|
+
num_parts: int | None = None,
|
|
517
|
+
target_part_size: None | int | str = None,
|
|
517
518
|
):
|
|
518
519
|
if num_parts is None and target_part_size is None:
|
|
519
520
|
raise ValueError("One of num_parts or target_part_size must be specified")
|