bio2zarr 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bio2zarr/_version.py +16 -3
- bio2zarr/cli.py +16 -3
- bio2zarr/plink.py +7 -5
- bio2zarr/tskit.py +14 -19
- bio2zarr/vcf.py +23 -13
- bio2zarr/vcz.py +34 -41
- bio2zarr/zarr_utils.py +169 -2
- {bio2zarr-0.1.6.dist-info → bio2zarr-0.1.7.dist-info}/METADATA +10 -6
- bio2zarr-0.1.7.dist-info/RECORD +21 -0
- {bio2zarr-0.1.6.dist-info → bio2zarr-0.1.7.dist-info}/WHEEL +1 -1
- bio2zarr-0.1.6.dist-info/RECORD +0 -21
- {bio2zarr-0.1.6.dist-info → bio2zarr-0.1.7.dist-info}/entry_points.txt +0 -0
- {bio2zarr-0.1.6.dist-info → bio2zarr-0.1.7.dist-info}/licenses/LICENSE +0 -0
- {bio2zarr-0.1.6.dist-info → bio2zarr-0.1.7.dist-info}/top_level.txt +0 -0
bio2zarr/_version.py
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
5
12
|
|
|
6
13
|
TYPE_CHECKING = False
|
|
7
14
|
if TYPE_CHECKING:
|
|
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
|
|
|
9
16
|
from typing import Union
|
|
10
17
|
|
|
11
18
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
12
20
|
else:
|
|
13
21
|
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
14
23
|
|
|
15
24
|
version: str
|
|
16
25
|
__version__: str
|
|
17
26
|
__version_tuple__: VERSION_TUPLE
|
|
18
27
|
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
19
30
|
|
|
20
|
-
__version__ = version = '0.1.
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 1,
|
|
31
|
+
__version__ = version = '0.1.7'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 7)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
bio2zarr/cli.py
CHANGED
|
@@ -652,7 +652,12 @@ def vcfpartition(vcfs, verbose, num_partitions, partition_size):
|
|
|
652
652
|
@click.argument("zarr_path", type=click.Path())
|
|
653
653
|
@click.option("--contig-id", type=str, help="Contig/chromosome ID (default: '1')")
|
|
654
654
|
@click.option(
|
|
655
|
-
"--isolated-as-missing
|
|
655
|
+
"--isolated-as-missing/--isolated-as-ancestral",
|
|
656
|
+
default=None,
|
|
657
|
+
help=(
|
|
658
|
+
"Treat isolated samples without mutations as missing or ancestral "
|
|
659
|
+
"(default: tskit default)"
|
|
660
|
+
),
|
|
656
661
|
)
|
|
657
662
|
@variants_chunk_size
|
|
658
663
|
@samples_chunk_size
|
|
@@ -660,6 +665,7 @@ def vcfpartition(vcfs, verbose, num_partitions, partition_size):
|
|
|
660
665
|
@progress
|
|
661
666
|
@worker_processes
|
|
662
667
|
@force
|
|
668
|
+
@core.requires_optional_dependency("tskit", "tskit")
|
|
663
669
|
def convert_tskit(
|
|
664
670
|
ts_path,
|
|
665
671
|
zarr_path,
|
|
@@ -675,11 +681,18 @@ def convert_tskit(
|
|
|
675
681
|
setup_logging(verbose)
|
|
676
682
|
check_overwrite_dir(zarr_path, force)
|
|
677
683
|
|
|
684
|
+
import tskit
|
|
685
|
+
|
|
686
|
+
ts = tskit.load(ts_path)
|
|
687
|
+
model_mapping = ts.map_to_vcf_model(
|
|
688
|
+
contig_id=contig_id,
|
|
689
|
+
isolated_as_missing=isolated_as_missing,
|
|
690
|
+
)
|
|
691
|
+
|
|
678
692
|
tskit_mod.convert(
|
|
679
693
|
ts_path,
|
|
680
694
|
zarr_path,
|
|
681
|
-
|
|
682
|
-
isolated_as_missing=isolated_as_missing,
|
|
695
|
+
model_mapping=model_mapping,
|
|
683
696
|
variants_chunk_size=variants_chunk_size,
|
|
684
697
|
samples_chunk_size=samples_chunk_size,
|
|
685
698
|
worker_processes=worker_processes,
|
bio2zarr/plink.py
CHANGED
|
@@ -6,6 +6,7 @@ import numpy as np
|
|
|
6
6
|
import pandas as pd
|
|
7
7
|
|
|
8
8
|
from bio2zarr import constants, core, vcz
|
|
9
|
+
from bio2zarr.zarr_utils import STRING_DTYPE_NAME
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
11
12
|
|
|
@@ -198,7 +199,7 @@ class PlinkFormat(vcz.Source):
|
|
|
198
199
|
ref_iter = self.bim.allele_2.values[start:stop]
|
|
199
200
|
gt_iter = self.bed_reader.iter_decode(start, stop)
|
|
200
201
|
for alt, ref, gt in zip(alt_iter, ref_iter, gt_iter):
|
|
201
|
-
alleles = np.full(num_alleles, constants.STR_FILL, dtype=
|
|
202
|
+
alleles = np.full(num_alleles, constants.STR_FILL, dtype=STRING_DTYPE_NAME)
|
|
202
203
|
alleles[0] = ref
|
|
203
204
|
alleles[1 : 1 + len(alt)] = alt
|
|
204
205
|
phased = np.zeros(gt.shape[0], dtype=bool)
|
|
@@ -234,8 +235,9 @@ class PlinkFormat(vcz.Source):
|
|
|
234
235
|
)
|
|
235
236
|
# If we don't have SVLEN or END annotations, the rlen field is defined
|
|
236
237
|
# as the length of the REF
|
|
237
|
-
|
|
238
|
-
|
|
238
|
+
# Explicitly cast to fixed size array to support pandas 2.x and 3.x
|
|
239
|
+
allele_2_array = self.bim.allele_2.values.astype("S")
|
|
240
|
+
max_len = allele_2_array.itemsize
|
|
239
241
|
array_specs = [
|
|
240
242
|
vcz.ZarrArraySpec(
|
|
241
243
|
source="position",
|
|
@@ -246,13 +248,13 @@ class PlinkFormat(vcz.Source):
|
|
|
246
248
|
),
|
|
247
249
|
vcz.ZarrArraySpec(
|
|
248
250
|
name="variant_allele",
|
|
249
|
-
dtype=
|
|
251
|
+
dtype=STRING_DTYPE_NAME,
|
|
250
252
|
dimensions=["variants", "alleles"],
|
|
251
253
|
description=None,
|
|
252
254
|
),
|
|
253
255
|
vcz.ZarrArraySpec(
|
|
254
256
|
name="variant_id",
|
|
255
|
-
dtype=
|
|
257
|
+
dtype=STRING_DTYPE_NAME,
|
|
256
258
|
dimensions=["variants"],
|
|
257
259
|
description=None,
|
|
258
260
|
),
|
bio2zarr/tskit.py
CHANGED
|
@@ -4,6 +4,7 @@ import pathlib
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
6
|
from bio2zarr import constants, core, vcz
|
|
7
|
+
from bio2zarr.zarr_utils import STRING_DTYPE_NAME
|
|
7
8
|
|
|
8
9
|
logger = logging.getLogger(__name__)
|
|
9
10
|
|
|
@@ -15,8 +16,6 @@ class TskitFormat(vcz.Source):
|
|
|
15
16
|
ts,
|
|
16
17
|
*,
|
|
17
18
|
model_mapping=None,
|
|
18
|
-
contig_id=None,
|
|
19
|
-
isolated_as_missing=False,
|
|
20
19
|
):
|
|
21
20
|
import tskit
|
|
22
21
|
|
|
@@ -35,14 +34,14 @@ class TskitFormat(vcz.Source):
|
|
|
35
34
|
f"{self.ts.num_sites} sites"
|
|
36
35
|
)
|
|
37
36
|
|
|
38
|
-
self.contig_id = contig_id if contig_id is not None else "1"
|
|
39
|
-
self.isolated_as_missing = isolated_as_missing
|
|
40
|
-
|
|
41
|
-
self.positions = self.ts.sites_position
|
|
42
|
-
|
|
43
37
|
if model_mapping is None:
|
|
44
38
|
model_mapping = self.ts.map_to_vcf_model()
|
|
45
39
|
|
|
40
|
+
self.contig_id = model_mapping.contig_id
|
|
41
|
+
self.contig_length = model_mapping.contig_length
|
|
42
|
+
self.isolated_as_missing = model_mapping.isolated_as_missing
|
|
43
|
+
self.raw_positions = self.ts.sites_position
|
|
44
|
+
self.vcf_positions = model_mapping.transformed_positions
|
|
46
45
|
individuals_nodes = model_mapping.individuals_nodes
|
|
47
46
|
sample_ids = model_mapping.individuals_name
|
|
48
47
|
|
|
@@ -91,14 +90,14 @@ class TskitFormat(vcz.Source):
|
|
|
91
90
|
|
|
92
91
|
@property
|
|
93
92
|
def contigs(self):
|
|
94
|
-
return [vcz.Contig(id=self.contig_id)]
|
|
93
|
+
return [vcz.Contig(id=self.contig_id, length=self.contig_length)]
|
|
95
94
|
|
|
96
95
|
def iter_contig(self, start, stop):
|
|
97
96
|
yield from (0 for _ in range(start, stop))
|
|
98
97
|
|
|
99
98
|
def iter_field(self, field_name, shape, start, stop):
|
|
100
99
|
if field_name == "position":
|
|
101
|
-
for pos in self.
|
|
100
|
+
for pos in self.vcf_positions[start:stop]:
|
|
102
101
|
yield int(pos)
|
|
103
102
|
else:
|
|
104
103
|
raise ValueError(f"Unknown field {field_name}")
|
|
@@ -110,13 +109,13 @@ class TskitFormat(vcz.Source):
|
|
|
110
109
|
|
|
111
110
|
for variant in self.ts.variants(
|
|
112
111
|
isolated_as_missing=self.isolated_as_missing,
|
|
113
|
-
left=self.
|
|
114
|
-
right=self.
|
|
112
|
+
left=self.raw_positions[start],
|
|
113
|
+
right=self.raw_positions[stop] if stop < self.num_records else None,
|
|
115
114
|
samples=self.tskit_samples,
|
|
116
115
|
copy=False,
|
|
117
116
|
):
|
|
118
117
|
gt = np.full(shape, constants.INT_FILL, dtype=np.int8)
|
|
119
|
-
alleles = np.full(num_alleles, constants.STR_FILL, dtype=
|
|
118
|
+
alleles = np.full(num_alleles, constants.STR_FILL, dtype=STRING_DTYPE_NAME)
|
|
120
119
|
# length is the length of the REF allele unless other fields
|
|
121
120
|
# are included.
|
|
122
121
|
variant_length = len(variant.alleles[0])
|
|
@@ -176,8 +175,8 @@ class TskitFormat(vcz.Source):
|
|
|
176
175
|
min_position = 0
|
|
177
176
|
max_position = 0
|
|
178
177
|
if self.ts.num_sites > 0:
|
|
179
|
-
min_position = np.min(self.
|
|
180
|
-
max_position = np.max(self.
|
|
178
|
+
min_position = np.min(self.vcf_positions)
|
|
179
|
+
max_position = np.max(self.vcf_positions)
|
|
181
180
|
|
|
182
181
|
tables = self.ts.tables
|
|
183
182
|
ancestral_state_offsets = tables.sites.ancestral_state_offset
|
|
@@ -200,7 +199,7 @@ class TskitFormat(vcz.Source):
|
|
|
200
199
|
vcz.ZarrArraySpec(
|
|
201
200
|
source=None,
|
|
202
201
|
name="variant_allele",
|
|
203
|
-
dtype=
|
|
202
|
+
dtype=STRING_DTYPE_NAME,
|
|
204
203
|
dimensions=["variants", "alleles"],
|
|
205
204
|
description="Alleles for each variant",
|
|
206
205
|
),
|
|
@@ -252,8 +251,6 @@ def convert(
|
|
|
252
251
|
vcz_path,
|
|
253
252
|
*,
|
|
254
253
|
model_mapping=None,
|
|
255
|
-
contig_id=None,
|
|
256
|
-
isolated_as_missing=False,
|
|
257
254
|
variants_chunk_size=None,
|
|
258
255
|
samples_chunk_size=None,
|
|
259
256
|
worker_processes=core.DEFAULT_WORKER_PROCESSES,
|
|
@@ -277,8 +274,6 @@ def convert(
|
|
|
277
274
|
tskit_format = TskitFormat(
|
|
278
275
|
ts_or_path,
|
|
279
276
|
model_mapping=model_mapping,
|
|
280
|
-
contig_id=contig_id,
|
|
281
|
-
isolated_as_missing=isolated_as_missing,
|
|
282
277
|
)
|
|
283
278
|
schema_instance = tskit_format.generate_schema(
|
|
284
279
|
variants_chunk_size=variants_chunk_size,
|
bio2zarr/vcf.py
CHANGED
|
@@ -16,6 +16,8 @@ from typing import Any
|
|
|
16
16
|
import numcodecs
|
|
17
17
|
import numpy as np
|
|
18
18
|
|
|
19
|
+
from bio2zarr.zarr_utils import STRING_DTYPE_NAME, zarr_exists
|
|
20
|
+
|
|
19
21
|
from . import constants, core, provenance, vcf_utils, vcz
|
|
20
22
|
|
|
21
23
|
logger = logging.getLogger(__name__)
|
|
@@ -110,7 +112,7 @@ class VcfField:
|
|
|
110
112
|
ret = "U1"
|
|
111
113
|
else:
|
|
112
114
|
assert self.vcf_type == "String"
|
|
113
|
-
ret =
|
|
115
|
+
ret = STRING_DTYPE_NAME
|
|
114
116
|
return ret
|
|
115
117
|
|
|
116
118
|
|
|
@@ -397,7 +399,7 @@ def sanitise_value_string_scalar(shape, value):
|
|
|
397
399
|
|
|
398
400
|
def sanitise_value_string_1d(shape, value):
|
|
399
401
|
if value is None:
|
|
400
|
-
return np.full(shape, ".", dtype=
|
|
402
|
+
return np.full(shape, ".", dtype=STRING_DTYPE_NAME)
|
|
401
403
|
else:
|
|
402
404
|
value = drop_empty_second_dim(value)
|
|
403
405
|
result = np.full(shape, "", dtype=value.dtype)
|
|
@@ -407,9 +409,9 @@ def sanitise_value_string_1d(shape, value):
|
|
|
407
409
|
|
|
408
410
|
def sanitise_value_string_2d(shape, value):
|
|
409
411
|
if value is None:
|
|
410
|
-
return np.full(shape, ".", dtype=
|
|
412
|
+
return np.full(shape, ".", dtype=STRING_DTYPE_NAME)
|
|
411
413
|
else:
|
|
412
|
-
result = np.full(shape, "", dtype=
|
|
414
|
+
result = np.full(shape, "", dtype=STRING_DTYPE_NAME)
|
|
413
415
|
if value.ndim == 2:
|
|
414
416
|
result[: value.shape[0], : value.shape[1]] = value
|
|
415
417
|
else:
|
|
@@ -569,7 +571,12 @@ class StringValueTransformer(VcfValueTransformer):
|
|
|
569
571
|
value = np.array(list(vcf_value.split(",")))
|
|
570
572
|
else:
|
|
571
573
|
# TODO can we make this faster??
|
|
572
|
-
|
|
574
|
+
var_len_values = [v.split(",") for v in vcf_value]
|
|
575
|
+
number = max(len(v) for v in var_len_values)
|
|
576
|
+
value = np.array(
|
|
577
|
+
[v + [""] * (number - len(v)) for v in var_len_values],
|
|
578
|
+
dtype=STRING_DTYPE_NAME,
|
|
579
|
+
)
|
|
573
580
|
# print("HERE", vcf_value, value)
|
|
574
581
|
# for v in vcf_value:
|
|
575
582
|
# print("\t", type(v), len(v), v.split(","))
|
|
@@ -1044,7 +1051,7 @@ class IntermediateColumnarFormat(vcz.Source):
|
|
|
1044
1051
|
ref_field.iter_values(start, stop),
|
|
1045
1052
|
alt_field.iter_values(start, stop),
|
|
1046
1053
|
):
|
|
1047
|
-
alleles = np.full(num_alleles, constants.STR_FILL, dtype=
|
|
1054
|
+
alleles = np.full(num_alleles, constants.STR_FILL, dtype=STRING_DTYPE_NAME)
|
|
1048
1055
|
alleles[0] = ref[0]
|
|
1049
1056
|
alleles[1 : 1 + len(alt)] = alt
|
|
1050
1057
|
yield alleles
|
|
@@ -1068,14 +1075,16 @@ class IntermediateColumnarFormat(vcz.Source):
|
|
|
1068
1075
|
for variant_length, alleles in zip(
|
|
1069
1076
|
variant_lengths, self.iter_alleles(start, stop, num_alleles)
|
|
1070
1077
|
):
|
|
1071
|
-
|
|
1078
|
+
# Stored ICF values are always at least 1D arrays; "rlen" is Number=1
|
|
1079
|
+
# so we must extract the scalar to avoid NumPy scalar-conversion issues.
|
|
1080
|
+
yield vcz.VariantData(variant_length[0], alleles, None, None)
|
|
1072
1081
|
else:
|
|
1073
1082
|
for variant_length, alleles, (gt, phased) in zip(
|
|
1074
1083
|
variant_lengths,
|
|
1075
1084
|
self.iter_alleles(start, stop, num_alleles),
|
|
1076
1085
|
self.iter_genotypes(shape, start, stop),
|
|
1077
1086
|
):
|
|
1078
|
-
yield vcz.VariantData(variant_length, alleles, gt, phased)
|
|
1087
|
+
yield vcz.VariantData(variant_length[0], alleles, gt, phased)
|
|
1079
1088
|
|
|
1080
1089
|
def generate_schema(
|
|
1081
1090
|
self, variants_chunk_size=None, samples_chunk_size=None, local_alleles=None
|
|
@@ -1087,8 +1096,10 @@ class IntermediateColumnarFormat(vcz.Source):
|
|
|
1087
1096
|
|
|
1088
1097
|
# Add ploidy and genotypes dimensions only when needed
|
|
1089
1098
|
max_genotypes = 0
|
|
1099
|
+
has_g_field = False
|
|
1090
1100
|
for field in self.metadata.format_fields:
|
|
1091
1101
|
if field.vcf_number == "G":
|
|
1102
|
+
has_g_field = True
|
|
1092
1103
|
max_genotypes = max(max_genotypes, field.summary.max_number)
|
|
1093
1104
|
|
|
1094
1105
|
ploidy = None
|
|
@@ -1100,7 +1111,7 @@ class IntermediateColumnarFormat(vcz.Source):
|
|
|
1100
1111
|
genotypes_size = math.comb(max_alleles + ploidy - 1, ploidy)
|
|
1101
1112
|
# assert max_genotypes == genotypes_size
|
|
1102
1113
|
else:
|
|
1103
|
-
if max_genotypes > 0:
|
|
1114
|
+
if max_genotypes > 0 or has_g_field:
|
|
1104
1115
|
# there is no GT field, but there is at least one Number=G field,
|
|
1105
1116
|
# so need to define genotypes dimension
|
|
1106
1117
|
genotypes_size = max_genotypes
|
|
@@ -1163,7 +1174,7 @@ class IntermediateColumnarFormat(vcz.Source):
|
|
|
1163
1174
|
),
|
|
1164
1175
|
fixed_field_spec(
|
|
1165
1176
|
name="variant_allele",
|
|
1166
|
-
dtype=
|
|
1177
|
+
dtype=STRING_DTYPE_NAME,
|
|
1167
1178
|
dimensions=["variants", "alleles"],
|
|
1168
1179
|
),
|
|
1169
1180
|
fixed_field_spec(
|
|
@@ -1173,7 +1184,7 @@ class IntermediateColumnarFormat(vcz.Source):
|
|
|
1173
1184
|
),
|
|
1174
1185
|
fixed_field_spec(
|
|
1175
1186
|
name="variant_id",
|
|
1176
|
-
dtype=
|
|
1187
|
+
dtype=STRING_DTYPE_NAME,
|
|
1177
1188
|
),
|
|
1178
1189
|
fixed_field_spec(
|
|
1179
1190
|
name="variant_id_mask",
|
|
@@ -1581,8 +1592,7 @@ def inspect(path):
|
|
|
1581
1592
|
raise ValueError(f"Path not found: {path}")
|
|
1582
1593
|
if (path / "metadata.json").exists():
|
|
1583
1594
|
obj = IntermediateColumnarFormat(path)
|
|
1584
|
-
|
|
1585
|
-
elif (path / ".zmetadata").exists():
|
|
1595
|
+
elif zarr_exists(path):
|
|
1586
1596
|
obj = vcz.VcfZarr(path)
|
|
1587
1597
|
else:
|
|
1588
1598
|
raise ValueError(f"{path} not in ICF or VCF Zarr format")
|
bio2zarr/vcz.py
CHANGED
|
@@ -284,7 +284,7 @@ class ZarrArraySpec:
|
|
|
284
284
|
for size in self.get_shape(schema)[1:]:
|
|
285
285
|
chunk_items *= size
|
|
286
286
|
dt = np.dtype(self.dtype)
|
|
287
|
-
if dt.kind ==
|
|
287
|
+
if dt.kind == zarr_utils.STRING_DTYPE_NAME and "samples" in self.dimensions:
|
|
288
288
|
logger.warning(
|
|
289
289
|
f"Field {self.name} is a string; max memory usage may "
|
|
290
290
|
"be a significant underestimate"
|
|
@@ -643,55 +643,60 @@ class VcfZarrWriter:
|
|
|
643
643
|
|
|
644
644
|
def encode_samples(self, root):
|
|
645
645
|
samples = self.source.samples
|
|
646
|
-
|
|
646
|
+
zarr_utils.create_group_array(
|
|
647
|
+
root,
|
|
647
648
|
"sample_id",
|
|
648
649
|
data=[sample.id for sample in samples],
|
|
649
650
|
shape=len(samples),
|
|
650
651
|
dtype="str",
|
|
651
652
|
compressor=DEFAULT_ZARR_COMPRESSOR,
|
|
652
653
|
chunks=(self.schema.get_chunks(["samples"])[0],),
|
|
654
|
+
dimension_names=["samples"],
|
|
653
655
|
)
|
|
654
|
-
array.attrs["_ARRAY_DIMENSIONS"] = ["samples"]
|
|
655
656
|
logger.debug("Samples done")
|
|
656
657
|
|
|
657
658
|
def encode_contigs(self, root):
|
|
658
659
|
contigs = self.source.contigs
|
|
659
|
-
|
|
660
|
+
zarr_utils.create_group_array(
|
|
661
|
+
root,
|
|
660
662
|
"contig_id",
|
|
661
663
|
data=[contig.id for contig in contigs],
|
|
662
664
|
shape=len(contigs),
|
|
663
665
|
dtype="str",
|
|
664
666
|
compressor=DEFAULT_ZARR_COMPRESSOR,
|
|
667
|
+
dimension_names=["contigs"],
|
|
665
668
|
)
|
|
666
|
-
array.attrs["_ARRAY_DIMENSIONS"] = ["contigs"]
|
|
667
669
|
if all(contig.length is not None for contig in contigs):
|
|
668
|
-
|
|
670
|
+
zarr_utils.create_group_array(
|
|
671
|
+
root,
|
|
669
672
|
"contig_length",
|
|
670
673
|
data=[contig.length for contig in contigs],
|
|
671
674
|
shape=len(contigs),
|
|
672
675
|
dtype=np.int64,
|
|
673
676
|
compressor=DEFAULT_ZARR_COMPRESSOR,
|
|
677
|
+
dimension_names=["contigs"],
|
|
674
678
|
)
|
|
675
|
-
array.attrs["_ARRAY_DIMENSIONS"] = ["contigs"]
|
|
676
679
|
|
|
677
680
|
def encode_filters(self, root):
|
|
678
681
|
filters = self.source.filters
|
|
679
|
-
|
|
682
|
+
zarr_utils.create_group_array(
|
|
683
|
+
root,
|
|
680
684
|
"filter_id",
|
|
681
685
|
data=[filt.id for filt in filters],
|
|
682
686
|
shape=len(filters),
|
|
683
687
|
dtype="str",
|
|
684
688
|
compressor=DEFAULT_ZARR_COMPRESSOR,
|
|
689
|
+
dimension_names=["filters"],
|
|
685
690
|
)
|
|
686
|
-
|
|
687
|
-
|
|
691
|
+
zarr_utils.create_group_array(
|
|
692
|
+
root,
|
|
688
693
|
"filter_description",
|
|
689
694
|
data=[filt.description for filt in filters],
|
|
690
695
|
shape=len(filters),
|
|
691
696
|
dtype="str",
|
|
692
697
|
compressor=DEFAULT_ZARR_COMPRESSOR,
|
|
698
|
+
dimension_names=["filters"],
|
|
693
699
|
)
|
|
694
|
-
array.attrs["_ARRAY_DIMENSIONS"] = ["filters"]
|
|
695
700
|
|
|
696
701
|
def init_array(self, root, schema, array_spec, variants_dim_size):
|
|
697
702
|
kwargs = dict(zarr_utils.ZARR_FORMAT_KWARGS)
|
|
@@ -707,34 +712,33 @@ class VcfZarrWriter:
|
|
|
707
712
|
else schema.defaults["compressor"]
|
|
708
713
|
)
|
|
709
714
|
compressor = numcodecs.get_codec(compressor)
|
|
710
|
-
if array_spec.dtype ==
|
|
715
|
+
if array_spec.dtype == zarr_utils.STRING_DTYPE_NAME:
|
|
711
716
|
if zarr_utils.zarr_v3():
|
|
712
717
|
filters = [*list(filters), numcodecs.VLenUTF8()]
|
|
713
718
|
else:
|
|
714
719
|
kwargs["object_codec"] = numcodecs.VLenUTF8()
|
|
715
720
|
|
|
716
|
-
if
|
|
721
|
+
if zarr_utils.zarr_v3():
|
|
722
|
+
# see https://github.com/zarr-developers/zarr-python/issues/3197
|
|
723
|
+
kwargs["fill_value"] = None
|
|
724
|
+
else:
|
|
717
725
|
kwargs["dimension_separator"] = self.metadata.dimension_separator
|
|
718
726
|
|
|
719
727
|
shape = schema.get_shape(array_spec.dimensions)
|
|
720
728
|
# Truncate the variants dimension if max_variant_chunks was specified
|
|
721
729
|
shape[0] = variants_dim_size
|
|
722
|
-
a =
|
|
730
|
+
a = zarr_utils.create_empty_group_array(
|
|
731
|
+
root,
|
|
723
732
|
name=array_spec.name,
|
|
724
733
|
shape=shape,
|
|
725
734
|
chunks=schema.get_chunks(array_spec.dimensions),
|
|
726
735
|
dtype=array_spec.dtype,
|
|
727
736
|
compressor=compressor,
|
|
728
737
|
filters=filters,
|
|
738
|
+
dimension_names=array_spec.dimensions,
|
|
729
739
|
**kwargs,
|
|
730
740
|
)
|
|
731
|
-
a.attrs.update(
|
|
732
|
-
{
|
|
733
|
-
"description": array_spec.description,
|
|
734
|
-
# Dimension names are part of the spec in Zarr v3
|
|
735
|
-
"_ARRAY_DIMENSIONS": array_spec.dimensions,
|
|
736
|
-
}
|
|
737
|
-
)
|
|
741
|
+
a.attrs.update({"description": array_spec.description})
|
|
738
742
|
logger.debug(f"Initialised {a}")
|
|
739
743
|
return a
|
|
740
744
|
|
|
@@ -977,19 +981,7 @@ class VcfZarrWriter:
|
|
|
977
981
|
if not src.exists():
|
|
978
982
|
# Needs test
|
|
979
983
|
raise ValueError(f"Partition {partition} of {name} does not exist")
|
|
980
|
-
|
|
981
|
-
# This is Zarr v2 specific. Chunks in v3 with start with "c" prefix.
|
|
982
|
-
chunk_files = [
|
|
983
|
-
path for path in src.iterdir() if not path.name.startswith(".")
|
|
984
|
-
]
|
|
985
|
-
# TODO check for a count of then number of files. If we require a
|
|
986
|
-
# dimension_separator of "/" then we could make stronger assertions
|
|
987
|
-
# here, as we'd always have num_variant_chunks
|
|
988
|
-
logger.debug(
|
|
989
|
-
f"Moving {len(chunk_files)} chunks for {name} partition {partition}"
|
|
990
|
-
)
|
|
991
|
-
for chunk_file in chunk_files:
|
|
992
|
-
os.rename(chunk_file, dest / chunk_file.name)
|
|
984
|
+
zarr_utils.move_chunks(src, self.arrays_path, partition, name)
|
|
993
985
|
# Finally, once all the chunks have moved into the arrays dir,
|
|
994
986
|
# we move it out of wip
|
|
995
987
|
os.rename(self.arrays_path / name, self.path / name)
|
|
@@ -1108,7 +1100,7 @@ class VcfZarrWriter:
|
|
|
1108
1100
|
|
|
1109
1101
|
class VcfZarr:
|
|
1110
1102
|
def __init__(self, path):
|
|
1111
|
-
if not (path
|
|
1103
|
+
if not zarr_utils.zarr_exists(path):
|
|
1112
1104
|
raise ValueError("Not in VcfZarr format") # NEEDS TEST
|
|
1113
1105
|
self.path = path
|
|
1114
1106
|
self.root = zarr.open(path, mode="r")
|
|
@@ -1129,7 +1121,7 @@ class VcfZarr:
|
|
|
1129
1121
|
"avg_chunk_stored": core.display_size(int(stored / array.nchunks)),
|
|
1130
1122
|
"shape": str(array.shape),
|
|
1131
1123
|
"chunk_shape": str(array.chunks),
|
|
1132
|
-
"compressor": str(array
|
|
1124
|
+
"compressor": str(zarr_utils.get_compressor(array)),
|
|
1133
1125
|
"filters": str(array.filters),
|
|
1134
1126
|
}
|
|
1135
1127
|
data.append(d)
|
|
@@ -1192,7 +1184,8 @@ class VcfZarrIndexer:
|
|
|
1192
1184
|
kwargs = {}
|
|
1193
1185
|
if not zarr_utils.zarr_v3():
|
|
1194
1186
|
kwargs["dimension_separator"] = "/"
|
|
1195
|
-
|
|
1187
|
+
zarr_utils.create_group_array(
|
|
1188
|
+
root,
|
|
1196
1189
|
"region_index",
|
|
1197
1190
|
data=index,
|
|
1198
1191
|
shape=index.shape,
|
|
@@ -1200,12 +1193,12 @@ class VcfZarrIndexer:
|
|
|
1200
1193
|
dtype=index.dtype,
|
|
1201
1194
|
compressor=numcodecs.Blosc("zstd", clevel=9, shuffle=0),
|
|
1202
1195
|
fill_value=None,
|
|
1196
|
+
dimension_names=[
|
|
1197
|
+
"region_index_values",
|
|
1198
|
+
"region_index_fields",
|
|
1199
|
+
],
|
|
1203
1200
|
**kwargs,
|
|
1204
1201
|
)
|
|
1205
|
-
array.attrs["_ARRAY_DIMENSIONS"] = [
|
|
1206
|
-
"region_index_values",
|
|
1207
|
-
"region_index_fields",
|
|
1208
|
-
]
|
|
1209
1202
|
|
|
1210
1203
|
logger.info("Consolidating Zarr metadata")
|
|
1211
1204
|
zarr.consolidate_metadata(self.path)
|
bio2zarr/zarr_utils.py
CHANGED
|
@@ -1,18 +1,185 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
1
4
|
import zarr
|
|
2
5
|
|
|
6
|
+
logger = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
# Use zarr format v2 by default even when running with zarr-python v3
|
|
9
|
+
# NOTE: this interface was introduced for experimentation with zarr
|
|
10
|
+
# format 3 and is not envisaged as a long-term interface.
|
|
11
|
+
try:
|
|
12
|
+
ZARR_FORMAT = int(os.environ.get("BIO2ZARR_ZARR_FORMAT", "2"))
|
|
13
|
+
except Exception:
|
|
14
|
+
ZARR_FORMAT = 2
|
|
15
|
+
|
|
3
16
|
|
|
4
17
|
def zarr_v3() -> bool:
|
|
5
18
|
return zarr.__version__ >= "3"
|
|
6
19
|
|
|
7
20
|
|
|
8
21
|
if zarr_v3():
|
|
9
|
-
|
|
10
|
-
|
|
22
|
+
ZARR_FORMAT_KWARGS = dict(zarr_format=ZARR_FORMAT)
|
|
23
|
+
# In zarr-python v3 strings are stored as string arrays (T) with itemsize 16
|
|
24
|
+
STRING_DTYPE_NAME = "T"
|
|
25
|
+
STRING_ITEMSIZE = 16
|
|
11
26
|
else:
|
|
12
27
|
ZARR_FORMAT_KWARGS = dict()
|
|
28
|
+
# In zarr-python v2 strings are stored as object arrays (O) with itemsize 8
|
|
29
|
+
STRING_DTYPE_NAME = "O"
|
|
30
|
+
STRING_ITEMSIZE = 8
|
|
13
31
|
|
|
14
32
|
|
|
15
33
|
# See discussion in https://github.com/zarr-developers/zarr-python/issues/2529
|
|
16
34
|
def first_dim_iter(z):
|
|
17
35
|
for chunk in range(z.cdata_shape[0]):
|
|
18
36
|
yield from z.blocks[chunk]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def zarr_exists(path):
|
|
40
|
+
# NOTE: this is too strict, we should support more general Zarrs, see #276
|
|
41
|
+
return (path / ".zmetadata").exists() or (path / "zarr.json").exists()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def create_group_array(
|
|
45
|
+
group,
|
|
46
|
+
name,
|
|
47
|
+
*,
|
|
48
|
+
data,
|
|
49
|
+
shape,
|
|
50
|
+
dtype,
|
|
51
|
+
compressor=None,
|
|
52
|
+
dimension_names=None,
|
|
53
|
+
**kwargs,
|
|
54
|
+
):
|
|
55
|
+
"""Create an array within a group."""
|
|
56
|
+
if ZARR_FORMAT == 2:
|
|
57
|
+
array = group.array(
|
|
58
|
+
name,
|
|
59
|
+
data=data,
|
|
60
|
+
shape=shape,
|
|
61
|
+
dtype=dtype,
|
|
62
|
+
compressor=compressor,
|
|
63
|
+
**kwargs,
|
|
64
|
+
)
|
|
65
|
+
if dimension_names is not None:
|
|
66
|
+
array.attrs["_ARRAY_DIMENSIONS"] = dimension_names
|
|
67
|
+
return array
|
|
68
|
+
else:
|
|
69
|
+
new_kwargs = {**kwargs}
|
|
70
|
+
if compressor is not None:
|
|
71
|
+
compressors = [_convert_v2_compressor_to_v3_codec(compressor, dtype)]
|
|
72
|
+
# TODO: seems odd that we need to set this
|
|
73
|
+
new_kwargs["compressor"] = "auto"
|
|
74
|
+
new_kwargs["compressors"] = compressors
|
|
75
|
+
return group.array(
|
|
76
|
+
name,
|
|
77
|
+
data=data,
|
|
78
|
+
shape=shape,
|
|
79
|
+
dtype=dtype,
|
|
80
|
+
dimension_names=dimension_names,
|
|
81
|
+
**new_kwargs,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def create_empty_group_array(
|
|
86
|
+
group,
|
|
87
|
+
name,
|
|
88
|
+
*,
|
|
89
|
+
shape,
|
|
90
|
+
dtype,
|
|
91
|
+
chunks,
|
|
92
|
+
compressor=None,
|
|
93
|
+
filters=None,
|
|
94
|
+
dimension_names=None,
|
|
95
|
+
**kwargs,
|
|
96
|
+
):
|
|
97
|
+
"""Create an empty array within a group."""
|
|
98
|
+
if ZARR_FORMAT == 2:
|
|
99
|
+
array = group.empty(
|
|
100
|
+
name=name,
|
|
101
|
+
shape=shape,
|
|
102
|
+
dtype=dtype,
|
|
103
|
+
chunks=chunks,
|
|
104
|
+
compressor=compressor,
|
|
105
|
+
filters=filters,
|
|
106
|
+
**kwargs,
|
|
107
|
+
)
|
|
108
|
+
if dimension_names is not None:
|
|
109
|
+
array.attrs["_ARRAY_DIMENSIONS"] = dimension_names
|
|
110
|
+
return array
|
|
111
|
+
else:
|
|
112
|
+
new_kwargs = {**kwargs}
|
|
113
|
+
new_kwargs.pop("zarr_format")
|
|
114
|
+
if compressor is not None:
|
|
115
|
+
compressors = [_convert_v2_compressor_to_v3_codec(compressor, dtype)]
|
|
116
|
+
# TODO: seems odd that we need to set this
|
|
117
|
+
new_kwargs["compressor"] = "auto"
|
|
118
|
+
new_kwargs["compressors"] = compressors
|
|
119
|
+
return group.array(
|
|
120
|
+
name=name,
|
|
121
|
+
shape=shape,
|
|
122
|
+
dtype=dtype,
|
|
123
|
+
chunks=chunks,
|
|
124
|
+
dimension_names=dimension_names,
|
|
125
|
+
**new_kwargs,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def get_compressor(array):
|
|
130
|
+
try:
|
|
131
|
+
# zarr format v2: compressor (singular)
|
|
132
|
+
return array.compressor
|
|
133
|
+
except TypeError as e:
|
|
134
|
+
# zarr format v3: compressors (plural)
|
|
135
|
+
compressors = array.compressors
|
|
136
|
+
if len(compressors) > 1:
|
|
137
|
+
raise ValueError(
|
|
138
|
+
f"Only one compressor is supported but found {compressors}"
|
|
139
|
+
) from e
|
|
140
|
+
return compressors[0] if len(compressors) == 1 else None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def get_compressor_config(array):
|
|
144
|
+
compressor = get_compressor(array)
|
|
145
|
+
if hasattr(compressor, "get_config"):
|
|
146
|
+
return compressor.get_config()
|
|
147
|
+
else:
|
|
148
|
+
from zarr.codecs.blosc import BloscCodec
|
|
149
|
+
|
|
150
|
+
if isinstance(compressor, BloscCodec):
|
|
151
|
+
return compressor._blosc_codec.get_config()
|
|
152
|
+
else:
|
|
153
|
+
return compressor.as_dict()["configuration"]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _convert_v2_compressor_to_v3_codec(compressor, dtype):
|
|
157
|
+
# import here since this is zarr-python v3 only
|
|
158
|
+
from zarr.core.dtype import parse_dtype
|
|
159
|
+
from zarr.metadata.migrate_v3 import _convert_compressor
|
|
160
|
+
|
|
161
|
+
return _convert_compressor(compressor, parse_dtype(dtype, zarr_format=3))
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def move_chunks(src_path, dest_path, partition, name):
|
|
165
|
+
if ZARR_FORMAT == 2:
|
|
166
|
+
dest = dest_path / name
|
|
167
|
+
chunk_files = [
|
|
168
|
+
path for path in src_path.iterdir() if not path.name.startswith(".")
|
|
169
|
+
]
|
|
170
|
+
else:
|
|
171
|
+
dest = dest_path / name / "c"
|
|
172
|
+
dest.mkdir(exist_ok=True)
|
|
173
|
+
src_chunks = src_path / "c"
|
|
174
|
+
if not src_chunks.exists():
|
|
175
|
+
chunk_files = []
|
|
176
|
+
else:
|
|
177
|
+
chunk_files = [
|
|
178
|
+
path for path in src_chunks.iterdir() if not path.name.startswith(".")
|
|
179
|
+
]
|
|
180
|
+
# TODO check for a count of then number of files. If we require a
|
|
181
|
+
# dimension_separator of "/" then we could make stronger assertions
|
|
182
|
+
# here, as we'd always have num_variant_chunks
|
|
183
|
+
logger.debug(f"Moving {len(chunk_files)} chunks for {name} partition {partition}")
|
|
184
|
+
for chunk_file in chunk_files:
|
|
185
|
+
os.rename(chunk_file, dest / chunk_file.name)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bio2zarr
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: Convert bioinformatics data to Zarr
|
|
5
5
|
Author-email: sgkit Developers <project@sgkit.dev>
|
|
6
6
|
License: Apache License
|
|
@@ -219,11 +219,12 @@ Classifier: Programming Language :: Python :: 3
|
|
|
219
219
|
Classifier: Programming Language :: Python :: 3.10
|
|
220
220
|
Classifier: Programming Language :: Python :: 3.11
|
|
221
221
|
Classifier: Programming Language :: Python :: 3.12
|
|
222
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
222
223
|
Classifier: Topic :: Scientific/Engineering
|
|
223
224
|
Requires-Python: >=3.10
|
|
224
225
|
Description-Content-Type: text/markdown
|
|
225
226
|
License-File: LICENSE
|
|
226
|
-
Requires-Dist: numpy>=
|
|
227
|
+
Requires-Dist: numpy>=2
|
|
227
228
|
Requires-Dist: zarr<3,>=2.17
|
|
228
229
|
Requires-Dist: numcodecs[msgpack]!=0.14.0,!=0.14.1,<0.16
|
|
229
230
|
Requires-Dist: tabulate
|
|
@@ -240,22 +241,25 @@ Requires-Dist: pysam; extra == "dev"
|
|
|
240
241
|
Requires-Dist: pytest; extra == "dev"
|
|
241
242
|
Requires-Dist: pytest-coverage; extra == "dev"
|
|
242
243
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
243
|
-
Requires-Dist: sgkit>=0.8.0; extra == "dev"
|
|
244
244
|
Requires-Dist: tqdm; extra == "dev"
|
|
245
|
-
Requires-Dist: tskit>=
|
|
245
|
+
Requires-Dist: tskit>=1; extra == "dev"
|
|
246
246
|
Requires-Dist: bed_reader; extra == "dev"
|
|
247
247
|
Requires-Dist: cyvcf2; extra == "dev"
|
|
248
|
+
Requires-Dist: xarray<2025.03.1; extra == "dev"
|
|
249
|
+
Requires-Dist: dask[array]<=2024.8.0,>=2022.01.0; extra == "dev"
|
|
248
250
|
Provides-Extra: tskit
|
|
249
|
-
Requires-Dist: tskit>=
|
|
251
|
+
Requires-Dist: tskit>=1; extra == "tskit"
|
|
250
252
|
Provides-Extra: vcf
|
|
251
253
|
Requires-Dist: cyvcf2; extra == "vcf"
|
|
252
254
|
Provides-Extra: all
|
|
253
|
-
Requires-Dist: tskit>=
|
|
255
|
+
Requires-Dist: tskit>=1; extra == "all"
|
|
254
256
|
Requires-Dist: cyvcf2; extra == "all"
|
|
255
257
|
Dynamic: license-file
|
|
256
258
|
|
|
257
259
|
[](https://github.com/sgkit-dev/bio2zarr/actions/workflows/ci.yml)
|
|
258
260
|
[](https://coveralls.io/github/sgkit-dev/bio2zarr)
|
|
261
|
+
[](https://pepy.tech/projects/bio2zarr)
|
|
262
|
+
[](https://anaconda.org/bioconda/bio2zarr)
|
|
259
263
|
|
|
260
264
|
|
|
261
265
|
# bio2zarr
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
bio2zarr/__init__.py,sha256=KiUGyya-9RHNcBldB8Lc1g3rP3CRjaL-5Olben0_6qA,49
|
|
2
|
+
bio2zarr/__main__.py,sha256=4pF1IBO4CcswA_Fe7NmK_pqGOUHCwsd_8YU7dP92n9c,578
|
|
3
|
+
bio2zarr/_version.py,sha256=szvPIs2C82UunpzuvVg3MbF4QhzbBYTsVJ8DmPfq6_E,704
|
|
4
|
+
bio2zarr/cli.py,sha256=iHfmc-qU2roQXm9Bt3TyR2bmgH-2p3DqYosQERePMZ8,17873
|
|
5
|
+
bio2zarr/constants.py,sha256=QjbtFeBUZ-XqG35ZFIFj8EYrta_EwUkC2B5VGRP7oQs,425
|
|
6
|
+
bio2zarr/core.py,sha256=mYi2Vmh_YdNEd3weE0zZIPr7ToEUynq8nNCVvONVaqM,12140
|
|
7
|
+
bio2zarr/plink.py,sha256=ELGhsSdH1Xmxx6agCfTx1kYyntrU0XQ384wxTEn87BM,11717
|
|
8
|
+
bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
|
|
9
|
+
bio2zarr/tskit.py,sha256=iLheNWtX7Pad1oNfijf6THMphzXwEtuQ6Zmi94pRZHg,10847
|
|
10
|
+
bio2zarr/typing.py,sha256=HdXNwIBEqYtGNwKyeUDQv6-H-pKSwNZO0qD2_VxTXEY,48
|
|
11
|
+
bio2zarr/vcf.py,sha256=3aXCdTAIuGoUmpbPIPVKhNj4oevkF0s_l7gRB0QmaPU,60738
|
|
12
|
+
bio2zarr/vcf_utils.py,sha256=xrsmxpu1xyXtl6FaYuU562WZP-iVUIaqzxD-11MHfAM,19541
|
|
13
|
+
bio2zarr/vcz.py,sha256=3IkcrAsQkWCiHiMBh0bbxzHtvX8qaUV3W84y1ojUWSs,42204
|
|
14
|
+
bio2zarr/vcz_verification.py,sha256=4YZZnAuMH-z9uPqAeBONdsZADz2MtY57D7RAbMa90yY,8119
|
|
15
|
+
bio2zarr/zarr_utils.py,sha256=4vE6CqnOLqZExc_7Z0jGGbA-kjqz9NPSqSBue10bzHk,5443
|
|
16
|
+
bio2zarr-0.1.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
17
|
+
bio2zarr-0.1.7.dist-info/METADATA,sha256=wXANeYEuZh41wH_nay96e4xobWhpBhL-BzkBcdGAR04,15736
|
|
18
|
+
bio2zarr-0.1.7.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
19
|
+
bio2zarr-0.1.7.dist-info/entry_points.txt,sha256=bbIbR8fWMGruyLaoCxO1O22nKidWKUzMgYbTYdsN6YQ,181
|
|
20
|
+
bio2zarr-0.1.7.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
|
|
21
|
+
bio2zarr-0.1.7.dist-info/RECORD,,
|
bio2zarr-0.1.6.dist-info/RECORD
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
bio2zarr/__init__.py,sha256=KiUGyya-9RHNcBldB8Lc1g3rP3CRjaL-5Olben0_6qA,49
|
|
2
|
-
bio2zarr/__main__.py,sha256=4pF1IBO4CcswA_Fe7NmK_pqGOUHCwsd_8YU7dP92n9c,578
|
|
3
|
-
bio2zarr/_version.py,sha256=ESbJO0YD7TYfOUv_WDIJJgWELGepEWsoyhqVifEcXPA,511
|
|
4
|
-
bio2zarr/cli.py,sha256=WrLfUyV6VggqtDAcI3c1S5YN62ZVOent5f9JzSkX_vA,17570
|
|
5
|
-
bio2zarr/constants.py,sha256=QjbtFeBUZ-XqG35ZFIFj8EYrta_EwUkC2B5VGRP7oQs,425
|
|
6
|
-
bio2zarr/core.py,sha256=mYi2Vmh_YdNEd3weE0zZIPr7ToEUynq8nNCVvONVaqM,12140
|
|
7
|
-
bio2zarr/plink.py,sha256=hkrgXKkxfExgOpgNkj0SszEh9qA8R3T6kXCd-4jsXO8,11498
|
|
8
|
-
bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
|
|
9
|
-
bio2zarr/tskit.py,sha256=6YWbh8M3VJQtVpy2pD8x7Zf0jmc4HOIZwIlWcVaqjvU,10816
|
|
10
|
-
bio2zarr/typing.py,sha256=HdXNwIBEqYtGNwKyeUDQv6-H-pKSwNZO0qD2_VxTXEY,48
|
|
11
|
-
bio2zarr/vcf.py,sha256=_eQJm74YcKBfKDGM283ibhE40nUrkxO6Ee1giDfKjLg,60207
|
|
12
|
-
bio2zarr/vcf_utils.py,sha256=xrsmxpu1xyXtl6FaYuU562WZP-iVUIaqzxD-11MHfAM,19541
|
|
13
|
-
bio2zarr/vcz.py,sha256=yD2mvDZuzlAH73qPRVsUwqHSK-9HMdV4Vcif2JxfcCM,42610
|
|
14
|
-
bio2zarr/vcz_verification.py,sha256=4YZZnAuMH-z9uPqAeBONdsZADz2MtY57D7RAbMa90yY,8119
|
|
15
|
-
bio2zarr/zarr_utils.py,sha256=99J7ycaG92K_AcWRF2S9A4ec2_4cXL6kjYT99GBfli4,415
|
|
16
|
-
bio2zarr-0.1.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
17
|
-
bio2zarr-0.1.6.dist-info/METADATA,sha256=Me_jLTDVz76lOtidDs1gVrXnwU_rm4ARBpEz_Ozmt6U,15405
|
|
18
|
-
bio2zarr-0.1.6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
19
|
-
bio2zarr-0.1.6.dist-info/entry_points.txt,sha256=bbIbR8fWMGruyLaoCxO1O22nKidWKUzMgYbTYdsN6YQ,181
|
|
20
|
-
bio2zarr-0.1.6.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
|
|
21
|
-
bio2zarr-0.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|