bio2zarr 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bio2zarr might be problematic. Click here for more details.
- bio2zarr/_version.py +2 -2
- bio2zarr/cli.py +79 -35
- {bio2zarr-0.0.2.dist-info → bio2zarr-0.0.3.dist-info}/METADATA +1 -1
- {bio2zarr-0.0.2.dist-info → bio2zarr-0.0.3.dist-info}/RECORD +8 -8
- {bio2zarr-0.0.2.dist-info → bio2zarr-0.0.3.dist-info}/LICENSE +0 -0
- {bio2zarr-0.0.2.dist-info → bio2zarr-0.0.3.dist-info}/WHEEL +0 -0
- {bio2zarr-0.0.2.dist-info → bio2zarr-0.0.3.dist-info}/entry_points.txt +0 -0
- {bio2zarr-0.0.2.dist-info → bio2zarr-0.0.3.dist-info}/top_level.txt +0 -0
bio2zarr/_version.py
CHANGED
bio2zarr/cli.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import pathlib
|
|
4
|
+
import shutil
|
|
5
|
+
|
|
1
6
|
import click
|
|
2
7
|
import tabulate
|
|
3
8
|
import coloredlogs
|
|
@@ -8,6 +13,9 @@ from . import plink
|
|
|
8
13
|
from . import provenance
|
|
9
14
|
|
|
10
15
|
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
11
19
|
class NaturalOrderGroup(click.Group):
|
|
12
20
|
"""
|
|
13
21
|
List commands in the order they are provided in the help text.
|
|
@@ -18,8 +26,32 @@ class NaturalOrderGroup(click.Group):
|
|
|
18
26
|
|
|
19
27
|
|
|
20
28
|
# Common arguments/options
|
|
29
|
+
vcfs = click.argument(
|
|
30
|
+
"vcfs", nargs=-1, required=True, type=click.Path(exists=True, dir_okay=False)
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
new_icf_path = click.argument(
|
|
34
|
+
"icf_path", type=click.Path(file_okay=False, dir_okay=True)
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
icf_path = click.argument(
|
|
38
|
+
"icf_path", type=click.Path(exists=True, file_okay=False, dir_okay=True)
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
new_zarr_path = click.argument(
|
|
42
|
+
"zarr_path", type=click.Path(file_okay=False, dir_okay=True)
|
|
43
|
+
)
|
|
44
|
+
|
|
21
45
|
verbose = click.option("-v", "--verbose", count=True, help="Increase verbosity")
|
|
22
46
|
|
|
47
|
+
force = click.option(
|
|
48
|
+
"-f",
|
|
49
|
+
"--force",
|
|
50
|
+
is_flag=True,
|
|
51
|
+
flag_value=True,
|
|
52
|
+
help="Force overwriting of existing directories",
|
|
53
|
+
)
|
|
54
|
+
|
|
23
55
|
version = click.version_option(version=f"{provenance.__version__}")
|
|
24
56
|
|
|
25
57
|
worker_processes = click.option(
|
|
@@ -64,20 +96,39 @@ def setup_logging(verbosity):
|
|
|
64
96
|
coloredlogs.install(level=level)
|
|
65
97
|
|
|
66
98
|
|
|
99
|
+
def check_overwrite_dir(path, force):
|
|
100
|
+
path = pathlib.Path(path)
|
|
101
|
+
if path.exists():
|
|
102
|
+
if not force:
|
|
103
|
+
click.confirm(
|
|
104
|
+
f"Do you want to overwrite {path}? (use --force to skip this check)",
|
|
105
|
+
abort=True,
|
|
106
|
+
)
|
|
107
|
+
# These trees can be mondo-big and on slow file systems, so it's entirely
|
|
108
|
+
# feasible that the delete would fail or be killed. This makes it less likely
|
|
109
|
+
# that partially deleted paths are mistaken for good paths.
|
|
110
|
+
tmp_delete_path = path.with_suffix(f"{path.suffix}.{os.getpid()}.DELETING")
|
|
111
|
+
logger.info(f"Deleting {path} (renamed to {tmp_delete_path} while in progress)")
|
|
112
|
+
os.rename(path, tmp_delete_path)
|
|
113
|
+
shutil.rmtree(tmp_delete_path)
|
|
114
|
+
|
|
115
|
+
|
|
67
116
|
@click.command
|
|
68
|
-
@
|
|
69
|
-
@
|
|
117
|
+
@vcfs
|
|
118
|
+
@new_icf_path
|
|
119
|
+
@force
|
|
70
120
|
@verbose
|
|
71
121
|
@worker_processes
|
|
72
122
|
@column_chunk_size
|
|
73
|
-
def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
|
|
123
|
+
def explode(vcfs, icf_path, force, verbose, worker_processes, column_chunk_size):
|
|
74
124
|
"""
|
|
75
125
|
Convert VCF(s) to intermediate columnar format
|
|
76
126
|
"""
|
|
77
127
|
setup_logging(verbose)
|
|
128
|
+
check_overwrite_dir(icf_path, force)
|
|
78
129
|
vcf.explode(
|
|
79
130
|
vcfs,
|
|
80
|
-
|
|
131
|
+
icf_path,
|
|
81
132
|
worker_processes=worker_processes,
|
|
82
133
|
column_chunk_size=column_chunk_size,
|
|
83
134
|
show_progress=True,
|
|
@@ -85,20 +136,22 @@ def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
|
|
|
85
136
|
|
|
86
137
|
|
|
87
138
|
@click.command
|
|
88
|
-
@
|
|
89
|
-
@
|
|
90
|
-
@click.argument("num_partitions", type=
|
|
139
|
+
@vcfs
|
|
140
|
+
@new_icf_path
|
|
141
|
+
@click.argument("num_partitions", type=click.IntRange(min=1))
|
|
142
|
+
@force
|
|
91
143
|
@column_chunk_size
|
|
92
144
|
@verbose
|
|
93
145
|
@worker_processes
|
|
94
146
|
def dexplode_init(
|
|
95
|
-
vcfs, icf_path, num_partitions, column_chunk_size, verbose, worker_processes
|
|
147
|
+
vcfs, icf_path, num_partitions, force, column_chunk_size, verbose, worker_processes
|
|
96
148
|
):
|
|
97
149
|
"""
|
|
98
|
-
Initial step for
|
|
150
|
+
Initial step for distributed conversion of VCF(s) to intermediate columnar format
|
|
99
151
|
over the requested number of partitions.
|
|
100
152
|
"""
|
|
101
153
|
setup_logging(verbose)
|
|
154
|
+
check_overwrite_dir(icf_path, force)
|
|
102
155
|
num_partitions = vcf.explode_init(
|
|
103
156
|
icf_path,
|
|
104
157
|
vcfs,
|
|
@@ -111,12 +164,12 @@ def dexplode_init(
|
|
|
111
164
|
|
|
112
165
|
|
|
113
166
|
@click.command
|
|
114
|
-
@
|
|
115
|
-
@click.argument("partition", type=
|
|
167
|
+
@icf_path
|
|
168
|
+
@click.argument("partition", type=click.IntRange(min=0))
|
|
116
169
|
@verbose
|
|
117
170
|
def dexplode_partition(icf_path, partition, verbose):
|
|
118
171
|
"""
|
|
119
|
-
Convert a VCF partition
|
|
172
|
+
Convert a VCF partition to intermediate columnar format. Must be called *after*
|
|
120
173
|
the ICF path has been initialised with dexplode_init. Partition indexes must be
|
|
121
174
|
from 0 (inclusive) to the number of partitions returned by dexplode_init (exclusive).
|
|
122
175
|
"""
|
|
@@ -129,26 +182,26 @@ def dexplode_partition(icf_path, partition, verbose):
|
|
|
129
182
|
@verbose
|
|
130
183
|
def dexplode_finalise(path, verbose):
|
|
131
184
|
"""
|
|
132
|
-
Final step for
|
|
185
|
+
Final step for distributed conversion of VCF(s) to intermediate columnar format.
|
|
133
186
|
"""
|
|
134
187
|
setup_logging(verbose)
|
|
135
188
|
vcf.explode_finalise(path)
|
|
136
189
|
|
|
137
190
|
|
|
138
191
|
@click.command
|
|
139
|
-
@click.argument("
|
|
192
|
+
@click.argument("path", type=click.Path())
|
|
140
193
|
@verbose
|
|
141
|
-
def inspect(
|
|
194
|
+
def inspect(path, verbose):
|
|
142
195
|
"""
|
|
143
|
-
Inspect an intermediate format or Zarr path.
|
|
196
|
+
Inspect an intermediate columnar format or Zarr path.
|
|
144
197
|
"""
|
|
145
198
|
setup_logging(verbose)
|
|
146
|
-
data = vcf.inspect(
|
|
199
|
+
data = vcf.inspect(path)
|
|
147
200
|
click.echo(tabulate.tabulate(data, headers="keys"))
|
|
148
201
|
|
|
149
202
|
|
|
150
203
|
@click.command
|
|
151
|
-
@
|
|
204
|
+
@icf_path
|
|
152
205
|
def mkschema(icf_path):
|
|
153
206
|
"""
|
|
154
207
|
Generate a schema for zarr encoding
|
|
@@ -158,8 +211,9 @@ def mkschema(icf_path):
|
|
|
158
211
|
|
|
159
212
|
|
|
160
213
|
@click.command
|
|
161
|
-
@
|
|
162
|
-
@
|
|
214
|
+
@icf_path
|
|
215
|
+
@new_zarr_path
|
|
216
|
+
@force
|
|
163
217
|
@verbose
|
|
164
218
|
@click.option("-s", "--schema", default=None, type=click.Path(exists=True))
|
|
165
219
|
@variants_chunk_size
|
|
@@ -186,6 +240,7 @@ def mkschema(icf_path):
|
|
|
186
240
|
def encode(
|
|
187
241
|
icf_path,
|
|
188
242
|
zarr_path,
|
|
243
|
+
force,
|
|
189
244
|
verbose,
|
|
190
245
|
schema,
|
|
191
246
|
variants_chunk_size,
|
|
@@ -198,10 +253,11 @@ def encode(
|
|
|
198
253
|
Encode intermediate columnar format (see explode) to vcfzarr.
|
|
199
254
|
"""
|
|
200
255
|
setup_logging(verbose)
|
|
256
|
+
check_overwrite_dir(zarr_path, force)
|
|
201
257
|
vcf.encode(
|
|
202
258
|
icf_path,
|
|
203
259
|
zarr_path,
|
|
204
|
-
schema,
|
|
260
|
+
schema_path=schema,
|
|
205
261
|
variants_chunk_size=variants_chunk_size,
|
|
206
262
|
samples_chunk_size=samples_chunk_size,
|
|
207
263
|
max_v_chunks=max_variant_chunks,
|
|
@@ -212,8 +268,8 @@ def encode(
|
|
|
212
268
|
|
|
213
269
|
|
|
214
270
|
@click.command(name="convert")
|
|
215
|
-
@
|
|
216
|
-
@
|
|
271
|
+
@vcfs
|
|
272
|
+
@new_zarr_path
|
|
217
273
|
@variants_chunk_size
|
|
218
274
|
@samples_chunk_size
|
|
219
275
|
@verbose
|
|
@@ -235,17 +291,6 @@ def convert_vcf(
|
|
|
235
291
|
)
|
|
236
292
|
|
|
237
293
|
|
|
238
|
-
@click.command
|
|
239
|
-
@click.argument("vcfs", nargs=-1, required=True)
|
|
240
|
-
@click.argument("zarr_path", type=click.Path())
|
|
241
|
-
def validate(vcfs, zarr_path):
|
|
242
|
-
"""
|
|
243
|
-
Development only, do not use. Will be removed before release.
|
|
244
|
-
"""
|
|
245
|
-
# FIXME! Will silently not look at remaining VCFs
|
|
246
|
-
vcf.validate(vcfs[0], zarr_path, show_progress=True)
|
|
247
|
-
|
|
248
|
-
|
|
249
294
|
@version
|
|
250
295
|
@click.group(cls=NaturalOrderGroup)
|
|
251
296
|
def vcf2zarr():
|
|
@@ -309,7 +354,6 @@ vcf2zarr.add_command(encode)
|
|
|
309
354
|
vcf2zarr.add_command(dexplode_init)
|
|
310
355
|
vcf2zarr.add_command(dexplode_partition)
|
|
311
356
|
vcf2zarr.add_command(dexplode_finalise)
|
|
312
|
-
vcf2zarr.add_command(validate)
|
|
313
357
|
|
|
314
358
|
|
|
315
359
|
@click.command(name="convert")
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
bio2zarr/__init__.py,sha256=yIJYx4GyKtOLOtODOX0kGCeGPYgQ-TBbsRdT1NwBpQQ,37
|
|
2
2
|
bio2zarr/__main__.py,sha256=3cgaQ4x8YKXt-9xC2GLrHnS6UA38y1GXqttwZiBZJg4,525
|
|
3
|
-
bio2zarr/_version.py,sha256=
|
|
4
|
-
bio2zarr/cli.py,sha256=
|
|
3
|
+
bio2zarr/_version.py,sha256=hB095avW4HuDZxn8qPHRG1UMzSSonb8ZDAsLxt9hmk8,411
|
|
4
|
+
bio2zarr/cli.py,sha256=N_vEFj730p_TL7Dk9m9T3ceAhVV58BMYRDmBmoeKH7A,10766
|
|
5
5
|
bio2zarr/core.py,sha256=sBlWmHjcb7tAn_7WQRBdrbGcEd_lT_3HTQ_JbzomVMg,8111
|
|
6
6
|
bio2zarr/plink.py,sha256=llhfP-v44BVPvgCcwXktk0YrKaJSII63U_PTtpHlGtM,6755
|
|
7
7
|
bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
|
|
8
8
|
bio2zarr/typing.py,sha256=wZ99Zzp5BD9Nqpd-S5bn38fSdPzfj6Z9IHPBfZqt9Gs,78
|
|
9
9
|
bio2zarr/vcf.py,sha256=g2TqH9Lbp4Ds8kjOnjvHvoMAgnG6Kx8pKPN1bqBKKIQ,72201
|
|
10
10
|
bio2zarr/vcf_utils.py,sha256=_kMZdpye15HGpniv8wwISw0L6NEEi54ZFaTcM83wLGs,16751
|
|
11
|
-
bio2zarr-0.0.
|
|
12
|
-
bio2zarr-0.0.
|
|
13
|
-
bio2zarr-0.0.
|
|
14
|
-
bio2zarr-0.0.
|
|
15
|
-
bio2zarr-0.0.
|
|
16
|
-
bio2zarr-0.0.
|
|
11
|
+
bio2zarr-0.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
12
|
+
bio2zarr-0.0.3.dist-info/METADATA,sha256=dc2y5xrnkcvD1qmKGFL5GrsbM1_tiIlAYB2GrAlLunM,1106
|
|
13
|
+
bio2zarr-0.0.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
14
|
+
bio2zarr-0.0.3.dist-info/entry_points.txt,sha256=pklStOdATE5hHJm4qiIvmhHkcn21Si_XAu6MC7ieNrk,131
|
|
15
|
+
bio2zarr-0.0.3.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
|
|
16
|
+
bio2zarr-0.0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|