bio2zarr 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bio2zarr might be problematic. Click here for more details.

bio2zarr/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.0.2'
16
- __version_tuple__ = version_tuple = (0, 0, 2)
15
+ __version__ = version = '0.0.3'
16
+ __version_tuple__ = version_tuple = (0, 0, 3)
bio2zarr/cli.py CHANGED
@@ -1,3 +1,8 @@
1
+ import logging
2
+ import os
3
+ import pathlib
4
+ import shutil
5
+
1
6
  import click
2
7
  import tabulate
3
8
  import coloredlogs
@@ -8,6 +13,9 @@ from . import plink
8
13
  from . import provenance
9
14
 
10
15
 
16
+ logger = logging.getLogger(__name__)
17
+
18
+
11
19
  class NaturalOrderGroup(click.Group):
12
20
  """
13
21
  List commands in the order they are provided in the help text.
@@ -18,8 +26,32 @@ class NaturalOrderGroup(click.Group):
18
26
 
19
27
 
20
28
  # Common arguments/options
29
+ vcfs = click.argument(
30
+ "vcfs", nargs=-1, required=True, type=click.Path(exists=True, dir_okay=False)
31
+ )
32
+
33
+ new_icf_path = click.argument(
34
+ "icf_path", type=click.Path(file_okay=False, dir_okay=True)
35
+ )
36
+
37
+ icf_path = click.argument(
38
+ "icf_path", type=click.Path(exists=True, file_okay=False, dir_okay=True)
39
+ )
40
+
41
+ new_zarr_path = click.argument(
42
+ "zarr_path", type=click.Path(file_okay=False, dir_okay=True)
43
+ )
44
+
21
45
  verbose = click.option("-v", "--verbose", count=True, help="Increase verbosity")
22
46
 
47
+ force = click.option(
48
+ "-f",
49
+ "--force",
50
+ is_flag=True,
51
+ flag_value=True,
52
+ help="Force overwriting of existing directories",
53
+ )
54
+
23
55
  version = click.version_option(version=f"{provenance.__version__}")
24
56
 
25
57
  worker_processes = click.option(
@@ -64,20 +96,39 @@ def setup_logging(verbosity):
64
96
  coloredlogs.install(level=level)
65
97
 
66
98
 
99
+ def check_overwrite_dir(path, force):
100
+ path = pathlib.Path(path)
101
+ if path.exists():
102
+ if not force:
103
+ click.confirm(
104
+ f"Do you want to overwrite {path}? (use --force to skip this check)",
105
+ abort=True,
106
+ )
107
+ # These trees can be mondo-big and on slow file systems, so it's entirely
108
+ # feasible that the delete would fail or be killed. This makes it less likely
109
+ # that partially deleted paths are mistaken for good paths.
110
+ tmp_delete_path = path.with_suffix(f"{path.suffix}.{os.getpid()}.DELETING")
111
+ logger.info(f"Deleting {path} (renamed to {tmp_delete_path} while in progress)")
112
+ os.rename(path, tmp_delete_path)
113
+ shutil.rmtree(tmp_delete_path)
114
+
115
+
67
116
  @click.command
68
- @click.argument("vcfs", nargs=-1, required=True)
69
- @click.argument("zarr_path", type=click.Path())
117
+ @vcfs
118
+ @new_icf_path
119
+ @force
70
120
  @verbose
71
121
  @worker_processes
72
122
  @column_chunk_size
73
- def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
123
+ def explode(vcfs, icf_path, force, verbose, worker_processes, column_chunk_size):
74
124
  """
75
125
  Convert VCF(s) to intermediate columnar format
76
126
  """
77
127
  setup_logging(verbose)
128
+ check_overwrite_dir(icf_path, force)
78
129
  vcf.explode(
79
130
  vcfs,
80
- zarr_path,
131
+ icf_path,
81
132
  worker_processes=worker_processes,
82
133
  column_chunk_size=column_chunk_size,
83
134
  show_progress=True,
@@ -85,20 +136,22 @@ def explode(vcfs, zarr_path, verbose, worker_processes, column_chunk_size):
85
136
 
86
137
 
87
138
  @click.command
88
- @click.argument("vcfs", nargs=-1, required=True)
89
- @click.argument("icf_path", type=click.Path())
90
- @click.argument("num_partitions", type=int)
139
+ @vcfs
140
+ @new_icf_path
141
+ @click.argument("num_partitions", type=click.IntRange(min=1))
142
+ @force
91
143
  @column_chunk_size
92
144
  @verbose
93
145
  @worker_processes
94
146
  def dexplode_init(
95
- vcfs, icf_path, num_partitions, column_chunk_size, verbose, worker_processes
147
+ vcfs, icf_path, num_partitions, force, column_chunk_size, verbose, worker_processes
96
148
  ):
97
149
  """
98
- Initial step for parallel conversion of VCF(s) to intermediate columnar format
150
+ Initial step for distributed conversion of VCF(s) to intermediate columnar format
99
151
  over the requested number of paritions.
100
152
  """
101
153
  setup_logging(verbose)
154
+ check_overwrite_dir(icf_path, force)
102
155
  num_partitions = vcf.explode_init(
103
156
  icf_path,
104
157
  vcfs,
@@ -111,12 +164,12 @@ def dexplode_init(
111
164
 
112
165
 
113
166
  @click.command
114
- @click.argument("icf_path", type=click.Path())
115
- @click.argument("partition", type=int)
167
+ @icf_path
168
+ @click.argument("partition", type=click.IntRange(min=0))
116
169
  @verbose
117
170
  def dexplode_partition(icf_path, partition, verbose):
118
171
  """
119
- Convert a VCF partition into intermediate columnar format. Must be called *after*
172
+ Convert a VCF partition to intermediate columnar format. Must be called *after*
120
173
  the ICF path has been initialised with dexplode_init. Partition indexes must be
121
174
  from 0 (inclusive) to the number of paritions returned by dexplode_init (exclusive).
122
175
  """
@@ -129,26 +182,26 @@ def dexplode_partition(icf_path, partition, verbose):
129
182
  @verbose
130
183
  def dexplode_finalise(path, verbose):
131
184
  """
132
- Final step for parallel conversion of VCF(s) to intermediate columnar format
185
+ Final step for distributed conversion of VCF(s) to intermediate columnar format.
133
186
  """
134
187
  setup_logging(verbose)
135
188
  vcf.explode_finalise(path)
136
189
 
137
190
 
138
191
  @click.command
139
- @click.argument("icf_path", type=click.Path())
192
+ @click.argument("path", type=click.Path())
140
193
  @verbose
141
- def inspect(icf_path, verbose):
194
+ def inspect(path, verbose):
142
195
  """
143
- Inspect an intermediate format or Zarr path.
196
+ Inspect an intermediate columnar format or Zarr path.
144
197
  """
145
198
  setup_logging(verbose)
146
- data = vcf.inspect(icf_path)
199
+ data = vcf.inspect(path)
147
200
  click.echo(tabulate.tabulate(data, headers="keys"))
148
201
 
149
202
 
150
203
  @click.command
151
- @click.argument("icf_path", type=click.Path())
204
+ @icf_path
152
205
  def mkschema(icf_path):
153
206
  """
154
207
  Generate a schema for zarr encoding
@@ -158,8 +211,9 @@ def mkschema(icf_path):
158
211
 
159
212
 
160
213
  @click.command
161
- @click.argument("icf_path", type=click.Path())
162
- @click.argument("zarr_path", type=click.Path())
214
+ @icf_path
215
+ @new_zarr_path
216
+ @force
163
217
  @verbose
164
218
  @click.option("-s", "--schema", default=None, type=click.Path(exists=True))
165
219
  @variants_chunk_size
@@ -186,6 +240,7 @@ def mkschema(icf_path):
186
240
  def encode(
187
241
  icf_path,
188
242
  zarr_path,
243
+ force,
189
244
  verbose,
190
245
  schema,
191
246
  variants_chunk_size,
@@ -198,10 +253,11 @@ def encode(
198
253
  Encode intermediate columnar format (see explode) to vcfzarr.
199
254
  """
200
255
  setup_logging(verbose)
256
+ check_overwrite_dir(zarr_path, force)
201
257
  vcf.encode(
202
258
  icf_path,
203
259
  zarr_path,
204
- schema,
260
+ schema_path=schema,
205
261
  variants_chunk_size=variants_chunk_size,
206
262
  samples_chunk_size=samples_chunk_size,
207
263
  max_v_chunks=max_variant_chunks,
@@ -212,8 +268,8 @@ def encode(
212
268
 
213
269
 
214
270
  @click.command(name="convert")
215
- @click.argument("vcfs", nargs=-1, required=True)
216
- @click.argument("zarr_path", type=click.Path())
271
+ @vcfs
272
+ @new_zarr_path
217
273
  @variants_chunk_size
218
274
  @samples_chunk_size
219
275
  @verbose
@@ -235,17 +291,6 @@ def convert_vcf(
235
291
  )
236
292
 
237
293
 
238
- @click.command
239
- @click.argument("vcfs", nargs=-1, required=True)
240
- @click.argument("zarr_path", type=click.Path())
241
- def validate(vcfs, zarr_path):
242
- """
243
- Development only, do not use. Will be removed before release.
244
- """
245
- # FIXME! Will silently not look at remaining VCFs
246
- vcf.validate(vcfs[0], zarr_path, show_progress=True)
247
-
248
-
249
294
  @version
250
295
  @click.group(cls=NaturalOrderGroup)
251
296
  def vcf2zarr():
@@ -309,7 +354,6 @@ vcf2zarr.add_command(encode)
309
354
  vcf2zarr.add_command(dexplode_init)
310
355
  vcf2zarr.add_command(dexplode_partition)
311
356
  vcf2zarr.add_command(dexplode_finalise)
312
- vcf2zarr.add_command(validate)
313
357
 
314
358
 
315
359
  @click.command(name="convert")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bio2zarr
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: Convert bioinformatics data to Zarr
5
5
  Home-page: https://github.com/pystatgen/bio2zarr
6
6
  Author: sgkit Developers
@@ -1,16 +1,16 @@
1
1
  bio2zarr/__init__.py,sha256=yIJYx4GyKtOLOtODOX0kGCeGPYgQ-TBbsRdT1NwBpQQ,37
2
2
  bio2zarr/__main__.py,sha256=3cgaQ4x8YKXt-9xC2GLrHnS6UA38y1GXqttwZiBZJg4,525
3
- bio2zarr/_version.py,sha256=NDHlyIcJZjLz8wKlmD1-pr6me5FHBAYwO_ynLG-37N8,411
4
- bio2zarr/cli.py,sha256=rNgxpjIwpltEHj1NOpJtwLvGOA0etuxcqMXyNlPbCts,9882
3
+ bio2zarr/_version.py,sha256=hB095avW4HuDZxn8qPHRG1UMzSSonb8ZDAsLxt9hmk8,411
4
+ bio2zarr/cli.py,sha256=N_vEFj730p_TL7Dk9m9T3ceAhVV58BMYRDmBmoeKH7A,10766
5
5
  bio2zarr/core.py,sha256=sBlWmHjcb7tAn_7WQRBdrbGcEd_lT_3HTQ_JbzomVMg,8111
6
6
  bio2zarr/plink.py,sha256=llhfP-v44BVPvgCcwXktk0YrKaJSII63U_PTtpHlGtM,6755
7
7
  bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
8
8
  bio2zarr/typing.py,sha256=wZ99Zzp5BD9Nqpd-S5bn38fSdPzfj6Z9IHPBfZqt9Gs,78
9
9
  bio2zarr/vcf.py,sha256=g2TqH9Lbp4Ds8kjOnjvHvoMAgnG6Kx8pKPN1bqBKKIQ,72201
10
10
  bio2zarr/vcf_utils.py,sha256=_kMZdpye15HGpniv8wwISw0L6NEEi54ZFaTcM83wLGs,16751
11
- bio2zarr-0.0.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
- bio2zarr-0.0.2.dist-info/METADATA,sha256=Uqirw85BARPHIZmkPJJKfWRKQgjhtQDDfH9wLJDoxj8,1106
13
- bio2zarr-0.0.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
14
- bio2zarr-0.0.2.dist-info/entry_points.txt,sha256=pklStOdATE5hHJm4qiIvmhHkcn21Si_XAu6MC7ieNrk,131
15
- bio2zarr-0.0.2.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
16
- bio2zarr-0.0.2.dist-info/RECORD,,
11
+ bio2zarr-0.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
+ bio2zarr-0.0.3.dist-info/METADATA,sha256=dc2y5xrnkcvD1qmKGFL5GrsbM1_tiIlAYB2GrAlLunM,1106
13
+ bio2zarr-0.0.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
14
+ bio2zarr-0.0.3.dist-info/entry_points.txt,sha256=pklStOdATE5hHJm4qiIvmhHkcn21Si_XAu6MC7ieNrk,131
15
+ bio2zarr-0.0.3.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
16
+ bio2zarr-0.0.3.dist-info/RECORD,,