bio2zarr 0.0.10__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bio2zarr/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.0.10'
16
- __version_tuple__ = version_tuple = (0, 0, 10)
15
+ __version__ = version = '0.1.1'
16
+ __version_tuple__ = version_tuple = (0, 1, 1)
bio2zarr/cli.py CHANGED
@@ -44,7 +44,13 @@ zarr_path = click.argument(
44
44
  "zarr_path", type=click.Path(exists=True, file_okay=False, dir_okay=True)
45
45
  )
46
46
 
47
- num_partitions = click.argument("num_partitions", type=click.IntRange(min=1))
47
+ num_partitions = click.option(
48
+ "-n",
49
+ "--num-partitions",
50
+ type=click.IntRange(min=1),
51
+ default=None,
52
+ help="Target number of partitions to split into",
53
+ )
48
54
 
49
55
  partition = click.argument("partition", type=click.IntRange(min=0))
50
56
 
@@ -58,6 +64,13 @@ force = click.option(
58
64
  help="Force overwriting of existing directories",
59
65
  )
60
66
 
67
+ progress = click.option(
68
+ "-P /-Q",
69
+ "--progress/--no-progress",
70
+ default=True,
71
+ help="Show progress bars (default: show)",
72
+ )
73
+
61
74
  one_based = click.option(
62
75
  "--one-based",
63
76
  is_flag=True,
@@ -165,6 +178,15 @@ def check_overwrite_dir(path, force):
165
178
  shutil.rmtree(tmp_delete_path)
166
179
 
167
180
 
181
+ def check_partitions(num_partitions):
182
+ if num_partitions is None:
183
+ raise click.UsageError(
184
+ "-n/--num-partitions must currently be specified. Future versions "
185
+ "will provide reasonable defaults or other means of specifying "
186
+ "partitions."
187
+ )
188
+
189
+
168
190
  def get_compressor(cname):
169
191
  if cname is None:
170
192
  return None
@@ -190,9 +212,17 @@ def show_work_summary(work_summary, json):
190
212
  @verbose
191
213
  @column_chunk_size
192
214
  @compressor
215
+ @progress
193
216
  @worker_processes
194
217
  def explode(
195
- vcfs, icf_path, force, verbose, column_chunk_size, compressor, worker_processes
218
+ vcfs,
219
+ icf_path,
220
+ force,
221
+ verbose,
222
+ column_chunk_size,
223
+ compressor,
224
+ progress,
225
+ worker_processes,
196
226
  ):
197
227
  """
198
228
  Convert VCF(s) to intermediate columnar format
@@ -205,7 +235,7 @@ def explode(
205
235
  worker_processes=worker_processes,
206
236
  column_chunk_size=column_chunk_size,
207
237
  compressor=get_compressor(compressor),
208
- show_progress=True,
238
+ show_progress=progress,
209
239
  )
210
240
 
211
241
 
@@ -218,6 +248,7 @@ def explode(
218
248
  @compressor
219
249
  @json
220
250
  @verbose
251
+ @progress
221
252
  @worker_processes
222
253
  def dexplode_init(
223
254
  vcfs,
@@ -228,14 +259,16 @@ def dexplode_init(
228
259
  compressor,
229
260
  json,
230
261
  verbose,
262
+ progress,
231
263
  worker_processes,
232
264
  ):
233
265
  """
234
266
  Initial step for distributed conversion of VCF(s) to intermediate columnar format
235
- over the requested number of paritions.
267
+ over some number of partitions.
236
268
  """
237
269
  setup_logging(verbose)
238
270
  check_overwrite_dir(icf_path, force)
271
+ check_partitions(num_partitions)
239
272
  work_summary = vcf2zarr.explode_init(
240
273
  icf_path,
241
274
  vcfs,
@@ -243,7 +276,7 @@ def dexplode_init(
243
276
  column_chunk_size=column_chunk_size,
244
277
  worker_processes=worker_processes,
245
278
  compressor=get_compressor(compressor),
246
- show_progress=True,
279
+ show_progress=progress,
247
280
  )
248
281
  show_work_summary(work_summary, json)
249
282
 
@@ -310,6 +343,7 @@ def mkschema(icf_path):
310
343
  @samples_chunk_size
311
344
  @max_variant_chunks
312
345
  @max_memory
346
+ @progress
313
347
  @worker_processes
314
348
  def encode(
315
349
  icf_path,
@@ -321,6 +355,7 @@ def encode(
321
355
  samples_chunk_size,
322
356
  max_variant_chunks,
323
357
  max_memory,
358
+ progress,
324
359
  worker_processes,
325
360
  ):
326
361
  """
@@ -337,7 +372,7 @@ def encode(
337
372
  max_variant_chunks=max_variant_chunks,
338
373
  worker_processes=worker_processes,
339
374
  max_memory=max_memory,
340
- show_progress=True,
375
+ show_progress=progress,
341
376
  )
342
377
 
343
378
 
@@ -351,6 +386,7 @@ def encode(
351
386
  @samples_chunk_size
352
387
  @max_variant_chunks
353
388
  @json
389
+ @progress
354
390
  @verbose
355
391
  def dencode_init(
356
392
  icf_path,
@@ -362,12 +398,13 @@ def dencode_init(
362
398
  samples_chunk_size,
363
399
  max_variant_chunks,
364
400
  json,
401
+ progress,
365
402
  verbose,
366
403
  ):
367
404
  """
368
405
  Initialise conversion of intermediate format to VCF Zarr. This will
369
406
  set up the specified ZARR_PATH to perform this conversion over
370
- NUM_PARTITIONS.
407
+ some number of partitions.
371
408
 
372
409
  The output of this command is the actual number of partitions generated
373
410
  (which may be less than the requested number, if there is not sufficient
@@ -379,6 +416,7 @@ def dencode_init(
379
416
  """
380
417
  setup_logging(verbose)
381
418
  check_overwrite_dir(zarr_path, force)
419
+ check_partitions(num_partitions)
382
420
  work_summary = vcf2zarr.encode_init(
383
421
  icf_path,
384
422
  zarr_path,
@@ -387,7 +425,7 @@ def dencode_init(
387
425
  variants_chunk_size=variants_chunk_size,
388
426
  samples_chunk_size=samples_chunk_size,
389
427
  max_variant_chunks=max_variant_chunks,
390
- show_progress=True,
428
+ show_progress=progress,
391
429
  )
392
430
  show_work_summary(work_summary, json)
393
431
 
@@ -413,12 +451,13 @@ def dencode_partition(zarr_path, partition, verbose, one_based):
413
451
  @click.command
414
452
  @zarr_path
415
453
  @verbose
416
- def dencode_finalise(zarr_path, verbose):
454
+ @progress
455
+ def dencode_finalise(zarr_path, verbose, progress):
417
456
  """
418
457
  Final step for distributed conversion of ICF to VCF Zarr.
419
458
  """
420
459
  setup_logging(verbose)
421
- vcf2zarr.encode_finalise(zarr_path, show_progress=True)
460
+ vcf2zarr.encode_finalise(zarr_path, show_progress=progress)
422
461
 
423
462
 
424
463
  @click.command(name="convert")
@@ -428,6 +467,7 @@ def dencode_finalise(zarr_path, verbose):
428
467
  @variants_chunk_size
429
468
  @samples_chunk_size
430
469
  @verbose
470
+ @progress
431
471
  @worker_processes
432
472
  def convert_vcf(
433
473
  vcfs,
@@ -436,6 +476,7 @@ def convert_vcf(
436
476
  variants_chunk_size,
437
477
  samples_chunk_size,
438
478
  verbose,
479
+ progress,
439
480
  worker_processes,
440
481
  ):
441
482
  """
@@ -448,7 +489,7 @@ def convert_vcf(
448
489
  zarr_path,
449
490
  variants_chunk_size=variants_chunk_size,
450
491
  samples_chunk_size=samples_chunk_size,
451
- show_progress=True,
492
+ show_progress=progress,
452
493
  worker_processes=worker_processes,
453
494
  )
454
495
 
@@ -481,6 +522,7 @@ vcf2zarr_main.add_command(dencode_finalise)
481
522
  @click.argument("in_path", type=click.Path())
482
523
  @click.argument("zarr_path", type=click.Path())
483
524
  @worker_processes
525
+ @progress
484
526
  @verbose
485
527
  @variants_chunk_size
486
528
  @samples_chunk_size
@@ -489,6 +531,7 @@ def convert_plink(
489
531
  zarr_path,
490
532
  verbose,
491
533
  worker_processes,
534
+ progress,
492
535
  variants_chunk_size,
493
536
  samples_chunk_size,
494
537
  ):
@@ -499,7 +542,7 @@ def convert_plink(
499
542
  plink.convert(
500
543
  in_path,
501
544
  zarr_path,
502
- show_progress=True,
545
+ show_progress=progress,
503
546
  worker_processes=worker_processes,
504
547
  samples_chunk_size=samples_chunk_size,
505
548
  variants_chunk_size=variants_chunk_size,
@@ -517,11 +560,39 @@ plink2zarr.add_command(convert_plink)
517
560
 
518
561
  @click.command
519
562
  @version
520
- @click.argument("vcf_path", type=click.Path())
521
- @click.option("-i", "--index", type=click.Path(), default=None)
522
- @click.option("-n", "--num-parts", type=int, default=None)
523
- # @click.option("-s", "--part-size", type=int, default=None)
524
- def vcfpartition(vcf_path, index, num_parts):
525
- indexed_vcf = vcf_utils.IndexedVcf(vcf_path, index)
526
- regions = indexed_vcf.partition_into_regions(num_parts=num_parts)
527
- click.echo("\n".join(map(str, regions)))
563
+ @click.argument("vcf_path", type=click.Path(exists=True, dir_okay=False))
564
+ @verbose
565
+ @num_partitions
566
+ @click.option(
567
+ "-s",
568
+ "--partition-size",
569
+ type=str,
570
+ default=None,
571
+ help="Target (compressed) size of VCF partitions, e.g. 100KB, 10MiB, 1G.",
572
+ )
573
+ def vcfpartition(vcf_path, verbose, num_partitions, partition_size):
574
+ """
575
+ Output bcftools region strings that partition an indexed VCF/BCF file
576
+ into either an approximate number of parts (-n), or parts of approximately
577
+ a given size (-s). One of -n or -s must be supplied.
578
+
579
+ Note that both the number of partitions and sizes are a target, and the
580
+ returned number of partitions may not exactly correspond. In particular,
581
+ there is a maximum level of granularity determined by the associated index
582
+ which cannot be exceeded.
583
+
584
+ Note also that the partitions returned may vary considerably in the number
585
+ of records that they contain.
586
+ """
587
+ setup_logging(verbose)
588
+ if num_partitions is None and partition_size is None:
589
+ raise click.UsageError(
590
+ "Either --num-partitions or --partition-size must be specified"
591
+ )
592
+
593
+ indexed_vcf = vcf_utils.IndexedVcf(vcf_path)
594
+ regions = indexed_vcf.partition_into_regions(
595
+ num_parts=num_partitions, target_part_size=partition_size
596
+ )
597
+ for region in regions:
598
+ click.echo(f"{region}\t{vcf_path}")
bio2zarr/core.py CHANGED
@@ -7,8 +7,10 @@ import math
7
7
  import multiprocessing
8
8
  import os
9
9
  import os.path
10
+ import sys
10
11
  import threading
11
12
  import time
13
+ import warnings
12
14
 
13
15
  import humanfriendly
14
16
  import numcodecs
@@ -78,6 +80,11 @@ def du(path):
78
80
 
79
81
 
80
82
  class SynchronousExecutor(cf.Executor):
83
+ # Arguably we should use workers=0 as the default and use this
84
+ # executor implementation. However, the docs are fairly explicit
85
+ # about saying we shouldn't instantiate Future objects directly,
86
+ # so it's best to keep this as a semi-secret debugging interface
87
+ # for now.
81
88
  def submit(self, fn, /, *args, **kwargs):
82
89
  future = cf.Future()
83
90
  future.set_result(fn(*args, **kwargs))
@@ -191,8 +198,11 @@ _progress_counter = None
191
198
 
192
199
 
193
200
  def update_progress(inc):
194
- with _progress_counter.get_lock():
195
- _progress_counter.value += inc
201
+ # If the _progress_counter has not been set we are working in a
202
+ # synchronous non-progress tracking context
203
+ if _progress_counter is not None:
204
+ with _progress_counter.get_lock():
205
+ _progress_counter.value += inc
196
206
 
197
207
 
198
208
  def get_progress():
@@ -206,6 +216,22 @@ def setup_progress_counter(counter):
206
216
  _progress_counter = counter
207
217
 
208
218
 
219
+ def warn_py39_mac():
220
+ if sys.platform == "darwin" and sys.version_info[:2] == (3, 9):
221
+ warnings.warn(
222
+ "There is a known issue with bio2zarr on MacOS Python 3.9 "
223
+ "in which OS-level named semaphores are leaked. "
224
+ "You will also probably see warnings like 'There appear to be N "
225
+ "leaked semaphore objects at shutdown'. "
226
+ "While this is likely harmless for a few runs, it could lead to "
227
+ "issues if you do a lot of conversion. To get prevent this issue "
228
+ "either: (1) use --worker-processes=0 or (2) upgrade to a newer "
229
+ "Python version. See https://github.com/sgkit-dev/bio2zarr/issues/209 "
230
+ "for more details.",
231
+ stacklevel=2,
232
+ )
233
+
234
+
209
235
  class ParallelWorkManager(contextlib.AbstractContextManager):
210
236
  def __init__(self, worker_processes=1, progress_config=None):
211
237
  # Need to specify this explicitly to support Macs and
@@ -214,9 +240,11 @@ class ParallelWorkManager(contextlib.AbstractContextManager):
214
240
  global _progress_counter
215
241
  _progress_counter = ctx.Value("Q", 0)
216
242
  if worker_processes <= 0:
217
- # NOTE: this is only for testing, not for production use!
243
+ # NOTE: this is only for testing and debugging, not for
244
+ # production. See note on the SynchronousExecutor class.
218
245
  self.executor = SynchronousExecutor()
219
246
  else:
247
+ warn_py39_mac()
220
248
  self.executor = cf.ProcessPoolExecutor(
221
249
  max_workers=worker_processes,
222
250
  mp_context=ctx,
@@ -248,7 +276,6 @@ class ParallelWorkManager(contextlib.AbstractContextManager):
248
276
  def _update_progress(self):
249
277
  current = get_progress()
250
278
  inc = current - self.progress_bar.n
251
- # print("UPDATE PROGRESS: current = ", current, self.progress_config.total, inc)
252
279
  self.progress_bar.update(inc)
253
280
 
254
281
  def _update_progress_worker(self):
bio2zarr/vcf2zarr/icf.py CHANGED
@@ -263,9 +263,6 @@ def scan_vcf(path, target_num_partitions):
263
263
  )
264
264
 
265
265
  regions = indexed_vcf.partition_into_regions(num_parts=target_num_partitions)
266
- logger.info(
267
- f"Split {path} into {len(regions)} regions (target={target_num_partitions})"
268
- )
269
266
  for region in regions:
270
267
  metadata.partitions.append(
271
268
  VcfPartition(
@@ -275,6 +272,10 @@ def scan_vcf(path, target_num_partitions):
275
272
  region=region,
276
273
  )
277
274
  )
275
+ logger.info(
276
+ f"Split {path} into {len(metadata.partitions)} "
277
+ f"partitions target={target_num_partitions})"
278
+ )
278
279
  core.update_progress(1)
279
280
  return metadata, vcf.raw_header
280
281
 
bio2zarr/vcf2zarr/vcz.py CHANGED
@@ -34,7 +34,7 @@ DEFAULT_ZARR_COMPRESSOR = numcodecs.Blosc(cname="zstd", clevel=7)
34
34
 
35
35
 
36
36
  @dataclasses.dataclass
37
- class ZarrColumnSpec:
37
+ class ZarrArraySpec:
38
38
  name: str
39
39
  dtype: str
40
40
  shape: tuple
@@ -54,7 +54,7 @@ class ZarrColumnSpec:
54
54
 
55
55
  @staticmethod
56
56
  def new(**kwargs):
57
- spec = ZarrColumnSpec(
57
+ spec = ZarrArraySpec(
58
58
  **kwargs, compressor=DEFAULT_ZARR_COMPRESSOR.get_config(), filters=[]
59
59
  )
60
60
  spec._choose_compressor_settings()
@@ -94,7 +94,7 @@ class ZarrColumnSpec:
94
94
  dimensions.append("genotypes")
95
95
  else:
96
96
  dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
97
- return ZarrColumnSpec.new(
97
+ return ZarrArraySpec.new(
98
98
  vcf_field=vcf_field.full_name,
99
99
  name=variable_name,
100
100
  dtype=vcf_field.smallest_dtype(),
@@ -127,6 +127,23 @@ class ZarrColumnSpec:
127
127
 
128
128
  self.compressor["shuffle"] = shuffle
129
129
 
130
+ @property
131
+ def chunk_nbytes(self):
132
+ """
133
+ Returns the nbytes for a single chunk in this array.
134
+ """
135
+ items = 1
136
+ dim = 0
137
+ for chunk_size in self.chunks:
138
+ size = min(chunk_size, self.shape[dim])
139
+ items *= size
140
+ dim += 1
141
+ # Include sizes for extra dimensions.
142
+ for size in self.shape[dim:]:
143
+ items *= size
144
+ dt = np.dtype(self.dtype)
145
+ return items * dt.itemsize
146
+
130
147
  @property
131
148
  def variant_chunk_nbytes(self):
132
149
  """
@@ -157,6 +174,24 @@ class VcfZarrSchema(core.JsonDataclass):
157
174
  filters: list
158
175
  fields: list
159
176
 
177
+ def validate(self):
178
+ """
179
+ Checks that the schema is well-formed and within required limits.
180
+ """
181
+ for field in self.fields:
182
+ # This is the Blosc max buffer size
183
+ if field.chunk_nbytes > 2147483647:
184
+ # TODO add some links to documentation here advising how to
185
+ # deal with PL values.
186
+ raise ValueError(
187
+ f"Field {field.name} chunks are too large "
188
+ f"({field.chunk_nbytes} > 2**31 - 1 bytes). "
189
+ "Either generate a schema and drop this field (if you don't "
190
+ "need it) or reduce the variant or sample chunk sizes."
191
+ )
192
+ # TODO other checks? There must be lots of ways people could mess
193
+ # up the schema leading to cryptic errors.
194
+
160
195
  def field_map(self):
161
196
  return {field.name: field for field in self.fields}
162
197
 
@@ -171,7 +206,7 @@ class VcfZarrSchema(core.JsonDataclass):
171
206
  ret.samples = [icf.Sample(**sd) for sd in d["samples"]]
172
207
  ret.contigs = [icf.Contig(**sd) for sd in d["contigs"]]
173
208
  ret.filters = [icf.Filter(**sd) for sd in d["filters"]]
174
- ret.fields = [ZarrColumnSpec(**sd) for sd in d["fields"]]
209
+ ret.fields = [ZarrArraySpec(**sd) for sd in d["fields"]]
175
210
  return ret
176
211
 
177
212
  @staticmethod
@@ -192,7 +227,7 @@ class VcfZarrSchema(core.JsonDataclass):
192
227
  )
193
228
 
194
229
  def spec_from_field(field, variable_name=None):
195
- return ZarrColumnSpec.from_field(
230
+ return ZarrArraySpec.from_field(
196
231
  field,
197
232
  num_samples=n,
198
233
  num_variants=m,
@@ -204,7 +239,7 @@ class VcfZarrSchema(core.JsonDataclass):
204
239
  def fixed_field_spec(
205
240
  name, dtype, vcf_field=None, shape=(m,), dimensions=("variants",)
206
241
  ):
207
- return ZarrColumnSpec.new(
242
+ return ZarrArraySpec.new(
208
243
  vcf_field=vcf_field,
209
244
  name=name,
210
245
  dtype=dtype,
@@ -230,13 +265,13 @@ class VcfZarrSchema(core.JsonDataclass):
230
265
  ),
231
266
  fixed_field_spec(
232
267
  name="variant_allele",
233
- dtype="str",
268
+ dtype="O",
234
269
  shape=(m, max_alleles),
235
270
  dimensions=["variants", "alleles"],
236
271
  ),
237
272
  fixed_field_spec(
238
273
  name="variant_id",
239
- dtype="str",
274
+ dtype="O",
240
275
  ),
241
276
  fixed_field_spec(
242
277
  name="variant_id_mask",
@@ -267,7 +302,7 @@ class VcfZarrSchema(core.JsonDataclass):
267
302
  chunks = [variants_chunk_size, samples_chunk_size]
268
303
  dimensions = ["variants", "samples"]
269
304
  colspecs.append(
270
- ZarrColumnSpec.new(
305
+ ZarrArraySpec.new(
271
306
  vcf_field=None,
272
307
  name="call_genotype_phased",
273
308
  dtype="bool",
@@ -280,7 +315,7 @@ class VcfZarrSchema(core.JsonDataclass):
280
315
  shape += [ploidy]
281
316
  dimensions += ["ploidy"]
282
317
  colspecs.append(
283
- ZarrColumnSpec.new(
318
+ ZarrArraySpec.new(
284
319
  vcf_field=None,
285
320
  name="call_genotype",
286
321
  dtype=gt_field.smallest_dtype(),
@@ -291,7 +326,7 @@ class VcfZarrSchema(core.JsonDataclass):
291
326
  )
292
327
  )
293
328
  colspecs.append(
294
- ZarrColumnSpec.new(
329
+ ZarrArraySpec.new(
295
330
  vcf_field=None,
296
331
  name="call_genotype_mask",
297
332
  dtype="bool",
@@ -447,6 +482,7 @@ class VcfZarrWriter:
447
482
  self.icf = icf
448
483
  if self.path.exists():
449
484
  raise ValueError("Zarr path already exists") # NEEDS TEST
485
+ schema.validate()
450
486
  partitions = VcfZarrPartition.generate_partitions(
451
487
  self.icf.num_records,
452
488
  schema.variants_chunk_size,
bio2zarr/vcf_utils.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import contextlib
2
2
  import gzip
3
+ import logging
3
4
  import os
4
5
  import pathlib
5
6
  import struct
@@ -13,6 +14,8 @@ import numpy as np
13
14
 
14
15
  from bio2zarr.typing import PathType
15
16
 
17
+ logger = logging.getLogger(__name__)
18
+
16
19
  CSI_EXTENSION = ".csi"
17
20
  TABIX_EXTENSION = ".tbi"
18
21
  TABIX_LINEAR_INDEX_INTERVAL_SIZE = 1 << 14 # 16kb interval size
@@ -411,6 +414,7 @@ class IndexedVcf(contextlib.AbstractContextManager):
411
414
  raise ValueError("Only .tbi or .csi indexes are supported.")
412
415
  self.vcf = cyvcf2.VCF(vcf_path)
413
416
  self.vcf.set_index(str(self.index_path))
417
+ logger.debug(f"Loaded {vcf_path} with index {self.index_path}")
414
418
  self.sequence_names = None
415
419
  if self.index_type == "csi":
416
420
  # Determine the file-type based on the "aux" field.
@@ -450,15 +454,16 @@ class IndexedVcf(contextlib.AbstractContextManager):
450
454
  def _filter_empty_and_refine(self, regions):
451
455
  """
452
456
  Return all regions in the specified list that have one or more records,
453
- and refine the start coordinate of the region to be the actual first coord
457
+ and refine the start coordinate of the region to be the actual first coord.
458
+
459
+ Because this is a relatively expensive operation requiring seeking around
460
+ the file, we return the results as an iterator.
454
461
  """
455
- ret = []
456
462
  for region in regions:
457
463
  var = next(self.variants(region), None)
458
464
  if var is not None:
459
465
  region.start = var.POS
460
- ret.append(region)
461
- return ret
466
+ yield region
462
467
 
463
468
  def partition_into_regions(
464
469
  self,
@@ -490,7 +495,7 @@ class IndexedVcf(contextlib.AbstractContextManager):
490
495
  target_part_size_bytes = file_length // num_parts
491
496
  elif target_part_size_bytes is not None:
492
497
  num_parts = ceildiv(file_length, target_part_size_bytes)
493
- part_lengths = np.array([i * target_part_size_bytes for i in range(num_parts)])
498
+ part_lengths = target_part_size_bytes * np.arange(num_parts, dtype=int)
494
499
  file_offsets, region_contig_indexes, region_positions = self.index.offsets()
495
500
 
496
501
  # Search the file offsets to find which indexes the part lengths fall at
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bio2zarr
3
- Version: 0.0.10
3
+ Version: 0.1.1
4
4
  Summary: Convert bioinformatics data to Zarr
5
5
  Author-email: sgkit Developers <project@sgkit.dev>
6
6
  License: Apache License
@@ -207,7 +207,7 @@ License: Apache License
207
207
 
208
208
  Project-URL: repository, https://github.com/sgkit-dev/bio2zarr
209
209
  Project-URL: documentation, https://sgkit-dev.github.io/bio2zarr/
210
- Classifier: Development Status :: 3 - Alpha
210
+ Classifier: Development Status :: 4 - Beta
211
211
  Classifier: License :: OSI Approved :: Apache Software License
212
212
  Classifier: Operating System :: POSIX
213
213
  Classifier: Operating System :: POSIX :: Linux
@@ -223,8 +223,8 @@ Classifier: Topic :: Scientific/Engineering
223
223
  Requires-Python: >=3.9
224
224
  Description-Content-Type: text/markdown
225
225
  License-File: LICENSE
226
- Requires-Dist: numpy
227
- Requires-Dist: zarr >=2.17
226
+ Requires-Dist: numpy <2
227
+ Requires-Dist: zarr <3,>=2.17
228
228
  Requires-Dist: click
229
229
  Requires-Dist: tabulate
230
230
  Requires-Dist: tqdm
@@ -0,0 +1,20 @@
1
+ bio2zarr/__init__.py,sha256=KiUGyya-9RHNcBldB8Lc1g3rP3CRjaL-5Olben0_6qA,49
2
+ bio2zarr/__main__.py,sha256=wUKNNps8MAAEpMvLgVaI449eKyfr7Jpk2mMtYbNl4Ek,531
3
+ bio2zarr/_version.py,sha256=PKIMyjdUACH4-ONvtunQCnYE2UhlMfp9su83e3HXl5E,411
4
+ bio2zarr/cli.py,sha256=-6cU26n5f8CpBSj6RGC-fpNByjuJ0KxSFz85O9tITPg,14961
5
+ bio2zarr/constants.py,sha256=QjbtFeBUZ-XqG35ZFIFj8EYrta_EwUkC2B5VGRP7oQs,425
6
+ bio2zarr/core.py,sha256=Yd3Z6-mFI_neaxoWT6t6Tip0k1VZEcWbautHcJ0ep8Q,10486
7
+ bio2zarr/plink.py,sha256=huXMlxQ5C3gPmOYCavA-QW7PzaV48I2lo80cQqHT1wY,6768
8
+ bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
9
+ bio2zarr/typing.py,sha256=BYxhL16sKRoNxa6amf6AYxvt5Ke9qzv2np_kOT_zPJo,79
10
+ bio2zarr/vcf_utils.py,sha256=R3bes-xYLZ4ekaxtqDd39YVV20qHmwei3XiIg1UFhRA,17996
11
+ bio2zarr/vcf2zarr/__init__.py,sha256=0_of1iGzIDhvti49Gbcgd47oP63mKvouk9uLgKgiwoQ,791
12
+ bio2zarr/vcf2zarr/icf.py,sha256=rIC35RIfkk5gEE8cOmBg1d9Pj-HkPivmGvYp4PrVN1Q,41589
13
+ bio2zarr/vcf2zarr/vcz.py,sha256=2WE4RX5jZBiKDFEztNGYgXyrLRmVWeLKlFzh0GOzylk,38198
14
+ bio2zarr/vcf2zarr/verification.py,sha256=6xcBy-cJLaQz2Qj2crffXFMjUG-H7z637Csxe5ZCmds,7898
15
+ bio2zarr-0.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
16
+ bio2zarr-0.1.1.dist-info/METADATA,sha256=RR9oM_5UYB5slsheIFzkIlRZt5du8eCb1_bMT_e7QjY,14854
17
+ bio2zarr-0.1.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
18
+ bio2zarr-0.1.1.dist-info/entry_points.txt,sha256=3adtRrClMpjatEbiYqK5bm9WHA2PaJN5hK-Cs_zkpaI,97
19
+ bio2zarr-0.1.1.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
20
+ bio2zarr-0.1.1.dist-info/RECORD,,
@@ -1,20 +0,0 @@
1
- bio2zarr/__init__.py,sha256=KiUGyya-9RHNcBldB8Lc1g3rP3CRjaL-5Olben0_6qA,49
2
- bio2zarr/__main__.py,sha256=wUKNNps8MAAEpMvLgVaI449eKyfr7Jpk2mMtYbNl4Ek,531
3
- bio2zarr/_version.py,sha256=IBUgg21Ew0JtWj9Z6eN1r4zXlrNseQQNV4zo-nYzlEY,413
4
- bio2zarr/cli.py,sha256=Bv4k9V-5HJVVbqBMiYLWz5IQyILQ0bTicqgkQrr9hd0,13209
5
- bio2zarr/constants.py,sha256=QjbtFeBUZ-XqG35ZFIFj8EYrta_EwUkC2B5VGRP7oQs,425
6
- bio2zarr/core.py,sha256=3UFh7nKB3CbAIaJV3wgoqlkRy1M235C2vz7Iua73qwM,9234
7
- bio2zarr/plink.py,sha256=huXMlxQ5C3gPmOYCavA-QW7PzaV48I2lo80cQqHT1wY,6768
8
- bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
9
- bio2zarr/typing.py,sha256=BYxhL16sKRoNxa6amf6AYxvt5Ke9qzv2np_kOT_zPJo,79
10
- bio2zarr/vcf_utils.py,sha256=b3Ti1AFXFlK7S1mu6jotqHPrujCIQXBKIHH8yIzd3zk,17781
11
- bio2zarr/vcf2zarr/__init__.py,sha256=0_of1iGzIDhvti49Gbcgd47oP63mKvouk9uLgKgiwoQ,791
12
- bio2zarr/vcf2zarr/icf.py,sha256=BJKPJDeqP8QtVz7ebm6NQQgvsba8H-JLsNEz4whOxsw,41559
13
- bio2zarr/vcf2zarr/vcz.py,sha256=sy8VVYuOntMuPs5gUwQx6IA39_Gl_YFW2h-CeRyQw2A,36865
14
- bio2zarr/vcf2zarr/verification.py,sha256=6xcBy-cJLaQz2Qj2crffXFMjUG-H7z637Csxe5ZCmds,7898
15
- bio2zarr-0.0.10.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
16
- bio2zarr-0.0.10.dist-info/METADATA,sha256=7su1JbkFtR7eDjq2Rp5A8CjP9KnvwWaDS6bPH43Z2qI,14850
17
- bio2zarr-0.0.10.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
18
- bio2zarr-0.0.10.dist-info/entry_points.txt,sha256=3adtRrClMpjatEbiYqK5bm9WHA2PaJN5hK-Cs_zkpaI,97
19
- bio2zarr-0.0.10.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
20
- bio2zarr-0.0.10.dist-info/RECORD,,