bio2zarr 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of bio2zarr might be problematic. Click here for more details.

bio2zarr/vcf_utils.py CHANGED
@@ -1,14 +1,14 @@
1
- from typing import IO, Any, Dict, Optional, Sequence, Union
2
1
  import contextlib
3
- import struct
4
- import pathlib
5
2
  import gzip
6
- from dataclasses import dataclass
7
3
  import os
4
+ import pathlib
5
+ import struct
6
+ from dataclasses import dataclass
7
+ from typing import IO, Any, Dict, Optional, Sequence, Union
8
8
 
9
- import numpy as np
10
9
  import cyvcf2
11
10
  import humanfriendly
11
+ import numpy as np
12
12
 
13
13
  from bio2zarr.typing import PathType
14
14
 
@@ -38,7 +38,8 @@ def read_bytes_as_value(f: IO[Any], fmt: str, nodata: Optional[Any] = None) -> A
38
38
  fmt : str
39
39
  A Python `struct` format string.
40
40
  nodata : Optional[Any], optional
41
- The value to return in case there is no further data in the stream, by default None
41
+ The value to return in case there is no further data in the stream,
42
+ by default None
42
43
 
43
44
  Returns
44
45
  -------
@@ -277,7 +278,8 @@ class TabixIndex:
277
278
  # Create file offsets for each element in the linear index
278
279
  file_offsets = np.array([get_file_offset(vfp) for vfp in linear_index])
279
280
 
280
- # Calculate corresponding contigs and positions or each element in the linear index
281
+ # Calculate corresponding contigs and positions or each element in
282
+ # the linear index
281
283
  contig_indexes = np.hstack(
282
284
  [np.full(len(li), i) for (i, li) in enumerate(linear_indexes)]
283
285
  )
@@ -433,6 +435,22 @@ class IndexedVcf(contextlib.AbstractContextManager):
433
435
  if var.POS >= start:
434
436
  yield var
435
437
 
438
+ def _filter_empty(self, regions):
439
+ """
440
+ Return all regions in the specified list that have one or more records.
441
+
442
+ Sometimes with Tabix indexes these seem to crop up:
443
+
444
+ - https://github.com/sgkit-dev/bio2zarr/issues/45
445
+ - https://github.com/sgkit-dev/bio2zarr/issues/120
446
+ """
447
+ ret = []
448
+ for region in regions:
449
+ variants = self.variants(region)
450
+ if next(variants, None) is not None:
451
+ ret.append(region)
452
+ return ret
453
+
436
454
  def partition_into_regions(
437
455
  self,
438
456
  num_parts: Optional[int] = None,
@@ -509,4 +527,4 @@ class IndexedVcf(contextlib.AbstractContextManager):
509
527
  if self.index.record_counts[ri] > 0:
510
528
  regions.append(Region(self.sequence_names[ri]))
511
529
 
512
- return regions
530
+ return self._filter_empty(regions)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bio2zarr
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: Convert bioinformatics data to Zarr
5
5
  Home-page: https://github.com/pystatgen/bio2zarr
6
6
  Author: sgkit Developers
@@ -0,0 +1,16 @@
1
+ bio2zarr/__init__.py,sha256=KiUGyya-9RHNcBldB8Lc1g3rP3CRjaL-5Olben0_6qA,49
2
+ bio2zarr/__main__.py,sha256=hO4vV-kPFgsYq0NQwG2r-WkserPL27oqae_tUvNB7yE,527
3
+ bio2zarr/_version.py,sha256=c6ZQWSJeXXzGZ3WoZWjkA-MiNkBFXMIRV9kZPo4MQ_M,411
4
+ bio2zarr/cli.py,sha256=iQIT3c_mos97st9bTLiBuxLL70MbNOtlTegUti6q2-g,14397
5
+ bio2zarr/core.py,sha256=E8MF5PUVulYtq5gaxaMWN7g5YeWPDueqlGCck3DJCuM,8516
6
+ bio2zarr/plink.py,sha256=huXMlxQ5C3gPmOYCavA-QW7PzaV48I2lo80cQqHT1wY,6768
7
+ bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
8
+ bio2zarr/typing.py,sha256=BYxhL16sKRoNxa6amf6AYxvt5Ke9qzv2np_kOT_zPJo,79
9
+ bio2zarr/vcf.py,sha256=dJcaRCdlDlUxYE_Z3hQaqSy7EivfpihP78XSRMzlWbU,83330
10
+ bio2zarr/vcf_utils.py,sha256=r3NQXxWK1SYU7CcwDzSWXdX5Q8Ixk7gdCTEiFPzfUAk,17307
11
+ bio2zarr-0.0.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
+ bio2zarr-0.0.6.dist-info/METADATA,sha256=siwhDelRAqr4Zo4EAPqZ4c9SkMOsU9ixRtgALAROP9A,1077
13
+ bio2zarr-0.0.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
14
+ bio2zarr-0.0.6.dist-info/entry_points.txt,sha256=pklStOdATE5hHJm4qiIvmhHkcn21Si_XAu6MC7ieNrk,131
15
+ bio2zarr-0.0.6.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
16
+ bio2zarr-0.0.6.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- bio2zarr/__init__.py,sha256=yIJYx4GyKtOLOtODOX0kGCeGPYgQ-TBbsRdT1NwBpQQ,37
2
- bio2zarr/__main__.py,sha256=3cgaQ4x8YKXt-9xC2GLrHnS6UA38y1GXqttwZiBZJg4,525
3
- bio2zarr/_version.py,sha256=yBVOKdXLEcTVc7YV7ZPqRXhRDRt-pKrfXxcgHkgPY5g,411
4
- bio2zarr/cli.py,sha256=QE0DfoZHbBbxq9K_im9y4tJ49_Wss0zzavSjjz-85Xw,11484
5
- bio2zarr/core.py,sha256=tZb9exfFmuzbA8tUpPY8avSm9YvfH31-vUCTM4fpj78,8128
6
- bio2zarr/plink.py,sha256=llhfP-v44BVPvgCcwXktk0YrKaJSII63U_PTtpHlGtM,6755
7
- bio2zarr/provenance.py,sha256=c_Z__QbWkLS0Rfa8D7LgEhtStng_zRMJX8comaDXIkw,142
8
- bio2zarr/typing.py,sha256=wZ99Zzp5BD9Nqpd-S5bn38fSdPzfj6Z9IHPBfZqt9Gs,78
9
- bio2zarr/vcf.py,sha256=MEskVTDq4QntzoawPz0sfmInV0aPkIPLXXNv7GmVcmY,73870
10
- bio2zarr/vcf_utils.py,sha256=_kMZdpye15HGpniv8wwISw0L6NEEi54ZFaTcM83wLGs,16751
11
- bio2zarr-0.0.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
12
- bio2zarr-0.0.4.dist-info/METADATA,sha256=DISckjzZ0b6FpBTfBvpmJmEe00SIdTHyB3UTsTR8rws,1077
13
- bio2zarr-0.0.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
14
- bio2zarr-0.0.4.dist-info/entry_points.txt,sha256=pklStOdATE5hHJm4qiIvmhHkcn21Si_XAu6MC7ieNrk,131
15
- bio2zarr-0.0.4.dist-info/top_level.txt,sha256=ouAvp3u9N25eKrQbN8BCDLPcWWQLhtlgdHKu8AtEj5Q,9
16
- bio2zarr-0.0.4.dist-info/RECORD,,