phc-ingestion 0.11.1__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
@@ -44,6 +44,7 @@ def check_manifest(manifest_file_path: str, case_id, log: Logger):
44
44
  Optional("plasmaTumorFraction"): str,
45
45
  Optional("cellPurity"): float,
46
46
  Optional("hrdStatus"): str,
47
+ Optional("sampleId"): str,
47
48
  }
48
49
  )
49
50
 
@@ -4,6 +4,8 @@ import re
4
4
  import os
5
5
  from typing import Iterator, Optional
6
6
 
7
+ import pysam
8
+
7
9
 
8
10
  def check_vcf(infile: str, log: Logger) -> None:
9
11
  log.info("Checking VCF file")
@@ -72,11 +74,35 @@ def write_vcf(
72
74
  ) -> int:
73
75
  log.info(f"Writing standardized VCF to {outfile}")
74
76
 
75
- with gzip.open(outfile, "wt") if compression else open(outfile, "w") as w:
76
- w.write("\n".join(headers) + "\n")
77
- for variant in variants_gen:
78
- line_count += 1
79
- if variant:
80
- w.write(variant + "\n")
77
+ if compression:
78
+ # Write uncompressed first, then compress with bgzip (indexable format)
79
+ # pysam.tabix_compress requires an uncompressed input file
80
+ # Create a temporary uncompressed file
81
+ if outfile.endswith(".gz"):
82
+ temp_uncompressed = outfile[:-3] # Remove .gz extension
83
+ else:
84
+ temp_uncompressed = outfile + ".tmp"
85
+
86
+ with open(temp_uncompressed, "w") as w:
87
+ w.write("\n".join(headers) + "\n")
88
+ for variant in variants_gen:
89
+ line_count += 1
90
+ if variant:
91
+ w.write(variant + "\n")
92
+
93
+ # Compress with bgzip (indexable format) using pysam
94
+ log.info(f"Compressing VCF with bgzip to {outfile}")
95
+ pysam.tabix_compress(temp_uncompressed, outfile, force=True)
96
+
97
+ # Clean up temporary uncompressed file
98
+ if os.path.exists(temp_uncompressed):
99
+ os.remove(temp_uncompressed)
100
+ else:
101
+ with open(outfile, "w") as w:
102
+ w.write("\n".join(headers) + "\n")
103
+ for variant in variants_gen:
104
+ line_count += 1
105
+ if variant:
106
+ w.write(variant + "\n")
81
107
 
82
108
  return line_count
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.11.1
3
+ Version: 1.0.1
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -13,6 +13,7 @@ Requires-Dist: natsort==7.1.1
13
13
  Requires-Dist: numpy>=2.1.2
14
14
  Requires-Dist: packaging>=23.1
15
15
  Requires-Dist: pandas>=2.2.3
16
+ Requires-Dist: pysam>=0.21.0
16
17
  Requires-Dist: ruamel.yaml==0.17.21
17
18
  Requires-Dist: schema>=0.7.5
18
19
  Requires-Dist: urllib3>=1.26.16
@@ -25,7 +25,7 @@ ingestion/foundation/util/interpretation.py,sha256=LVVUmMyD6Un1rIKXqiyQDUC6oIJUd
25
25
  ingestion/foundation/util/vcf_etl.py,sha256=GXV5JXswwdyHEEdPsM3Qq8tDPFkvZajrZn5chWgF53k,2266
26
26
  ingestion/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  ingestion/generic/process.py,sha256=ZaVnZ_gx9faDUsuresI1A0oCegTa-dPQT7DBFMeZGyY,1777
28
- ingestion/generic/utils.py,sha256=1MEIru7uq38IjUdL8lcHqDH0oTki9uWrz1f2e-pmRoU,2814
28
+ ingestion/generic/utils.py,sha256=YHzbmB0tP-aQjvY9vRSn8iOhailleYwvctDbB0IEKb8,2853
29
29
  ingestion/nebula/__init__.py,sha256=VauK-rup_N8ZXVohx3HYqHX_PE_WoPyMUhdv2R7al4o,45
30
30
  ingestion/nebula/constants.py,sha256=thKqSwemdaAwAmKvF4FEVI9l1Ph5ergsnMlx6nWte7E,357
31
31
  ingestion/nebula/manifest_assembler.py,sha256=kcRSy6pixHkuVEK9QSoM-i6ZdLWMSYXw39eKGHvam34,7995
@@ -59,7 +59,7 @@ ingestion/vcf_standardization/standardize.py,sha256=zYzZxncq8USA1bUs26L-ByLPTnUl
59
59
  ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
60
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
61
61
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
62
- ingestion/vcf_standardization/util/read_write.py,sha256=x3Pf6Dq8tmolblbCS5CrNmrcHS3FGfqBSFpFgvFGC4g,2526
63
- phc_ingestion-0.11.1.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
64
- phc_ingestion-0.11.1.dist-info/METADATA,sha256=ucEZCziomM9ANPMV8jqSBT5Huigd0UownHOnSKWqKdc,677
65
- phc_ingestion-0.11.1.dist-info/RECORD,,
62
+ ingestion/vcf_standardization/util/read_write.py,sha256=xogLdqtm1xGzigY459LqP_1zM6c5X9AjAFGkfaDI-bg,3479
63
+ phc_ingestion-1.0.1.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
64
+ phc_ingestion-1.0.1.dist-info/METADATA,sha256=jvycAlRde4lUsP8O87F9a5bSF3fFINgtkMrr5FKsqWE,705
65
+ phc_ingestion-1.0.1.dist-info/RECORD,,