phc-ingestion 0.8.33__py3-none-any.whl → 0.8.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,9 @@ from logging import Logger
2
2
 
3
3
 
4
4
  def map_interpretation(status: str, log: Logger):
5
+ """
6
+ Map interpretation for structural and copy number variants
7
+ """
5
8
  if status == "Pathogenic":
6
9
  return "Pathogenic"
7
10
  elif "VUS" in status:
@@ -9,3 +12,17 @@ def map_interpretation(status: str, log: Logger):
9
12
  else:
10
13
  log.error(f"Failed to resolve interpretation: {status}")
11
14
  return ""
15
+
16
+
17
+ def map_vendsig(vendsig: str) -> str:
18
+ """
19
+ Map vendor significance for short variants
20
+ """
21
+ if vendsig in ["Pathogenic"]:
22
+ return "VENDSIG=Pathogenic"
23
+ elif vendsig in ["Likely Pathogenic", "LikelyPathogenic"]:
24
+ return "VENDSIG=Likely pathogenic"
25
+ elif vendsig in ["VUS"]:
26
+ return "VENDSIG=Uncertain significance"
27
+ else:
28
+ raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
@@ -14,7 +14,7 @@ nextgen_specific_genes_with_location: list[GeneWithLocation] = [
14
14
  {"gene": "CCND3", "chr": "chr6", "start": 41920534, "end": 42562008},
15
15
  {"gene": "MYC", "chr": "chr8", "start": 125309416, "end": 129673293},
16
16
  {"gene": "CCND1", "chr": "chr11", "start": 69090733, "end": 69656860},
17
- {"gene": "IGH", "chr": "chr14", "start": 105516968, "end": 109902208},
17
+ {"gene": "IGH", "chr": "chr14", "start": 105325507, "end": 109902208},
18
18
  {"gene": "MAF", "chr": "chr16", "start": 78428398, "end": 79615096},
19
19
  {"gene": "MAFB", "chr": "chr20", "start": 39039005, "end": 40688948},
20
20
  {"gene": "IGL", "chr": "chr22", "start": 22012552, "end": 22965858},
@@ -65,7 +65,7 @@ def get_cell_purity(interpretation_lines: list):
65
65
  return float(00.00)
66
66
 
67
67
 
68
- def extract_patient_data(patient_info_lines: list):
68
+ def extract_patient_data(patient_info_lines: list[str]):
69
69
  patient_data: dict = {}
70
70
  patient_data["patientInfo"] = {}
71
71
 
@@ -5,6 +5,7 @@ from logging import Logger
5
5
  from typing import Literal
6
6
 
7
7
  from ingestion.nextgen.util.alteration_table import AlterationTableRow, ShortVariantGene
8
+ from ingestion.nextgen.util.interpretation import map_vendsig
8
9
 
9
10
  SequenceType = Literal["somatic", "germline"]
10
11
 
@@ -204,17 +205,6 @@ def process_vcf(
204
205
  return {"vcf_path_name": vcf_path, "vcf_line_count": line_count}
205
206
 
206
207
 
207
- def map_vendsig(vendsig: str) -> str:
208
- if vendsig in ["Pathogenic"]:
209
- return "VENDSIG=Pathogenic"
210
- elif vendsig in ["Likely Pathogenic", "LikelyPathogenic"]:
211
- return "VENDSIG=Likely pathogenic"
212
- elif vendsig in ["VUS"]:
213
- return "VENDSIG=Uncertain significance"
214
- else:
215
- raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
216
-
217
-
218
208
  def add_vendsig_to_info(
219
209
  info: str,
220
210
  short_variant_table_rows: list[AlterationTableRow[ShortVariantGene]],
@@ -18,7 +18,7 @@ class Variant:
18
18
  self.info = {x.split("=")[0]: x.split("=")[1] for x in fields[7].split(";") if "=" in x}
19
19
  self.frmt = fields[8].split(":")
20
20
  self.smpl = fields[9].split(":")
21
- self.ad_af_dp = {"AD": False, "AF": False, "DP": False}
21
+ self.ad_af_dp: dict[str, bool | str] = {"AD": False, "AF": False, "DP": False}
22
22
 
23
23
  def standardize_allele_frequency(self, log):
24
24
  # Detect if allele frequency is present either in the INFO or FORMAT/SAMPLE fields
@@ -131,19 +131,19 @@ class Variant:
131
131
  )
132
132
  return updated_variant
133
133
 
134
+ @classmethod
135
+ def check_formatting(cls, var: str):
136
+ # Loose formatting check, return as Variant class object
137
+ split_var = var.split("\t")
138
+ if len(split_var) < 8 or not split_var[1].isdigit():
139
+ raise RuntimeError(f"Variant contains incorrect number, or invalid fields: {var}")
134
140
 
135
- def check_formatting(var: str) -> Variant:
136
- # Loose formatting check, return as Variant class object
137
- split_var = var.split("\t")
138
- if len(split_var) < 8 or not split_var[1].isdigit():
139
- raise RuntimeError(f"Variant contains incorrect number, or invalid fields: {var}")
141
+ if len(split_var) == 8:
142
+ split_var.append(".") # Add placeholder for FORMAT
143
+ split_var.append(".") # Add placeholder for SAMPLE
140
144
 
141
- if len(split_var) == 8:
142
- split_var.append(".") # Add placeholder for FORMAT
143
- split_var.append(".") # Add placeholder for SAMPLE
145
+ elif len(split_var) == 9:
146
+ split_var.append(".") # Add placeholder for SAMPLE
144
147
 
145
- elif len(split_var) == 9:
146
- split_var.append(".") # Add placeholder for SAMPLE
147
-
148
- working_variant = Variant(split_var)
149
- return working_variant
148
+ working_variant = cls(split_var)
149
+ return working_variant
@@ -9,15 +9,15 @@ from ingestion.vcf_standardization.util.read_write import (
9
9
  read_headers,
10
10
  read_variants,
11
11
  )
12
- from ingestion.vcf_standardization.Variant import check_formatting
12
+ from ingestion.vcf_standardization.Variant import Variant
13
13
 
14
14
 
15
- def format_variant(variant: str, log: Logger, vendsig_dict: dict = None) -> Optional[str]:
15
+ def format_variant(variant: str, log: Logger, vendsig_dict: dict | None = None) -> Optional[str]:
16
16
  # Ignore structural variants
17
17
  if "SVTYPE" in variant:
18
18
  return None
19
19
  # Working variant
20
- wv = check_formatting(variant)
20
+ wv = Variant.check_formatting(variant)
21
21
 
22
22
  # Only process variants that aren't multiallelic
23
23
  if len(wv.alt.split(",")) == 1:
@@ -48,7 +48,7 @@ def standardize_vcf(
48
48
  out_path: str,
49
49
  case_id: str,
50
50
  log: Logger,
51
- vendsig_dict: dict = None,
51
+ vendsig_dict: dict | None = None,
52
52
  compression: bool = False,
53
53
  ) -> Optional[int]:
54
54
  check_vcf(infile, log)
@@ -1,10 +1,11 @@
1
1
  import gzip
2
+ from logging import Logger
2
3
  import re
3
4
  import os
4
5
  from typing import Iterator, Optional
5
6
 
6
7
 
7
- def check_vcf(infile, log):
8
+ def check_vcf(infile: str, log: Logger) -> None:
8
9
  log.info("Checking VCF file")
9
10
  # Check if file exists. Raise if it doesn't.
10
11
  if os.path.exists(infile) == False:
@@ -68,7 +69,7 @@ def write_vcf(
68
69
  compression: bool,
69
70
  line_count: int,
70
71
  log,
71
- ):
72
+ ) -> int:
72
73
  log.info(f"Writing standardized VCF to {outfile}")
73
74
 
74
75
  with gzip.open(outfile, "wt") if compression else open(outfile, "w") as w:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.33
3
+ Version: 0.8.34
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -31,14 +31,14 @@ ingestion/generic/utils.py,sha256=1MEIru7uq38IjUdL8lcHqDH0oTki9uWrz1f2e-pmRoU,28
31
31
  ingestion/nextgen/__init__.py,sha256=7LQ-h_Bvc5P1QcHMdzsqi1Qm4fTJn04-ozar2ty9wSc,59
32
32
  ingestion/nextgen/process.py,sha256=kDCnU685v7aqJ3i4HpFdb7HqgHRSBKqtYPpuyN7qWmM,3976
33
33
  ingestion/nextgen/util/alteration_table.py,sha256=OqstLK6cgoNvRWy8bW6_iABaAn5ggCi1xBM8GOU6wYQ,6060
34
- ingestion/nextgen/util/interpretation.py,sha256=ozuzb0vozff34zfP6AdOiUmI8Q77hI02jve_nCPZHfE,297
34
+ ingestion/nextgen/util/interpretation.py,sha256=tQ3qzAUwBzwK1tQzH9ujZk_VCQ2wP8HzSZY0fImJ5Wo,818
35
35
  ingestion/nextgen/util/manifest_helpers.py,sha256=2xrpEtHbCb1Kea1wJeObkDfTiBklmffQt_o2hMgOSOE,1208
36
- ingestion/nextgen/util/nextgen_specific_genes.py,sha256=hgam7HVE324FwOf7G4Wk4cUArch9vHIjBZRUUyF3ukg,1206
36
+ ingestion/nextgen/util/nextgen_specific_genes.py,sha256=1jFcqvtYAlJ7eBwOBm1UC2TzAbjHjdlvPBUzxr1G8dY,1206
37
37
  ingestion/nextgen/util/pre_filter_somatic_vcf.py,sha256=mIaUihmGLbS38D4Gy_Qtf1lFAfW0A-LgAgQmsrEiI-M,3529
38
38
  ingestion/nextgen/util/process_cnv.py,sha256=MIirc8e0k6lsaTZkRM3U3L3IvbrcHmKQ4xlIu585514,2430
39
- ingestion/nextgen/util/process_manifest.py,sha256=EGYaTcub4M08mFTAh4CNHPRkP8_a5r4jMJaExm9Nkko,8423
39
+ ingestion/nextgen/util/process_manifest.py,sha256=EnV9I90vnanDvuoErbMfz6yAfjzM5LdhhUF4q5DJd8w,8428
40
40
  ingestion/nextgen/util/process_structural.py,sha256=FKjkK7BkIlocnLs8rFCjrMC39FCQnD0nQCeWvi7cRoA,7539
41
- ingestion/nextgen/util/process_vcf.py,sha256=SN0C13F45R_N5UaMaVSUDSCtIMmpHfaMTo7_5PkFkrM,8085
41
+ ingestion/nextgen/util/process_vcf.py,sha256=ZZURSMnZhHDpFahzijZ4MvCfSWTPdIktzmnCKVVUbGs,7768
42
42
  ingestion/nextgen/util/types.py,sha256=SSzt5gv-kss1PR45eQUelypWrGI-dAfQMO3GSD-T-Wg,22
43
43
  ingestion/resources/GRCh37_map.csv.gz,sha256=JOEkjtbYrJpIdyoZdCvfJhvvz2dNfkSve7lXSXkCCD8,408290
44
44
  ingestion/resources/GRCh38_map.csv.gz,sha256=qriYO2_buCCb4T6WcuZ-pCwPxMsm0TL2OxAHvJ1cEfA,612373
@@ -47,13 +47,13 @@ ingestion/shared_util/coords_to_genes.py,sha256=vz9EfgFm3BS6pEPnslbEka8cJKlQZtHJ
47
47
  ingestion/shared_util/gene_to_coords.py,sha256=M-q5ateLSQ4fCF0uMk5TX2uBLRrcZzXqXEf05TPaLsU,876
48
48
  ingestion/shared_util/open_maybe_gzipped.py,sha256=FrOPJ4OgfpQGyT3f1Su1rFeuuYYu6QJ-nVIBIosbfhw,232
49
49
  ingestion/shared_util/types.py,sha256=u9AD2OrTQWMBtK_7VXHsD8Rv6HFs-7ZUItNl4KXdL7k,68
50
- ingestion/vcf_standardization/Variant.py,sha256=MBT8x25Ub1GRkTtnvPMV9SN7LUoF4Xkons-m3kYDkeY,5398
50
+ ingestion/vcf_standardization/Variant.py,sha256=aoSqT4XAECxCF0JZgv1YRmxuw20WGeWkwFTw0x6FmWc,5475
51
51
  ingestion/vcf_standardization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- ingestion/vcf_standardization/standardize.py,sha256=PaRqQRSrnI79WFgNvbvnXFcvXHYqshOOxCXup5eMZ_M,2289
52
+ ingestion/vcf_standardization/standardize.py,sha256=zYzZxncq8USA1bUs26L-ByLPTnUlGyVvS3LJVGD19BM,2302
53
53
  ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
54
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
55
55
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
56
- ingestion/vcf_standardization/util/read_write.py,sha256=IQotJ27To1MoQcRstc5AbHZtUuJz5cqkkZiHsDNaBvI,2471
57
- phc_ingestion-0.8.33.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
58
- phc_ingestion-0.8.33.dist-info/METADATA,sha256=CM6kTtndCIkqq55vXl_x2vehEZ_mL29fK7TblsCsz9E,552
59
- phc_ingestion-0.8.33.dist-info/RECORD,,
56
+ ingestion/vcf_standardization/util/read_write.py,sha256=x3Pf6Dq8tmolblbCS5CrNmrcHS3FGfqBSFpFgvFGC4g,2526
57
+ phc_ingestion-0.8.34.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
58
+ phc_ingestion-0.8.34.dist-info/METADATA,sha256=5afEPjKh3tGTH02AjkVcQ-AluTJ-JuEqSBiZkD7HHAU,552
59
+ phc_ingestion-0.8.34.dist-info/RECORD,,