phc-ingestion 0.8.33__tar.gz → 0.8.34__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Files changed (60)
  1. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/PKG-INFO +1 -1
  2. phc-ingestion-0.8.34/ingestion/nextgen/util/interpretation.py +28 -0
  3. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/nextgen_specific_genes.py +1 -1
  4. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_manifest.py +1 -1
  5. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_vcf.py +1 -11
  6. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/Variant.py +14 -14
  7. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/standardize.py +4 -4
  8. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/read_write.py +3 -2
  9. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/pyproject.toml +1 -1
  10. phc-ingestion-0.8.33/ingestion/nextgen/util/interpretation.py +0 -11
  11. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/PYPI.md +0 -0
  12. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/__init__.py +0 -0
  13. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/__init__.py +0 -0
  14. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/process.py +0 -0
  15. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/__init__.py +0 -0
  16. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/cnv.py +0 -0
  17. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/detect_genome_ref.py +0 -0
  18. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/ga4gh.py +0 -0
  19. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/hla.py +0 -0
  20. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/ihc.py +0 -0
  21. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/interpretation.py +0 -0
  22. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/json.py +0 -0
  23. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/metadata.py +0 -0
  24. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/specimen_details.py +0 -0
  25. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/structural.py +0 -0
  26. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tar.py +0 -0
  27. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tests.py +0 -0
  28. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tmb.py +0 -0
  29. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tsv.py +0 -0
  30. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/vcf.py +0 -0
  31. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/__init__.py +0 -0
  32. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/process.py +0 -0
  33. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/__init__.py +0 -0
  34. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/cnv.py +0 -0
  35. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/fnv.py +0 -0
  36. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/ga4gh.py +0 -0
  37. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/interpretation.py +0 -0
  38. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/vcf_etl.py +0 -0
  39. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/generic/__init__.py +0 -0
  40. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/generic/process.py +0 -0
  41. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/generic/utils.py +0 -0
  42. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/__init__.py +0 -0
  43. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/process.py +0 -0
  44. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/alteration_table.py +0 -0
  45. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/manifest_helpers.py +0 -0
  46. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
  47. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_cnv.py +0 -0
  48. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_structural.py +0 -0
  49. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/types.py +0 -0
  50. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/resources/GRCh37_map.csv.gz +0 -0
  51. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/resources/GRCh38_map.csv.gz +0 -0
  52. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/__init__.py +0 -0
  53. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/coords_to_genes.py +0 -0
  54. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/gene_to_coords.py +0 -0
  55. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
  56. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/types.py +0 -0
  57. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/__init__.py +0 -0
  58. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/__init__.py +0 -0
  59. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
  60. {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.33
3
+ Version: 0.8.34
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -0,0 +1,28 @@
1
+ from logging import Logger
2
+
3
+
4
+ def map_interpretation(status: str, log: Logger):
5
+ """
6
+ Map interpretation for structural and copy number variants
7
+ """
8
+ if status == "Pathogenic":
9
+ return "Pathogenic"
10
+ elif "VUS" in status:
11
+ return "Uncertain significance"
12
+ else:
13
+ log.error(f"Failed to resolve interpretation: {status}")
14
+ return ""
15
+
16
+
17
+ def map_vendsig(vendsig: str) -> str:
18
+ """
19
+ Map vendor significance for short variants
20
+ """
21
+ if vendsig in ["Pathogenic"]:
22
+ return "VENDSIG=Pathogenic"
23
+ elif vendsig in ["Likely Pathogenic", "LikelyPathogenic"]:
24
+ return "VENDSIG=Likely pathogenic"
25
+ elif vendsig in ["VUS"]:
26
+ return "VENDSIG=Uncertain significance"
27
+ else:
28
+ raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
@@ -14,7 +14,7 @@ nextgen_specific_genes_with_location: list[GeneWithLocation] = [
14
14
  {"gene": "CCND3", "chr": "chr6", "start": 41920534, "end": 42562008},
15
15
  {"gene": "MYC", "chr": "chr8", "start": 125309416, "end": 129673293},
16
16
  {"gene": "CCND1", "chr": "chr11", "start": 69090733, "end": 69656860},
17
- {"gene": "IGH", "chr": "chr14", "start": 105516968, "end": 109902208},
17
+ {"gene": "IGH", "chr": "chr14", "start": 105325507, "end": 109902208},
18
18
  {"gene": "MAF", "chr": "chr16", "start": 78428398, "end": 79615096},
19
19
  {"gene": "MAFB", "chr": "chr20", "start": 39039005, "end": 40688948},
20
20
  {"gene": "IGL", "chr": "chr22", "start": 22012552, "end": 22965858},
@@ -65,7 +65,7 @@ def get_cell_purity(interpretation_lines: list):
65
65
  return float(00.00)
66
66
 
67
67
 
68
- def extract_patient_data(patient_info_lines: list):
68
+ def extract_patient_data(patient_info_lines: list[str]):
69
69
  patient_data: dict = {}
70
70
  patient_data["patientInfo"] = {}
71
71
 
@@ -5,6 +5,7 @@ from logging import Logger
5
5
  from typing import Literal
6
6
 
7
7
  from ingestion.nextgen.util.alteration_table import AlterationTableRow, ShortVariantGene
8
+ from ingestion.nextgen.util.interpretation import map_vendsig
8
9
 
9
10
  SequenceType = Literal["somatic", "germline"]
10
11
 
@@ -204,17 +205,6 @@ def process_vcf(
204
205
  return {"vcf_path_name": vcf_path, "vcf_line_count": line_count}
205
206
 
206
207
 
207
- def map_vendsig(vendsig: str) -> str:
208
- if vendsig in ["Pathogenic"]:
209
- return "VENDSIG=Pathogenic"
210
- elif vendsig in ["Likely Pathogenic", "LikelyPathogenic"]:
211
- return "VENDSIG=Likely pathogenic"
212
- elif vendsig in ["VUS"]:
213
- return "VENDSIG=Uncertain significance"
214
- else:
215
- raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
216
-
217
-
218
208
  def add_vendsig_to_info(
219
209
  info: str,
220
210
  short_variant_table_rows: list[AlterationTableRow[ShortVariantGene]],
@@ -18,7 +18,7 @@ class Variant:
18
18
  self.info = {x.split("=")[0]: x.split("=")[1] for x in fields[7].split(";") if "=" in x}
19
19
  self.frmt = fields[8].split(":")
20
20
  self.smpl = fields[9].split(":")
21
- self.ad_af_dp = {"AD": False, "AF": False, "DP": False}
21
+ self.ad_af_dp: dict[str, bool | str] = {"AD": False, "AF": False, "DP": False}
22
22
 
23
23
  def standardize_allele_frequency(self, log):
24
24
  # Detect if allele frequency is present either in the INFO or FORMAT/SAMPLE fields
@@ -131,19 +131,19 @@ class Variant:
131
131
  )
132
132
  return updated_variant
133
133
 
134
+ @classmethod
135
+ def check_formatting(cls, var: str):
136
+ # Loose formatting check, return as Variant class object
137
+ split_var = var.split("\t")
138
+ if len(split_var) < 8 or not split_var[1].isdigit():
139
+ raise RuntimeError(f"Variant contains incorrect number, or invalid fields: {var}")
134
140
 
135
- def check_formatting(var: str) -> Variant:
136
- # Loose formatting check, return as Variant class object
137
- split_var = var.split("\t")
138
- if len(split_var) < 8 or not split_var[1].isdigit():
139
- raise RuntimeError(f"Variant contains incorrect number, or invalid fields: {var}")
141
+ if len(split_var) == 8:
142
+ split_var.append(".") # Add placeholder for FORMAT
143
+ split_var.append(".") # Add placeholder for SAMPLE
140
144
 
141
- if len(split_var) == 8:
142
- split_var.append(".") # Add placeholder for FORMAT
143
- split_var.append(".") # Add placeholder for SAMPLE
145
+ elif len(split_var) == 9:
146
+ split_var.append(".") # Add placeholder for SAMPLE
144
147
 
145
- elif len(split_var) == 9:
146
- split_var.append(".") # Add placeholder for SAMPLE
147
-
148
- working_variant = Variant(split_var)
149
- return working_variant
148
+ working_variant = cls(split_var)
149
+ return working_variant
@@ -9,15 +9,15 @@ from ingestion.vcf_standardization.util.read_write import (
9
9
  read_headers,
10
10
  read_variants,
11
11
  )
12
- from ingestion.vcf_standardization.Variant import check_formatting
12
+ from ingestion.vcf_standardization.Variant import Variant
13
13
 
14
14
 
15
- def format_variant(variant: str, log: Logger, vendsig_dict: dict = None) -> Optional[str]:
15
+ def format_variant(variant: str, log: Logger, vendsig_dict: dict | None = None) -> Optional[str]:
16
16
  # Ignore structural variants
17
17
  if "SVTYPE" in variant:
18
18
  return None
19
19
  # Working variant
20
- wv = check_formatting(variant)
20
+ wv = Variant.check_formatting(variant)
21
21
 
22
22
  # Only process variants that aren't multiallelic
23
23
  if len(wv.alt.split(",")) == 1:
@@ -48,7 +48,7 @@ def standardize_vcf(
48
48
  out_path: str,
49
49
  case_id: str,
50
50
  log: Logger,
51
- vendsig_dict: dict = None,
51
+ vendsig_dict: dict | None = None,
52
52
  compression: bool = False,
53
53
  ) -> Optional[int]:
54
54
  check_vcf(infile, log)
@@ -1,10 +1,11 @@
1
1
  import gzip
2
+ from logging import Logger
2
3
  import re
3
4
  import os
4
5
  from typing import Iterator, Optional
5
6
 
6
7
 
7
- def check_vcf(infile, log):
8
+ def check_vcf(infile: str, log: Logger) -> None:
8
9
  log.info("Checking VCF file")
9
10
  # Check if file exists. Raise if it doesn't.
10
11
  if os.path.exists(infile) == False:
@@ -68,7 +69,7 @@ def write_vcf(
68
69
  compression: bool,
69
70
  line_count: int,
70
71
  log,
71
- ):
72
+ ) -> int:
72
73
  log.info(f"Writing standardized VCF to {outfile}")
73
74
 
74
75
  with gzip.open(outfile, "wt") if compression else open(outfile, "w") as w:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "phc-ingestion"
3
- version = "0.8.33"
3
+ version = "0.8.34"
4
4
  description = "Functions for LifeOmic PHC genomic ingestions"
5
5
  authors = [
6
6
  { name = "LifeOmic Development", email = "development@lifeomic.com" },
@@ -1,11 +0,0 @@
1
- from logging import Logger
2
-
3
-
4
- def map_interpretation(status: str, log: Logger):
5
- if status == "Pathogenic":
6
- return "Pathogenic"
7
- elif "VUS" in status:
8
- return "Uncertain significance"
9
- else:
10
- log.error(f"Failed to resolve interpretation: {status}")
11
- return ""
File without changes