phc-ingestion 0.8.33__tar.gz → 0.8.34__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/PKG-INFO +1 -1
- phc-ingestion-0.8.34/ingestion/nextgen/util/interpretation.py +28 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/nextgen_specific_genes.py +1 -1
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_manifest.py +1 -1
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_vcf.py +1 -11
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/Variant.py +14 -14
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/standardize.py +4 -4
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/read_write.py +3 -2
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/pyproject.toml +1 -1
- phc-ingestion-0.8.33/ingestion/nextgen/util/interpretation.py +0 -11
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/PYPI.md +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/process.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/cnv.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/detect_genome_ref.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/hla.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/ihc.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/interpretation.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/json.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/metadata.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/specimen_details.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/structural.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tar.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tests.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tmb.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/tsv.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/caris/util/vcf.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/process.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/cnv.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/fnv.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/interpretation.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/foundation/util/vcf_etl.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/generic/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/generic/process.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/generic/utils.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/process.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/alteration_table.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/manifest_helpers.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_cnv.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/process_structural.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/types.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/resources/GRCh37_map.csv.gz +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/resources/GRCh38_map.csv.gz +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/coords_to_genes.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/gene_to_coords.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/shared_util/types.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/__init__.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
- {phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from logging import Logger
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def map_interpretation(status: str, log: Logger):
|
|
5
|
+
"""
|
|
6
|
+
Map interpretation for structural and copy number variants
|
|
7
|
+
"""
|
|
8
|
+
if status == "Pathogenic":
|
|
9
|
+
return "Pathogenic"
|
|
10
|
+
elif "VUS" in status:
|
|
11
|
+
return "Uncertain significance"
|
|
12
|
+
else:
|
|
13
|
+
log.error(f"Failed to resolve interpretation: {status}")
|
|
14
|
+
return ""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def map_vendsig(vendsig: str) -> str:
|
|
18
|
+
"""
|
|
19
|
+
Map vendor significance for short variants
|
|
20
|
+
"""
|
|
21
|
+
if vendsig in ["Pathogenic"]:
|
|
22
|
+
return "VENDSIG=Pathogenic"
|
|
23
|
+
elif vendsig in ["Likely Pathogenic", "LikelyPathogenic"]:
|
|
24
|
+
return "VENDSIG=Likely pathogenic"
|
|
25
|
+
elif vendsig in ["VUS"]:
|
|
26
|
+
return "VENDSIG=Uncertain significance"
|
|
27
|
+
else:
|
|
28
|
+
raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
|
{phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/nextgen_specific_genes.py
RENAMED
|
@@ -14,7 +14,7 @@ nextgen_specific_genes_with_location: list[GeneWithLocation] = [
|
|
|
14
14
|
{"gene": "CCND3", "chr": "chr6", "start": 41920534, "end": 42562008},
|
|
15
15
|
{"gene": "MYC", "chr": "chr8", "start": 125309416, "end": 129673293},
|
|
16
16
|
{"gene": "CCND1", "chr": "chr11", "start": 69090733, "end": 69656860},
|
|
17
|
-
{"gene": "IGH", "chr": "chr14", "start":
|
|
17
|
+
{"gene": "IGH", "chr": "chr14", "start": 105325507, "end": 109902208},
|
|
18
18
|
{"gene": "MAF", "chr": "chr16", "start": 78428398, "end": 79615096},
|
|
19
19
|
{"gene": "MAFB", "chr": "chr20", "start": 39039005, "end": 40688948},
|
|
20
20
|
{"gene": "IGL", "chr": "chr22", "start": 22012552, "end": 22965858},
|
|
@@ -5,6 +5,7 @@ from logging import Logger
|
|
|
5
5
|
from typing import Literal
|
|
6
6
|
|
|
7
7
|
from ingestion.nextgen.util.alteration_table import AlterationTableRow, ShortVariantGene
|
|
8
|
+
from ingestion.nextgen.util.interpretation import map_vendsig
|
|
8
9
|
|
|
9
10
|
SequenceType = Literal["somatic", "germline"]
|
|
10
11
|
|
|
@@ -204,17 +205,6 @@ def process_vcf(
|
|
|
204
205
|
return {"vcf_path_name": vcf_path, "vcf_line_count": line_count}
|
|
205
206
|
|
|
206
207
|
|
|
207
|
-
def map_vendsig(vendsig: str) -> str:
|
|
208
|
-
if vendsig in ["Pathogenic"]:
|
|
209
|
-
return "VENDSIG=Pathogenic"
|
|
210
|
-
elif vendsig in ["Likely Pathogenic", "LikelyPathogenic"]:
|
|
211
|
-
return "VENDSIG=Likely pathogenic"
|
|
212
|
-
elif vendsig in ["VUS"]:
|
|
213
|
-
return "VENDSIG=Uncertain significance"
|
|
214
|
-
else:
|
|
215
|
-
raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
|
|
216
|
-
|
|
217
|
-
|
|
218
208
|
def add_vendsig_to_info(
|
|
219
209
|
info: str,
|
|
220
210
|
short_variant_table_rows: list[AlterationTableRow[ShortVariantGene]],
|
|
@@ -18,7 +18,7 @@ class Variant:
|
|
|
18
18
|
self.info = {x.split("=")[0]: x.split("=")[1] for x in fields[7].split(";") if "=" in x}
|
|
19
19
|
self.frmt = fields[8].split(":")
|
|
20
20
|
self.smpl = fields[9].split(":")
|
|
21
|
-
self.ad_af_dp = {"AD": False, "AF": False, "DP": False}
|
|
21
|
+
self.ad_af_dp: dict[str, bool | str] = {"AD": False, "AF": False, "DP": False}
|
|
22
22
|
|
|
23
23
|
def standardize_allele_frequency(self, log):
|
|
24
24
|
# Detect if allele frequency is present either in the INFO or FORMAT/SAMPLE fields
|
|
@@ -131,19 +131,19 @@ class Variant:
|
|
|
131
131
|
)
|
|
132
132
|
return updated_variant
|
|
133
133
|
|
|
134
|
+
@classmethod
|
|
135
|
+
def check_formatting(cls, var: str):
|
|
136
|
+
# Loose formatting check, return as Variant class object
|
|
137
|
+
split_var = var.split("\t")
|
|
138
|
+
if len(split_var) < 8 or not split_var[1].isdigit():
|
|
139
|
+
raise RuntimeError(f"Variant contains incorrect number, or invalid fields: {var}")
|
|
134
140
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
if len(split_var) < 8 or not split_var[1].isdigit():
|
|
139
|
-
raise RuntimeError(f"Variant contains incorrect number, or invalid fields: {var}")
|
|
141
|
+
if len(split_var) == 8:
|
|
142
|
+
split_var.append(".") # Add placeholder for FORMAT
|
|
143
|
+
split_var.append(".") # Add placeholder for SAMPLE
|
|
140
144
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
split_var.append(".") # Add placeholder for SAMPLE
|
|
145
|
+
elif len(split_var) == 9:
|
|
146
|
+
split_var.append(".") # Add placeholder for SAMPLE
|
|
144
147
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
working_variant = Variant(split_var)
|
|
149
|
-
return working_variant
|
|
148
|
+
working_variant = cls(split_var)
|
|
149
|
+
return working_variant
|
|
@@ -9,15 +9,15 @@ from ingestion.vcf_standardization.util.read_write import (
|
|
|
9
9
|
read_headers,
|
|
10
10
|
read_variants,
|
|
11
11
|
)
|
|
12
|
-
from ingestion.vcf_standardization.Variant import check_formatting
|
|
12
|
+
from ingestion.vcf_standardization.Variant import Variant
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def format_variant(variant: str, log: Logger, vendsig_dict: dict = None) -> Optional[str]:
|
|
15
|
+
def format_variant(variant: str, log: Logger, vendsig_dict: dict | None = None) -> Optional[str]:
|
|
16
16
|
# Ignore structural variants
|
|
17
17
|
if "SVTYPE" in variant:
|
|
18
18
|
return None
|
|
19
19
|
# Working variant
|
|
20
|
-
wv = check_formatting(variant)
|
|
20
|
+
wv = Variant.check_formatting(variant)
|
|
21
21
|
|
|
22
22
|
# Only process variants that aren't multiallelic
|
|
23
23
|
if len(wv.alt.split(",")) == 1:
|
|
@@ -48,7 +48,7 @@ def standardize_vcf(
|
|
|
48
48
|
out_path: str,
|
|
49
49
|
case_id: str,
|
|
50
50
|
log: Logger,
|
|
51
|
-
vendsig_dict: dict = None,
|
|
51
|
+
vendsig_dict: dict | None = None,
|
|
52
52
|
compression: bool = False,
|
|
53
53
|
) -> Optional[int]:
|
|
54
54
|
check_vcf(infile, log)
|
{phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/read_write.py
RENAMED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import gzip
|
|
2
|
+
from logging import Logger
|
|
2
3
|
import re
|
|
3
4
|
import os
|
|
4
5
|
from typing import Iterator, Optional
|
|
5
6
|
|
|
6
7
|
|
|
7
|
-
def check_vcf(infile, log):
|
|
8
|
+
def check_vcf(infile: str, log: Logger) -> None:
|
|
8
9
|
log.info("Checking VCF file")
|
|
9
10
|
# Check if file exists. Raise if it doesn't.
|
|
10
11
|
if os.path.exists(infile) == False:
|
|
@@ -68,7 +69,7 @@ def write_vcf(
|
|
|
68
69
|
compression: bool,
|
|
69
70
|
line_count: int,
|
|
70
71
|
log,
|
|
71
|
-
):
|
|
72
|
+
) -> int:
|
|
72
73
|
log.info(f"Writing standardized VCF to {outfile}")
|
|
73
74
|
|
|
74
75
|
with gzip.open(outfile, "wt") if compression else open(outfile, "w") as w:
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
from logging import Logger
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def map_interpretation(status: str, log: Logger):
|
|
5
|
-
if status == "Pathogenic":
|
|
6
|
-
return "Pathogenic"
|
|
7
|
-
elif "VUS" in status:
|
|
8
|
-
return "Uncertain significance"
|
|
9
|
-
else:
|
|
10
|
-
log.error(f"Failed to resolve interpretation: {status}")
|
|
11
|
-
return ""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/nextgen/util/pre_filter_somatic_vcf.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/__init__.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/af_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.33 → phc-ingestion-0.8.34}/ingestion/vcf_standardization/util/dp_helpers.py
RENAMED
|
File without changes
|