phc-ingestion 0.8.26__tar.gz → 0.8.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/PKG-INFO +1 -1
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/process.py +2 -1
- phc-ingestion-0.8.28/ingestion/nextgen/util/nextgen_specific_genes.py +29 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/process_manifest.py +3 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/process_structural.py +47 -12
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/pyproject.toml +1 -1
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/PYPI.md +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/process.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/cnv.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/detect_genome_ref.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/hla.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/ihc.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/interpretation.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/json.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/metadata.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/specimen_details.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/structural.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/tar.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/tests.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/tmb.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/tsv.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/caris/util/vcf.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/process.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/util/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/util/cnv.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/util/fnv.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/util/interpretation.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/foundation/util/vcf_etl.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/generic/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/generic/process.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/generic/utils.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/alteration_table.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/interpretation.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/manifest_helpers.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/process_cnv.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/process_vcf.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/types.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/resources/GRCh37_map.csv.gz +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/resources/GRCh38_map.csv.gz +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/shared_util/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/shared_util/coords_to_genes.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/shared_util/gene_to_coords.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/shared_util/types.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/Variant.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/standardize.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/__init__.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
- {phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/read_write.py +0 -0
|
@@ -46,7 +46,7 @@ def process(
|
|
|
46
46
|
prefix=case_id,
|
|
47
47
|
log=log,
|
|
48
48
|
)
|
|
49
|
-
structural_path_name = process_structural(
|
|
49
|
+
structural_path_name, translocations = process_structural(
|
|
50
50
|
xml_in_file=vendor_files["xmlFile"],
|
|
51
51
|
sv_in_file=vendor_files["somaticSvVcfFile"],
|
|
52
52
|
root_path=local_output_dir,
|
|
@@ -59,6 +59,7 @@ def process(
|
|
|
59
59
|
prefix=case_id,
|
|
60
60
|
include_copy_number=bool(cnv_path_name),
|
|
61
61
|
include_structural=bool(structural_path_name),
|
|
62
|
+
somatic_translocations=translocations,
|
|
62
63
|
log=log,
|
|
63
64
|
)
|
|
64
65
|
pre_filtered_somatic_vcf_path = pre_filter_somatic_vcf(
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from typing import TypedDict
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class GeneWithLocation(TypedDict):
|
|
5
|
+
gene: str
|
|
6
|
+
chr: str
|
|
7
|
+
start: int
|
|
8
|
+
end: int
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
nextgen_specific_genes_with_location: list[GeneWithLocation] = [
|
|
12
|
+
{"gene": "IGK", "chr": "chr2", "start": 88852034, "end": 90258119},
|
|
13
|
+
{"gene": "NSD2", "chr": "chr4", "start": 1792518, "end": 1940193},
|
|
14
|
+
{"gene": "CCND3", "chr": "chr6", "start": 41920534, "end": 42562008},
|
|
15
|
+
{"gene": "MYC", "chr": "chr8", "start": 125309416, "end": 129673293},
|
|
16
|
+
{"gene": "CCND1", "chr": "chr11", "start": 69090733, "end": 69656860},
|
|
17
|
+
{"gene": "IGH", "chr": "chr14", "start": 105578834, "end": 109902208},
|
|
18
|
+
{"gene": "MAF", "chr": "chr16", "start": 78428398, "end": 79615096},
|
|
19
|
+
{"gene": "MAFB", "chr": "chr20", "start": 39039005, "end": 40688948},
|
|
20
|
+
{"gene": "IGL", "chr": "chr22", "start": 22012552, "end": 22965858},
|
|
21
|
+
]
|
|
22
|
+
nextgen_specific_genes: set[str] = {gene["gene"] for gene in nextgen_specific_genes_with_location}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def maybe_get_matching_gene_for_location(chr: str, position: int) -> str | None:
|
|
26
|
+
for gene in nextgen_specific_genes_with_location:
|
|
27
|
+
if gene["chr"] == chr and gene["start"] <= position <= gene["end"]:
|
|
28
|
+
return gene["gene"]
|
|
29
|
+
return None
|
|
@@ -176,6 +176,7 @@ def process_manifest(
|
|
|
176
176
|
prefix: str,
|
|
177
177
|
include_copy_number: bool,
|
|
178
178
|
include_structural: bool,
|
|
179
|
+
somatic_translocations: list[str],
|
|
179
180
|
log: Logger,
|
|
180
181
|
):
|
|
181
182
|
test_text = extract_xml_text(xml_in_file)
|
|
@@ -186,6 +187,8 @@ def process_manifest(
|
|
|
186
187
|
hyperdiploidy_chromosomes = manifest_helpers.extract_hyperdiploidy_chromosomes(xml_in_file, log)
|
|
187
188
|
if hyperdiploidy_chromosomes:
|
|
188
189
|
manifest["hyperdiploidyTrisomies"] = hyperdiploidy_chromosomes
|
|
190
|
+
if somatic_translocations:
|
|
191
|
+
manifest["somaticTranslocations"] = somatic_translocations
|
|
189
192
|
|
|
190
193
|
manifest["reportFile"] = f".lifeomic/nextgen/{prefix}/{prefix}.pdf"
|
|
191
194
|
manifest["sourceFileId"] = source_file_id
|
|
@@ -5,6 +5,10 @@ from typing import TypedDict
|
|
|
5
5
|
from ingestion.shared_util.coords_to_genes import coords_to_genes
|
|
6
6
|
from ingestion.nextgen.util.alteration_table import extract_variant_table
|
|
7
7
|
from ingestion.nextgen.util.interpretation import map_interpretation
|
|
8
|
+
from ingestion.nextgen.util.nextgen_specific_genes import (
|
|
9
|
+
maybe_get_matching_gene_for_location,
|
|
10
|
+
nextgen_specific_genes,
|
|
11
|
+
)
|
|
8
12
|
from ingestion.shared_util.open_maybe_gzipped import open_maybe_gzipped
|
|
9
13
|
|
|
10
14
|
|
|
@@ -42,9 +46,26 @@ def is_del_dup_or_ins(variant: list[str]) -> bool:
|
|
|
42
46
|
return any([x in variant[2] for x in ["MantaDEL", "MantaDUP", "MantaINS"]])
|
|
43
47
|
|
|
44
48
|
|
|
49
|
+
def get_gene_from_coords(
|
|
50
|
+
chromosome: str, start_position: str, end_position: str, log: Logger
|
|
51
|
+
) -> str:
|
|
52
|
+
"""
|
|
53
|
+
A number of genes of interest with specific start and end positions have been provided.
|
|
54
|
+
If a variant falls within the start and end positions of one of those genes of interest, that gene will be used.
|
|
55
|
+
Otherwise, we fall back to the standard gene lookup.
|
|
56
|
+
"""
|
|
57
|
+
center_position = int((int(start_position) + int(end_position)) / 2)
|
|
58
|
+
|
|
59
|
+
gene = maybe_get_matching_gene_for_location(chromosome, center_position)
|
|
60
|
+
if gene:
|
|
61
|
+
return gene
|
|
62
|
+
|
|
63
|
+
return coords_to_genes("GRCh38", chromosome, center_position, log)
|
|
64
|
+
|
|
65
|
+
|
|
45
66
|
def process_structural(
|
|
46
67
|
sv_in_file: str, xml_in_file, root_path: str, prefix: str, log: Logger
|
|
47
|
-
) -> str | None:
|
|
68
|
+
) -> tuple[str | None, list[str]]:
|
|
48
69
|
structural_variant_table = extract_variant_table(
|
|
49
70
|
xml_in_file=xml_in_file, variant_type="structural", log=log
|
|
50
71
|
)
|
|
@@ -74,9 +95,7 @@ def process_structural(
|
|
|
74
95
|
effect = "insertion"
|
|
75
96
|
|
|
76
97
|
# Get genes from coordinates using center point of start and end positions
|
|
77
|
-
gene1 = coords_to_genes(
|
|
78
|
-
"GRCh38", chromosome1, int((int(start_position1) + int(end_position1)) / 2), log
|
|
79
|
-
)
|
|
98
|
+
gene1 = get_gene_from_coords(chromosome1, start_position1, end_position1, log)
|
|
80
99
|
gene2 = "N/A"
|
|
81
100
|
|
|
82
101
|
else:
|
|
@@ -89,12 +108,8 @@ def process_structural(
|
|
|
89
108
|
effect = "translocation"
|
|
90
109
|
|
|
91
110
|
# Get genes from coordinates using center point of start and end positions
|
|
92
|
-
gene1 = coords_to_genes(
|
|
93
|
-
"GRCh38", chromosome1, int((int(start_position1) + int(end_position1)) / 2), log
|
|
94
|
-
)
|
|
95
|
-
gene2 = coords_to_genes(
|
|
96
|
-
"GRCh38", chromosome2, int((int(start_position2) + int(end_position2)) / 2), log
|
|
97
|
-
)
|
|
111
|
+
gene1 = get_gene_from_coords(chromosome1, start_position1, end_position1, log)
|
|
112
|
+
gene2 = get_gene_from_coords(chromosome2, start_position2, end_position2, log)
|
|
98
113
|
|
|
99
114
|
# Scrape interpretation
|
|
100
115
|
interpretation = "unknown"
|
|
@@ -149,7 +164,7 @@ def process_structural(
|
|
|
149
164
|
|
|
150
165
|
if not deduped_structural_variants:
|
|
151
166
|
log.info(f"Ignoring empty structural variant file {sv_in_file}")
|
|
152
|
-
return None
|
|
167
|
+
return (None, [])
|
|
153
168
|
|
|
154
169
|
log.info(f"Saving file to {structural_variant_path_name}")
|
|
155
170
|
with open(structural_variant_path_name, "w+") as f:
|
|
@@ -159,4 +174,24 @@ def process_structural(
|
|
|
159
174
|
for sv in deduped_structural_variants:
|
|
160
175
|
f.write(structural_variant_to_csv_row(sv))
|
|
161
176
|
|
|
162
|
-
|
|
177
|
+
log.info("Finding structural variant translocations for genes of interest")
|
|
178
|
+
translocations = [sv for sv in deduped_structural_variants if sv["effect"] == "translocation"]
|
|
179
|
+
formatted_translocations: set[str] = set()
|
|
180
|
+
for translocation in translocations:
|
|
181
|
+
gene1, gene2 = translocation["gene1"], translocation["gene2"]
|
|
182
|
+
# MYC is a special case
|
|
183
|
+
if gene1 == "MYC" or gene2 == "MYC":
|
|
184
|
+
formatted_translocations.add("t(MYC)")
|
|
185
|
+
continue
|
|
186
|
+
if gene1 in nextgen_specific_genes and gene2 in nextgen_specific_genes:
|
|
187
|
+
chr1, chr2 = int(translocation["position1"][0][3:]), int(
|
|
188
|
+
translocation["position2"][0][3:]
|
|
189
|
+
)
|
|
190
|
+
# Ensure chromosomes are in ascending order
|
|
191
|
+
if chr1 > chr2:
|
|
192
|
+
chr1, chr2 = chr2, chr1
|
|
193
|
+
formatted_translocations.add(f"t({chr1};{chr2})")
|
|
194
|
+
|
|
195
|
+
log.info(f"Found {len(formatted_translocations)} translocations for genes of interest")
|
|
196
|
+
|
|
197
|
+
return structural_variant_path_name, list(formatted_translocations)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/nextgen/util/pre_filter_somatic_vcf.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/__init__.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/af_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/dp_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.26 → phc-ingestion-0.8.28}/ingestion/vcf_standardization/util/read_write.py
RENAMED
|
File without changes
|