phc-ingestion 0.8.26__py3-none-any.whl → 0.8.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,7 +46,7 @@ def process(
46
46
  prefix=case_id,
47
47
  log=log,
48
48
  )
49
- structural_path_name = process_structural(
49
+ structural_path_name, translocations = process_structural(
50
50
  xml_in_file=vendor_files["xmlFile"],
51
51
  sv_in_file=vendor_files["somaticSvVcfFile"],
52
52
  root_path=local_output_dir,
@@ -59,6 +59,7 @@ def process(
59
59
  prefix=case_id,
60
60
  include_copy_number=bool(cnv_path_name),
61
61
  include_structural=bool(structural_path_name),
62
+ somatic_translocations=translocations,
62
63
  log=log,
63
64
  )
64
65
  pre_filtered_somatic_vcf_path = pre_filter_somatic_vcf(
@@ -0,0 +1,29 @@
1
+ from typing import TypedDict
2
+
3
+
4
class GeneWithLocation(TypedDict):
    """A gene symbol paired with the chromosome interval assigned to it."""

    # Gene symbol returned when a position falls inside the interval.
    gene: str
    # Chromosome name; the entries in this module use the "chr" prefix.
    chr: str
    # First position of the interval (compared inclusively by the lookup).
    start: int
    # Last position of the interval (compared inclusively by the lookup).
    end: int
9
+
10
+
11
# Genes of interest with the interval each one is matched against.
# NOTE(review): coordinates are presumably GRCh38, since the fallback lookup
# used alongside this table is called with "GRCh38" - confirm with the data owner.
# Order matters: the lookup returns the first entry that matches.
nextgen_specific_genes_with_location: list[GeneWithLocation] = [
    GeneWithLocation(gene="IGK", chr="chr2", start=88852034, end=90258119),
    GeneWithLocation(gene="NSD2", chr="chr4", start=1792518, end=1940193),
    GeneWithLocation(gene="CCND3", chr="chr6", start=41920534, end=42562008),
    GeneWithLocation(gene="MYC", chr="chr8", start=125309416, end=129673293),
    GeneWithLocation(gene="CCND1", chr="chr11", start=69090733, end=69656860),
    GeneWithLocation(gene="IGH", chr="chr14", start=105578834, end=109902208),
    GeneWithLocation(gene="MAF", chr="chr16", start=78428398, end=79615096),
    GeneWithLocation(gene="MAFB", chr="chr20", start=39039005, end=40688948),
    GeneWithLocation(gene="IGL", chr="chr22", start=22012552, end=22965858),
]

# Just the gene symbols, for O(1) membership checks.
nextgen_specific_genes: set[str] = {
    entry["gene"] for entry in nextgen_specific_genes_with_location
}
23
+
24
+
25
def maybe_get_matching_gene_for_location(chr: str, position: int) -> str | None:
    """Return the gene of interest whose interval contains ``position``.

    Args:
        chr: Chromosome name, compared for exact equality with each entry's
            ``chr`` value.
        position: Position to test; both interval bounds are inclusive.

    Returns:
        The ``gene`` of the first matching entry in
        ``nextgen_specific_genes_with_location``, or ``None`` if no entry
        matches.
    """
    matching_genes = (
        entry["gene"]
        for entry in nextgen_specific_genes_with_location
        if entry["chr"] == chr and entry["start"] <= position <= entry["end"]
    )
    # next() stops at the first hit, mirroring an early return from a loop.
    return next(matching_genes, None)
@@ -176,6 +176,7 @@ def process_manifest(
176
176
  prefix: str,
177
177
  include_copy_number: bool,
178
178
  include_structural: bool,
179
+ somatic_translocations: list[str],
179
180
  log: Logger,
180
181
  ):
181
182
  test_text = extract_xml_text(xml_in_file)
@@ -186,6 +187,8 @@ def process_manifest(
186
187
  hyperdiploidy_chromosomes = manifest_helpers.extract_hyperdiploidy_chromosomes(xml_in_file, log)
187
188
  if hyperdiploidy_chromosomes:
188
189
  manifest["hyperdiploidyTrisomies"] = hyperdiploidy_chromosomes
190
+ if somatic_translocations:
191
+ manifest["somaticTranslocations"] = somatic_translocations
189
192
 
190
193
  manifest["reportFile"] = f".lifeomic/nextgen/{prefix}/{prefix}.pdf"
191
194
  manifest["sourceFileId"] = source_file_id
@@ -5,6 +5,10 @@ from typing import TypedDict
5
5
  from ingestion.shared_util.coords_to_genes import coords_to_genes
6
6
  from ingestion.nextgen.util.alteration_table import extract_variant_table
7
7
  from ingestion.nextgen.util.interpretation import map_interpretation
8
+ from ingestion.nextgen.util.nextgen_specific_genes import (
9
+ maybe_get_matching_gene_for_location,
10
+ nextgen_specific_genes,
11
+ )
8
12
  from ingestion.shared_util.open_maybe_gzipped import open_maybe_gzipped
9
13
 
10
14
 
@@ -42,9 +46,26 @@ def is_del_dup_or_ins(variant: list[str]) -> bool:
42
46
  return any([x in variant[2] for x in ["MantaDEL", "MantaDUP", "MantaINS"]])
43
47
 
44
48
 
49
def get_gene_from_coords(
    chromosome: str, start_position: str, end_position: str, log: Logger
) -> str:
    """Resolve the gene for a variant from the center of its coordinates.

    The center point between the start and end positions is first checked
    against the provided genes of interest; if it falls inside one of their
    intervals, that gene is returned. Otherwise the standard
    ``coords_to_genes`` lookup is used with the "GRCh38" reference.

    Args:
        chromosome: Chromosome name passed to both lookups.
        start_position: Start coordinate as a string parseable by ``int``.
        end_position: End coordinate as a string parseable by ``int``.
        log: Logger forwarded to ``coords_to_genes``.

    Returns:
        The matching gene of interest, or the result of the standard lookup.
    """
    start, end = int(start_position), int(end_position)
    # Truncate the midpoint to an integer position.
    midpoint = int((start + end) / 2)

    gene_of_interest = maybe_get_matching_gene_for_location(chromosome, midpoint)
    # A falsy result (no match) falls through to the standard lookup.
    return gene_of_interest or coords_to_genes("GRCh38", chromosome, midpoint, log)
64
+
65
+
45
66
  def process_structural(
46
67
  sv_in_file: str, xml_in_file, root_path: str, prefix: str, log: Logger
47
- ) -> str | None:
68
+ ) -> tuple[str | None, list[str]]:
48
69
  structural_variant_table = extract_variant_table(
49
70
  xml_in_file=xml_in_file, variant_type="structural", log=log
50
71
  )
@@ -74,9 +95,7 @@ def process_structural(
74
95
  effect = "insertion"
75
96
 
76
97
  # Get genes from coordinates using center point of start and end positions
77
- gene1 = coords_to_genes(
78
- "GRCh38", chromosome1, int((int(start_position1) + int(end_position1)) / 2), log
79
- )
98
+ gene1 = get_gene_from_coords(chromosome1, start_position1, end_position1, log)
80
99
  gene2 = "N/A"
81
100
 
82
101
  else:
@@ -89,12 +108,8 @@ def process_structural(
89
108
  effect = "translocation"
90
109
 
91
110
  # Get genes from coordinates using center point of start and end positions
92
- gene1 = coords_to_genes(
93
- "GRCh38", chromosome1, int((int(start_position1) + int(end_position1)) / 2), log
94
- )
95
- gene2 = coords_to_genes(
96
- "GRCh38", chromosome2, int((int(start_position2) + int(end_position2)) / 2), log
97
- )
111
+ gene1 = get_gene_from_coords(chromosome1, start_position1, end_position1, log)
112
+ gene2 = get_gene_from_coords(chromosome2, start_position2, end_position2, log)
98
113
 
99
114
  # Scrape interpretation
100
115
  interpretation = "unknown"
@@ -149,7 +164,7 @@ def process_structural(
149
164
 
150
165
  if not deduped_structural_variants:
151
166
  log.info(f"Ignoring empty structural variant file {sv_in_file}")
152
- return None
167
+ return (None, [])
153
168
 
154
169
  log.info(f"Saving file to {structural_variant_path_name}")
155
170
  with open(structural_variant_path_name, "w+") as f:
@@ -159,4 +174,24 @@ def process_structural(
159
174
  for sv in deduped_structural_variants:
160
175
  f.write(structural_variant_to_csv_row(sv))
161
176
 
162
- return structural_variant_path_name
177
+ log.info("Finding structural variant translocations for genes of interest")
178
+ translocations = [sv for sv in deduped_structural_variants if sv["effect"] == "translocation"]
179
+ formatted_translocations: set[str] = set()
180
+ for translocation in translocations:
181
+ gene1, gene2 = translocation["gene1"], translocation["gene2"]
182
+ # MYC is a special case
183
+ if gene1 == "MYC" or gene2 == "MYC":
184
+ formatted_translocations.add("t(MYC)")
185
+ continue
186
+ if gene1 in nextgen_specific_genes and gene2 in nextgen_specific_genes:
187
+ chr1, chr2 = int(translocation["position1"][0][3:]), int(
188
+ translocation["position2"][0][3:]
189
+ )
190
+ # Ensure chromosomes are in ascending order
191
+ if chr1 > chr2:
192
+ chr1, chr2 = chr2, chr1
193
+ formatted_translocations.add(f"t({chr1};{chr2})")
194
+
195
+ log.info(f"Found {len(formatted_translocations)} translocations for genes of interest")
196
+
197
+ return structural_variant_path_name, list(formatted_translocations)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.26
3
+ Version: 0.8.28
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -29,14 +29,15 @@ ingestion/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
29
29
  ingestion/generic/process.py,sha256=WJHV_-SKhrDZ3JS3fm9DVMoW3Zs2t50GiraSV3vlLHE,1548
30
30
  ingestion/generic/utils.py,sha256=1MEIru7uq38IjUdL8lcHqDH0oTki9uWrz1f2e-pmRoU,2814
31
31
  ingestion/nextgen/__init__.py,sha256=7LQ-h_Bvc5P1QcHMdzsqi1Qm4fTJn04-ozar2ty9wSc,59
32
- ingestion/nextgen/process.py,sha256=GKiQ2dCxrR7tBD8TSP6Wk-TyoX3xBGip5zfpeT2buiQ,3782
32
+ ingestion/nextgen/process.py,sha256=F0Ms8rTr_4boWPpE13D39C3ljFtyIVtw9XIIjCVI6f8,3849
33
33
  ingestion/nextgen/util/alteration_table.py,sha256=KwpJCQv_rVsL30jkzgZn0bKdd205fjVodYBNTcK3D1s,4220
34
34
  ingestion/nextgen/util/interpretation.py,sha256=ozuzb0vozff34zfP6AdOiUmI8Q77hI02jve_nCPZHfE,297
35
35
  ingestion/nextgen/util/manifest_helpers.py,sha256=PpSay-pe62jk735nom1tVD9nDE8-CxmzzCrgpBhgtjY,1571
36
+ ingestion/nextgen/util/nextgen_specific_genes.py,sha256=II_E2AgAqv35u_ga25geRn6UHuZy_Uk9itfyu_HybFY,1211
36
37
  ingestion/nextgen/util/pre_filter_somatic_vcf.py,sha256=K_gH4EnUXrKB22u_f8FqQVGrOS5LxXNsNO3VBn381eY,2301
37
38
  ingestion/nextgen/util/process_cnv.py,sha256=m-AhsXFlYw4LTzgJJaj5vXYbK5n3H7cImzBxD2To6M0,2598
38
- ingestion/nextgen/util/process_manifest.py,sha256=TAbCHwN_2m08jySn1J4qEd9Nrxjw9CrsspwFWack0V4,8448
39
- ingestion/nextgen/util/process_structural.py,sha256=BXhwbRtFLTZsDi4ioSna1qmMtxVcsB0R-xNIvymm5Vw,5947
39
+ ingestion/nextgen/util/process_manifest.py,sha256=FOa-m78layb5TFTktaDHkHT9hAGUeH9ZPGeqgBncz64,8585
40
+ ingestion/nextgen/util/process_structural.py,sha256=sdAMcsgXm1lUbvNN6URSTwMfRoKDuOzPG2NFtKyhL0I,7593
40
41
  ingestion/nextgen/util/process_vcf.py,sha256=TvyV5wyXXSpFy2lc6h3ljqVQ5VeDPGxihHl2_K6LalQ,8432
41
42
  ingestion/nextgen/util/types.py,sha256=SSzt5gv-kss1PR45eQUelypWrGI-dAfQMO3GSD-T-Wg,22
42
43
  ingestion/resources/GRCh37_map.csv.gz,sha256=JOEkjtbYrJpIdyoZdCvfJhvvz2dNfkSve7lXSXkCCD8,408290
@@ -53,6 +54,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
53
54
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
54
55
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
55
56
  ingestion/vcf_standardization/util/read_write.py,sha256=IQotJ27To1MoQcRstc5AbHZtUuJz5cqkkZiHsDNaBvI,2471
56
- phc_ingestion-0.8.26.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
57
- phc_ingestion-0.8.26.dist-info/METADATA,sha256=e1BZcEIqQwMUSnYlBbGz4BLsHPYVZg8UOBOxbtxiZ-A,552
58
- phc_ingestion-0.8.26.dist-info/RECORD,,
57
+ phc_ingestion-0.8.28.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
58
+ phc_ingestion-0.8.28.dist-info/METADATA,sha256=sNMcZ4aHN1IukmJRigUx9yGU-QMfnN9a7im0ULXoUcA,552
59
+ phc_ingestion-0.8.28.dist-info/RECORD,,