phc-ingestion 0.8.25__py3-none-any.whl → 0.8.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,17 @@ def build_variant_key_from_vcf_line(line: str) -> str:
9
9
  return f"{chrom}:{pos}:{ref}:{alt}"
10
10
 
11
11
 
12
+ def extract_filter_from_vcf_line(line: str) -> str:
13
+ split_line = line.strip().split("\t")
14
+ return split_line[6]
15
+
16
+
17
+ def replace_filter_in_vcf_line(line: str, new_filter: str) -> str:
18
+ split_line = line.strip().split("\t")
19
+ split_line[6] = new_filter
20
+ return "\t".join(split_line) + "\n"
21
+
22
+
12
23
  def pre_filter_somatic_vcf(
13
24
  somatic_vcf_file: str,
14
25
  somatic_vcf_snv_file: str,
@@ -19,22 +30,25 @@ def pre_filter_somatic_vcf(
19
30
  """
20
31
  Removes all variants from the `somatic_vcf_file` that are not
21
32
  also in the `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.
33
+
34
+ Also updates the FILTER field in the `somatic_vcf_file` to match
35
+ the FILTER field of the corresponding variant in the
36
+ `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.
22
37
  """
23
38
  log.info("Pre-filtering somatic VCF file")
24
39
 
25
- valid_variant_keys = set()
26
- with open_maybe_gzipped(somatic_vcf_snv_file, "rt") as f:
27
- for line in f:
28
- if line.startswith("#"):
29
- continue
30
- valid_variant_keys.add(build_variant_key_from_vcf_line(line))
31
- with open_maybe_gzipped(somatic_vcf_indel_file, "rt") as f:
32
- for line in f:
33
- if line.startswith("#"):
34
- continue
35
- valid_variant_keys.add(build_variant_key_from_vcf_line(line))
40
+ valid_variants_with_filters: dict[str, str] = {}
41
+
42
+ for file in [somatic_vcf_snv_file, somatic_vcf_indel_file]:
43
+ with open_maybe_gzipped(file, "rt") as f:
44
+ for line in f:
45
+ if line.startswith("#"):
46
+ continue
47
+ valid_variants_with_filters[build_variant_key_from_vcf_line(line)] = (
48
+ extract_filter_from_vcf_line(line)
49
+ )
36
50
 
37
- log.info(f"Found {len(valid_variant_keys)} valid variants")
51
+ log.info(f"Found {len(valid_variants_with_filters)} valid variants")
38
52
 
39
53
  output_vcf_path = f"{working_dir}/filtered_somatic.vcf.gz"
40
54
  with (
@@ -45,8 +59,9 @@ def pre_filter_somatic_vcf(
45
59
  if line.startswith("#"):
46
60
  w.write(line)
47
61
  else:
48
- if build_variant_key_from_vcf_line(line) in valid_variant_keys:
49
- w.write(line)
62
+ key = build_variant_key_from_vcf_line(line)
63
+ if key in valid_variants_with_filters:
64
+ w.write(replace_filter_in_vcf_line(line, valid_variants_with_filters[key]))
50
65
 
51
66
  log.info(f"Successfully pre-filtered somatic VCF file to {output_vcf_path}")
52
67
  return output_vcf_path
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.25
3
+ Version: 0.8.26
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -33,7 +33,7 @@ ingestion/nextgen/process.py,sha256=GKiQ2dCxrR7tBD8TSP6Wk-TyoX3xBGip5zfpeT2buiQ,
33
33
  ingestion/nextgen/util/alteration_table.py,sha256=KwpJCQv_rVsL30jkzgZn0bKdd205fjVodYBNTcK3D1s,4220
34
34
  ingestion/nextgen/util/interpretation.py,sha256=ozuzb0vozff34zfP6AdOiUmI8Q77hI02jve_nCPZHfE,297
35
35
  ingestion/nextgen/util/manifest_helpers.py,sha256=PpSay-pe62jk735nom1tVD9nDE8-CxmzzCrgpBhgtjY,1571
36
- ingestion/nextgen/util/pre_filter_somatic_vcf.py,sha256=beAtOpznVtUOWKCUNNv1UukNBSa0LKmhXsJ_3K5xk2E,1759
36
+ ingestion/nextgen/util/pre_filter_somatic_vcf.py,sha256=K_gH4EnUXrKB22u_f8FqQVGrOS5LxXNsNO3VBn381eY,2301
37
37
  ingestion/nextgen/util/process_cnv.py,sha256=m-AhsXFlYw4LTzgJJaj5vXYbK5n3H7cImzBxD2To6M0,2598
38
38
  ingestion/nextgen/util/process_manifest.py,sha256=TAbCHwN_2m08jySn1J4qEd9Nrxjw9CrsspwFWack0V4,8448
39
39
  ingestion/nextgen/util/process_structural.py,sha256=BXhwbRtFLTZsDi4ioSna1qmMtxVcsB0R-xNIvymm5Vw,5947
@@ -53,6 +53,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
53
53
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
54
54
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
55
55
  ingestion/vcf_standardization/util/read_write.py,sha256=IQotJ27To1MoQcRstc5AbHZtUuJz5cqkkZiHsDNaBvI,2471
56
- phc_ingestion-0.8.25.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
57
- phc_ingestion-0.8.25.dist-info/METADATA,sha256=AU2E4U0eLh5aXCGiCmVsVloyruqmkF_oNdkv7rmAfBQ,552
58
- phc_ingestion-0.8.25.dist-info/RECORD,,
56
+ phc_ingestion-0.8.26.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
57
+ phc_ingestion-0.8.26.dist-info/METADATA,sha256=e1BZcEIqQwMUSnYlBbGz4BLsHPYVZg8UOBOxbtxiZ-A,552
58
+ phc_ingestion-0.8.26.dist-info/RECORD,,