phc-ingestion 0.8.25__py3-none-any.whl → 0.8.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -9,6 +9,17 @@ def build_variant_key_from_vcf_line(line: str) -> str:
|
|
|
9
9
|
return f"{chrom}:{pos}:{ref}:{alt}"
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
def extract_filter_from_vcf_line(line: str) -> str:
    """
    Returns the FILTER value of a VCF data line, i.e. the seventh
    tab-separated column (index 6).

    Only the trailing line terminator is removed before splitting, so
    empty columns at the end of the line keep their position.

    Raises an `IndexError` if the line has fewer than seven columns.
    """
    # `rstrip("\r\n")` rather than `strip()`: stripping all whitespace would
    # also eat trailing tabs and shift or drop empty columns.
    # Columns after FILTER are never read, so the split is capped.
    return line.rstrip("\r\n").split("\t", 7)[6]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def replace_filter_in_vcf_line(line: str, new_filter: str) -> str:
    """
    Returns a copy of a VCF data line with its FILTER value (the seventh
    tab-separated column, index 6) replaced by `new_filter`.

    Every other column is passed through unchanged, including empty
    trailing columns. The returned line always ends with a single "\\n",
    whatever terminator (if any) the input line had.

    Raises an `IndexError` if the line has fewer than seven columns.
    """
    # Remove only the line terminator: `strip()` would also drop trailing
    # tabs (empty columns) and edge whitespace, silently altering the record.
    columns = line.rstrip("\r\n").split("\t")
    columns[6] = new_filter
    return "\t".join(columns) + "\n"
|
|
21
|
+
|
|
22
|
+
|
|
12
23
|
def pre_filter_somatic_vcf(
|
|
13
24
|
somatic_vcf_file: str,
|
|
14
25
|
somatic_vcf_snv_file: str,
|
|
@@ -19,22 +30,25 @@ def pre_filter_somatic_vcf(
|
|
|
19
30
|
"""
|
|
20
31
|
Removes all variants from the `somatic_vcf_file` that are not
|
|
21
32
|
also in the `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.
|
|
33
|
+
|
|
34
|
+
Also updates the FILTER field in the `somatic_vcf_file` to match
|
|
35
|
+
the FILTER field of the corresponding variant in the
|
|
36
|
+
`somatic_vcf_snv_file` or `somatic_vcf_indel_file`.
|
|
22
37
|
"""
|
|
23
38
|
log.info("Pre-filtering somatic VCF file")
|
|
24
39
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
valid_variant_keys.add(build_variant_key_from_vcf_line(line))
|
|
40
|
+
valid_variants_with_filters: dict[str, str] = {}
|
|
41
|
+
|
|
42
|
+
for file in [somatic_vcf_snv_file, somatic_vcf_indel_file]:
|
|
43
|
+
with open_maybe_gzipped(file, "rt") as f:
|
|
44
|
+
for line in f:
|
|
45
|
+
if line.startswith("#"):
|
|
46
|
+
continue
|
|
47
|
+
valid_variants_with_filters[build_variant_key_from_vcf_line(line)] = (
|
|
48
|
+
extract_filter_from_vcf_line(line)
|
|
49
|
+
)
|
|
36
50
|
|
|
37
|
-
log.info(f"Found {len(
|
|
51
|
+
log.info(f"Found {len(valid_variants_with_filters)} valid variants")
|
|
38
52
|
|
|
39
53
|
output_vcf_path = f"{working_dir}/filtered_somatic.vcf.gz"
|
|
40
54
|
with (
|
|
@@ -45,8 +59,9 @@ def pre_filter_somatic_vcf(
|
|
|
45
59
|
if line.startswith("#"):
|
|
46
60
|
w.write(line)
|
|
47
61
|
else:
|
|
48
|
-
|
|
49
|
-
|
|
62
|
+
key = build_variant_key_from_vcf_line(line)
|
|
63
|
+
if key in valid_variants_with_filters:
|
|
64
|
+
w.write(replace_filter_in_vcf_line(line, valid_variants_with_filters[key]))
|
|
50
65
|
|
|
51
66
|
log.info(f"Successfully pre-filtered somatic VCF file to {output_vcf_path}")
|
|
52
67
|
return output_vcf_path
|
|
@@ -33,7 +33,7 @@ ingestion/nextgen/process.py,sha256=GKiQ2dCxrR7tBD8TSP6Wk-TyoX3xBGip5zfpeT2buiQ,
|
|
|
33
33
|
ingestion/nextgen/util/alteration_table.py,sha256=KwpJCQv_rVsL30jkzgZn0bKdd205fjVodYBNTcK3D1s,4220
|
|
34
34
|
ingestion/nextgen/util/interpretation.py,sha256=ozuzb0vozff34zfP6AdOiUmI8Q77hI02jve_nCPZHfE,297
|
|
35
35
|
ingestion/nextgen/util/manifest_helpers.py,sha256=PpSay-pe62jk735nom1tVD9nDE8-CxmzzCrgpBhgtjY,1571
|
|
36
|
-
ingestion/nextgen/util/pre_filter_somatic_vcf.py,sha256=
|
|
36
|
+
ingestion/nextgen/util/pre_filter_somatic_vcf.py,sha256=K_gH4EnUXrKB22u_f8FqQVGrOS5LxXNsNO3VBn381eY,2301
|
|
37
37
|
ingestion/nextgen/util/process_cnv.py,sha256=m-AhsXFlYw4LTzgJJaj5vXYbK5n3H7cImzBxD2To6M0,2598
|
|
38
38
|
ingestion/nextgen/util/process_manifest.py,sha256=TAbCHwN_2m08jySn1J4qEd9Nrxjw9CrsspwFWack0V4,8448
|
|
39
39
|
ingestion/nextgen/util/process_structural.py,sha256=BXhwbRtFLTZsDi4ioSna1qmMtxVcsB0R-xNIvymm5Vw,5947
|
|
@@ -53,6 +53,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
|
|
|
53
53
|
ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
|
|
54
54
|
ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
|
|
55
55
|
ingestion/vcf_standardization/util/read_write.py,sha256=IQotJ27To1MoQcRstc5AbHZtUuJz5cqkkZiHsDNaBvI,2471
|
|
56
|
-
phc_ingestion-0.8.
|
|
57
|
-
phc_ingestion-0.8.
|
|
58
|
-
phc_ingestion-0.8.
|
|
56
|
+
phc_ingestion-0.8.26.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
|
|
57
|
+
phc_ingestion-0.8.26.dist-info/METADATA,sha256=e1BZcEIqQwMUSnYlBbGz4BLsHPYVZg8UOBOxbtxiZ-A,552
|
|
58
|
+
phc_ingestion-0.8.26.dist-info/RECORD,,
|
|
File without changes
|