phc-ingestion 0.8.24__tar.gz → 0.8.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/PKG-INFO +1 -1
- phc-ingestion-0.8.26/ingestion/nextgen/util/pre_filter_somatic_vcf.py +67 -0
- phc-ingestion-0.8.26/ingestion/shared_util/open_maybe_gzipped.py +8 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/pyproject.toml +1 -1
- phc-ingestion-0.8.24/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -48
- phc-ingestion-0.8.24/ingestion/shared_util/open_maybe_gzipped.py +0 -5
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/PYPI.md +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/process.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/cnv.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/detect_genome_ref.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/hla.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/ihc.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/interpretation.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/json.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/metadata.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/specimen_details.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/structural.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/tar.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/tests.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/tmb.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/tsv.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/caris/util/vcf.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/process.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/util/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/util/cnv.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/util/fnv.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/util/interpretation.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/foundation/util/vcf_etl.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/generic/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/generic/process.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/generic/utils.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/process.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/alteration_table.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/interpretation.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/manifest_helpers.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_cnv.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_manifest.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_structural.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_vcf.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/nextgen/util/types.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/resources/GRCh37_map.csv.gz +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/resources/GRCh38_map.csv.gz +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/shared_util/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/shared_util/coords_to_genes.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/shared_util/gene_to_coords.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/shared_util/types.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/Variant.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/standardize.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/__init__.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
- {phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/read_write.py +0 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from logging import Logger
|
|
2
|
+
|
|
3
|
+
from ingestion.shared_util.open_maybe_gzipped import open_maybe_gzipped
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def build_variant_key_from_vcf_line(line: str) -> str:
    """
    Build the `CHROM:POS:REF:ALT` lookup key for a single VCF data line.

    The key is made of tab-separated columns 1, 2, 4 and 5 of the line
    (column 3 is not part of the key).

    Raises:
        IndexError: if the line has fewer than five tab-separated columns.
    """
    # Only the first five columns are needed, so cap the split instead of
    # also splitting every remaining column of the record.
    fields = line.strip().split("\t", 5)
    if len(fields) < 5:
        raise IndexError(
            f"Malformed VCF line, expected at least 5 tab-separated columns: {line!r}"
        )
    chrom, pos, _variant_id, ref, alt = fields[:5]
    return f"{chrom}:{pos}:{ref}:{alt}"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def extract_filter_from_vcf_line(line: str) -> str:
    """
    Return the seventh tab-separated column (FILTER) of a VCF data line.

    Raises:
        IndexError: if the line has fewer than seven tab-separated columns.
    """
    # Cap the split just past the FILTER column; the rest of the record
    # is not needed here.
    fields = line.strip().split("\t", 7)
    if len(fields) < 7:
        raise IndexError(
            f"Malformed VCF line, expected at least 7 tab-separated columns: {line!r}"
        )
    return fields[6]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def replace_filter_in_vcf_line(line: str, new_filter: str) -> str:
    """
    Return `line` with its seventh tab-separated column (FILTER) replaced
    by `new_filter`.

    Every other column is passed through untouched and the result always
    ends with a single "\\n".

    Raises:
        IndexError: if the line has fewer than seven tab-separated columns.
    """
    # Strip only the line terminator: a blanket strip() would also eat
    # leading whitespace and trailing tabs/spaces, silently altering or
    # dropping columns other than FILTER.
    fields = line.rstrip("\r\n").split("\t", 7)
    if len(fields) < 7:
        raise IndexError(
            f"Malformed VCF line, expected at least 7 tab-separated columns: {line!r}"
        )
    fields[6] = new_filter
    return "\t".join(fields) + "\n"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def pre_filter_somatic_vcf(
    somatic_vcf_file: str,
    somatic_vcf_snv_file: str,
    somatic_vcf_indel_file: str,
    working_dir: str,
    log: Logger,
) -> str:
    """
    Removes all variants from the `somatic_vcf_file` that are not
    also in the `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.

    Also updates the FILTER field in the `somatic_vcf_file` to match
    the FILTER field of the corresponding variant in the
    `somatic_vcf_snv_file` or `somatic_vcf_indel_file`. If the same
    variant key occurs more than once, the FILTER read last wins
    (the indel file is read after the SNV file).

    Header lines (starting with "#") are copied through unchanged and
    blank lines are ignored in every input.

    Returns:
        The path of the filtered VCF,
        `{working_dir}/filtered_somatic.vcf.gz`.
    """
    log.info("Pre-filtering somatic VCF file")

    valid_variants_with_filters: dict[str, str] = {}

    for file in [somatic_vcf_snv_file, somatic_vcf_indel_file]:
        with open_maybe_gzipped(file, "rt") as f:
            for line in f:
                # Blank lines (e.g. a trailing empty line) carry no record
                # and would otherwise fail inside the key builder.
                if line.startswith("#") or not line.strip():
                    continue
                valid_variants_with_filters[build_variant_key_from_vcf_line(line)] = (
                    extract_filter_from_vcf_line(line)
                )

    log.info(f"Found {len(valid_variants_with_filters)} valid variants")

    output_vcf_path = f"{working_dir}/filtered_somatic.vcf.gz"
    with (
        open_maybe_gzipped(somatic_vcf_file, "rt") as f,
        open_maybe_gzipped(output_vcf_path, "wt") as w,
    ):
        for line in f:
            if line.startswith("#"):
                w.write(line)
                continue
            if not line.strip():
                continue
            # Stored values are always strings, so None reliably means
            # "variant not present in the SNV/indel files".
            new_filter = valid_variants_with_filters.get(
                build_variant_key_from_vcf_line(line)
            )
            if new_filter is not None:
                w.write(replace_filter_in_vcf_line(line, new_filter))

    log.info(f"Successfully pre-filtered somatic VCF file to {output_vcf_path}")
    return output_vcf_path
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import gzip
|
|
2
|
-
from logging import Logger
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def build_variant_key_from_vcf_line(line: str) -> str:
    """
    Build the `CHROM:POS:REF:ALT` lookup key for a single VCF data line.

    The key is made of tab-separated columns 1, 2, 4 and 5 of the line
    (column 3 is not part of the key).

    Raises:
        IndexError: if the line has fewer than five tab-separated columns.
    """
    # Only the first five columns are needed, so cap the split instead of
    # also splitting every remaining column of the record.
    fields = line.strip().split("\t", 5)
    if len(fields) < 5:
        raise IndexError(
            f"Malformed VCF line, expected at least 5 tab-separated columns: {line!r}"
        )
    chrom, pos, _variant_id, ref, alt = fields[:5]
    return f"{chrom}:{pos}:{ref}:{alt}"
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def pre_filter_somatic_vcf(
    somatic_vcf_file: str,
    somatic_vcf_snv_file: str,
    somatic_vcf_indel_file: str,
    working_dir: str,
    log: Logger,
) -> str:
    """
    Removes all variants from the `somatic_vcf_file` that are not
    also in the `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.

    All three inputs are read with `gzip.open`. Header lines (starting
    with "#") are copied through unchanged and blank lines are ignored.

    Returns:
        The path of the filtered VCF,
        `{working_dir}/filtered_somatic.vcf.gz`.
    """
    log.info("Pre-filtering somatic VCF file")

    valid_variant_keys = set()
    # The SNV and indel files are handled identically, so share one loop.
    for variant_file in (somatic_vcf_snv_file, somatic_vcf_indel_file):
        with gzip.open(variant_file, "rt") as f:
            for line in f:
                # Blank lines (e.g. a trailing empty line) carry no record
                # and would otherwise fail inside the key builder.
                if line.startswith("#") or not line.strip():
                    continue
                valid_variant_keys.add(build_variant_key_from_vcf_line(line))

    log.info(f"Found {len(valid_variant_keys)} valid variants")

    output_vcf_path = f"{working_dir}/filtered_somatic.vcf.gz"
    with gzip.open(somatic_vcf_file, "rt") as f, gzip.open(output_vcf_path, "wt") as w:
        for line in f:
            if line.startswith("#"):
                w.write(line)
                continue
            if not line.strip():
                continue
            if build_variant_key_from_vcf_line(line) in valid_variant_keys:
                w.write(line)

    log.info(f"Successfully pre-filtered somatic VCF file to {output_vcf_path}")
    return output_vcf_path
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/__init__.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/af_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/dp_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.24 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/read_write.py
RENAMED
|
File without changes
|