phc-ingestion 0.8.25__tar.gz → 0.8.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/PKG-INFO +1 -1
  2. phc-ingestion-0.8.26/ingestion/nextgen/util/pre_filter_somatic_vcf.py +67 -0
  3. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/pyproject.toml +1 -1
  4. phc-ingestion-0.8.25/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -52
  5. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/PYPI.md +0 -0
  6. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/__init__.py +0 -0
  7. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/__init__.py +0 -0
  8. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/process.py +0 -0
  9. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/__init__.py +0 -0
  10. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/cnv.py +0 -0
  11. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/detect_genome_ref.py +0 -0
  12. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/ga4gh.py +0 -0
  13. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/hla.py +0 -0
  14. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/ihc.py +0 -0
  15. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/interpretation.py +0 -0
  16. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/json.py +0 -0
  17. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/metadata.py +0 -0
  18. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/specimen_details.py +0 -0
  19. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/structural.py +0 -0
  20. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/tar.py +0 -0
  21. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/tests.py +0 -0
  22. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/tmb.py +0 -0
  23. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/tsv.py +0 -0
  24. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/caris/util/vcf.py +0 -0
  25. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/__init__.py +0 -0
  26. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/process.py +0 -0
  27. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/util/__init__.py +0 -0
  28. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/util/cnv.py +0 -0
  29. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/util/fnv.py +0 -0
  30. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/util/ga4gh.py +0 -0
  31. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/util/interpretation.py +0 -0
  32. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/foundation/util/vcf_etl.py +0 -0
  33. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/generic/__init__.py +0 -0
  34. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/generic/process.py +0 -0
  35. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/generic/utils.py +0 -0
  36. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/__init__.py +0 -0
  37. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/process.py +0 -0
  38. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/alteration_table.py +0 -0
  39. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/interpretation.py +0 -0
  40. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/manifest_helpers.py +0 -0
  41. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_cnv.py +0 -0
  42. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_manifest.py +0 -0
  43. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_structural.py +0 -0
  44. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/process_vcf.py +0 -0
  45. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/nextgen/util/types.py +0 -0
  46. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/resources/GRCh37_map.csv.gz +0 -0
  47. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/resources/GRCh38_map.csv.gz +0 -0
  48. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/shared_util/__init__.py +0 -0
  49. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/shared_util/coords_to_genes.py +0 -0
  50. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/shared_util/gene_to_coords.py +0 -0
  51. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
  52. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/shared_util/types.py +0 -0
  53. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/Variant.py +0 -0
  54. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/__init__.py +0 -0
  55. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/standardize.py +0 -0
  56. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/__init__.py +0 -0
  57. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
  58. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
  59. {phc-ingestion-0.8.25 → phc-ingestion-0.8.26}/ingestion/vcf_standardization/util/read_write.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.25
3
+ Version: 0.8.26
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -0,0 +1,67 @@
1
+ from logging import Logger
2
+
3
+ from ingestion.shared_util.open_maybe_gzipped import open_maybe_gzipped
4
+
5
+
6
def build_variant_key_from_vcf_line(line: str) -> str:
    """
    Builds a `CHROM:POS:REF:ALT` key from a tab-delimited VCF data line.

    Only the first five columns are read (CHROM, POS, ID, REF, ALT), so the
    split is capped there rather than tokenizing the remaining columns.

    Raises an `IndexError` if the line has fewer than five tab-separated
    columns.
    """
    # maxsplit=5 leaves everything after ALT unsplit in the last element,
    # which is never read here.
    fields = line.strip().split("\t", 5)
    chrom, pos, ref, alt = fields[0], fields[1], fields[3], fields[4]
    return f"{chrom}:{pos}:{ref}:{alt}"
10
+
11
+
12
def extract_filter_from_vcf_line(line: str) -> str:
    """
    Returns the FILTER column (the 7th tab-separated field) of a VCF data line.

    Raises an `IndexError` if the line has fewer than seven tab-separated
    columns.
    """
    # FILTER is index 6; capping the split at 7 avoids tokenizing the
    # columns that follow it, which are never read here.
    fields = line.strip().split("\t", 7)
    return fields[6]
15
+
16
+
17
def replace_filter_in_vcf_line(line: str, new_filter: str) -> str:
    """
    Returns the VCF data line with its FILTER column (the 7th tab-separated
    field) replaced by `new_filter`.

    The returned line always ends with a single "\\n", whatever line
    terminator (if any) the input had. All other columns are passed through
    unchanged.

    Raises an `IndexError` if the line has fewer than seven tab-separated
    columns.
    """
    # Strip only the line terminator: a bare strip() would also eat trailing
    # tabs/spaces and silently drop empty trailing columns from the record.
    # maxsplit=7 keeps everything after FILTER as one untouched chunk.
    fields = line.rstrip("\r\n").split("\t", 7)
    fields[6] = new_filter
    return "\t".join(fields) + "\n"
21
+
22
+
23
def pre_filter_somatic_vcf(
    somatic_vcf_file: str,
    somatic_vcf_snv_file: str,
    somatic_vcf_indel_file: str,
    working_dir: str,
    log: Logger,
) -> str:
    """
    Removes all variants from the `somatic_vcf_file` that are not
    also in the `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.

    Also updates the FILTER field in the `somatic_vcf_file` to match
    the FILTER field of the corresponding variant in the
    `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.

    Variants are matched on the key produced by
    `build_variant_key_from_vcf_line`. If a key occurs more than once across
    the SNV and indel files, the FILTER read last wins (the SNV file is read
    first, then the indel file).

    Header lines (starting with "#") of the `somatic_vcf_file` are copied
    through unchanged. Blank lines in any input are skipped.

    Returns the path of the filtered VCF, written to
    `{working_dir}/filtered_somatic.vcf.gz`.
    """
    log.info("Pre-filtering somatic VCF file")

    valid_variants_with_filters: dict[str, str] = {}

    for vcf_path in (somatic_vcf_snv_file, somatic_vcf_indel_file):
        with open_maybe_gzipped(vcf_path, "rt") as f:
            for line in f:
                # Skip headers, and blank lines that would otherwise raise
                # IndexError when the key is built.
                if line.startswith("#") or not line.strip():
                    continue
                valid_variants_with_filters[build_variant_key_from_vcf_line(line)] = (
                    extract_filter_from_vcf_line(line)
                )

    log.info(f"Found {len(valid_variants_with_filters)} valid variants")

    output_vcf_path = f"{working_dir}/filtered_somatic.vcf.gz"
    with (
        open_maybe_gzipped(somatic_vcf_file, "rt") as f,
        open_maybe_gzipped(output_vcf_path, "wt") as w,
    ):
        for line in f:
            if line.startswith("#"):
                w.write(line)
                continue
            if not line.strip():
                # Blank lines carry no variant; drop them instead of crashing.
                continue
            # Single lookup: None means the variant is absent from both the
            # SNV and indel files and must be filtered out.
            new_filter = valid_variants_with_filters.get(build_variant_key_from_vcf_line(line))
            if new_filter is not None:
                w.write(replace_filter_in_vcf_line(line, new_filter))

    log.info(f"Successfully pre-filtered somatic VCF file to {output_vcf_path}")
    return output_vcf_path
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "phc-ingestion"
3
- version = "0.8.25"
3
+ version = "0.8.26"
4
4
  description = "Functions for LifeOmic PHC genomic ingestions"
5
5
  authors = [
6
6
  { name = "LifeOmic Development", email = "development@lifeomic.com" },
@@ -1,52 +0,0 @@
1
- from logging import Logger
2
-
3
- from ingestion.shared_util.open_maybe_gzipped import open_maybe_gzipped
4
-
5
-
6
def build_variant_key_from_vcf_line(line: str) -> str:
    """
    Builds a `CHROM:POS:REF:ALT` key from a tab-delimited VCF data line.

    Only the first five columns are read (CHROM, POS, ID, REF, ALT), so the
    split is capped there rather than tokenizing the remaining columns.

    Raises an `IndexError` if the line has fewer than five tab-separated
    columns.
    """
    # maxsplit=5 leaves everything after ALT unsplit in the last element,
    # which is never read here.
    fields = line.strip().split("\t", 5)
    chrom, pos, ref, alt = fields[0], fields[1], fields[3], fields[4]
    return f"{chrom}:{pos}:{ref}:{alt}"
10
-
11
-
12
def pre_filter_somatic_vcf(
    somatic_vcf_file: str,
    somatic_vcf_snv_file: str,
    somatic_vcf_indel_file: str,
    working_dir: str,
    log: Logger,
) -> str:
    """
    Removes all variants from the `somatic_vcf_file` that are not
    also in the `somatic_vcf_snv_file` or `somatic_vcf_indel_file`.

    Variants are matched on the key produced by
    `build_variant_key_from_vcf_line`. Header lines (starting with "#") of
    the `somatic_vcf_file` are copied through unchanged, and kept variant
    lines are written exactly as read. Blank lines in any input are skipped.

    Returns the path of the filtered VCF, written to
    `{working_dir}/filtered_somatic.vcf.gz`.
    """
    log.info("Pre-filtering somatic VCF file")

    valid_variant_keys: set[str] = set()
    # Both reference files are scanned the same way, so share one loop.
    for vcf_path in (somatic_vcf_snv_file, somatic_vcf_indel_file):
        with open_maybe_gzipped(vcf_path, "rt") as f:
            for line in f:
                # Skip headers, and blank lines that would otherwise raise
                # IndexError when the key is built.
                if line.startswith("#") or not line.strip():
                    continue
                valid_variant_keys.add(build_variant_key_from_vcf_line(line))

    log.info(f"Found {len(valid_variant_keys)} valid variants")

    output_vcf_path = f"{working_dir}/filtered_somatic.vcf.gz"
    with (
        open_maybe_gzipped(somatic_vcf_file, "rt") as f,
        open_maybe_gzipped(output_vcf_path, "wt") as w,
    ):
        for line in f:
            if line.startswith("#"):
                w.write(line)
                continue
            if not line.strip():
                # Blank lines carry no variant; drop them instead of crashing.
                continue
            if build_variant_key_from_vcf_line(line) in valid_variant_keys:
                w.write(line)

    log.info(f"Successfully pre-filtered somatic VCF file to {output_vcf_path}")
    return output_vcf_path
File without changes