phc-ingestion 0.8.37__tar.gz → 0.8.38__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/PKG-INFO +1 -1
  2. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/json.py +10 -2
  3. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/metadata.py +9 -1
  4. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/vcf.py +42 -7
  5. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/vcf_etl.py +0 -1
  6. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/pyproject.toml +4 -3
  7. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/PYPI.md +0 -0
  8. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/__init__.py +0 -0
  9. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/__init__.py +0 -0
  10. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/process.py +0 -0
  11. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/__init__.py +0 -0
  12. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/cnv.py +0 -0
  13. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/detect_genome_ref.py +0 -0
  14. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/ga4gh.py +0 -0
  15. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/hla.py +0 -0
  16. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/ihc.py +0 -0
  17. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/interpretation.py +0 -0
  18. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/specimen_details.py +0 -0
  19. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/structural.py +0 -0
  20. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tar.py +0 -0
  21. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tests.py +0 -0
  22. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tmb.py +0 -0
  23. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tsv.py +0 -0
  24. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/__init__.py +0 -0
  25. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/process.py +0 -0
  26. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/__init__.py +0 -0
  27. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/cnv.py +0 -0
  28. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/fnv.py +0 -0
  29. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/ga4gh.py +0 -0
  30. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/interpretation.py +0 -0
  31. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/generic/__init__.py +0 -0
  32. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/generic/process.py +0 -0
  33. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/generic/utils.py +0 -0
  34. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/__init__.py +0 -0
  35. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/process.py +0 -0
  36. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/alteration_table.py +0 -0
  37. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/interpretation.py +0 -0
  38. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/manifest_helpers.py +0 -0
  39. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
  40. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
  41. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_cnv.py +0 -0
  42. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_manifest.py +0 -0
  43. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_structural.py +0 -0
  44. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_vcf.py +0 -0
  45. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/types.py +0 -0
  46. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/resources/GRCh37_map.csv.gz +0 -0
  47. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/resources/GRCh38_map.csv.gz +0 -0
  48. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/__init__.py +0 -0
  49. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/coords_to_genes.py +0 -0
  50. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/gene_to_coords.py +0 -0
  51. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
  52. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/types.py +0 -0
  53. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/Variant.py +0 -0
  54. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/__init__.py +0 -0
  55. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/standardize.py +0 -0
  56. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/__init__.py +0 -0
  57. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
  58. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
  59. {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/read_write.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.37
3
+ Version: 0.8.38
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -84,11 +84,17 @@ def process_caris_json(infile: str, outpath: str, file_name: str, source_file_id
84
84
  ]
85
85
 
86
86
  # Get patient
87
- metadata = extract_metadata(data, file_name, files, source_file_id, log)
87
+ metadata, is_test_cancelled_permit_vcf_skip = extract_metadata(
88
+ data, file_name, files, source_file_id, log
89
+ )
88
90
  structural_results = extract_structural(file_name, data, log)
89
91
  cnv_results = extract_cnv(file_name, data, log)
90
92
  rgel_results = convert_tsv_to_rgel(file_name, files, log)
91
- vcf_results = extract_sv(file_name, bool(somatic_filename), bool(germline_filename))
93
+
94
+ include_empty = metadata["ihcTests"] and is_test_cancelled_permit_vcf_skip
95
+ vcf_results = extract_sv(
96
+ file_name, bool(somatic_filename), bool(germline_filename), include_empty
97
+ )
92
98
 
93
99
  # We might not have any of these files but we need an empty json object here.
94
100
  file_genome_references = {}
@@ -117,5 +123,7 @@ def process_caris_json(infile: str, outpath: str, file_name: str, source_file_id
117
123
  result["somatic_vcf"] = f"{outpath}/{somatic_filename}"
118
124
  if germline_filename is not None:
119
125
  result["germline_vcf"] = f"{outpath}/{germline_filename}"
126
+ if not germline_filename and not somatic_filename and include_empty:
127
+ result["somatic_vcf"] = f"{outpath}/{file_name}.modified.somatic.vcf.gz"
120
128
 
121
129
  return (result, germline_case_id, file_genome_references, data)
@@ -80,6 +80,7 @@ def is_valid_test_entry(test: dict):
80
80
 
81
81
  # Build up the manifest iteratively because almost everything is optional
82
82
  def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
83
+ is_test_cancelled_permit_vcf_skip = False
83
84
  metadata = {}
84
85
 
85
86
  test_details = data["testDetails"]
@@ -158,6 +159,13 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
158
159
  # if not sufficient quantity we won't have test results
159
160
  if test_details["reportType"] != "QNS":
160
161
  for test in tests:
162
+ if "test_cancellation_reason" in test:
163
+ if test["test_cancellation_reason"] == "Quantitation quantity not sufficient":
164
+ # capture cancellation reason before bailing
165
+ # this is so we can generate an empty vcf so present biomarkers are
166
+ # still ingested: https://lifeomic.atlassian.net/browse/PHC-5748
167
+ is_test_cancelled_permit_vcf_skip = True
168
+
161
169
  if not is_valid_test_entry(test):
162
170
  continue
163
171
  # Sometimes, if there is only a single test result,
@@ -244,4 +252,4 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
244
252
  )
245
253
 
246
254
  active_metadata = {k: v for k, v in metadata.items() if v is not None}
247
- return active_metadata
255
+ return (active_metadata, is_test_cancelled_permit_vcf_skip)
@@ -1,11 +1,5 @@
1
1
  import datetime
2
2
  import gzip
3
- import io
4
- import os
5
- import re
6
- import subprocess
7
- import sys
8
- import zipfile
9
3
 
10
4
  from logging import Logger
11
5
 
@@ -13,8 +7,38 @@ from ingestion.caris.util.tests import safely_extract_tests_from_json_data
13
7
  from ingestion.vcf_standardization.standardize import standardize_vcf
14
8
 
15
9
 
10
+ def create_empty_vcf_zip(prefix):
11
+ vcf_gzip_path = f"{prefix}.modified.somatic.vcf.gz"
12
+ content = (
13
+ """##fileformat=VCFv4.1
14
+ ##filedate="""
15
+ + datetime.datetime.now().isoformat()
16
+ + """
17
+ ##FILTER=<ID=PASS,Description="All filters passed">
18
+ ##FILTER=<ID=R8,Description="IndelRepeatLength is greater than 8">
19
+ ##FILTER=<ID=R8.1,Description="IndelRepeatLength of a monomer is greater than 8">
20
+ ##FILTER=<ID=R8.2,Description="IndelRepeatLength of a dimer is greater than 8">
21
+ ##FILTER=<ID=sb,Description="Variant strand bias high">
22
+ ##FILTER=<ID=sb.s,Description="Variant strand bias significantly high (only for SNV)">
23
+ ##FILTER=<ID=rs,Description="Variant with rs (dbSNP) number in a non-core gene">
24
+ ##FILTER=<ID=FP,Description="Possibly false positives due to high similarity to off-target regions">
25
+ ##FILTER=<ID=NC,Description="Noncoding INDELs on non-core genes">
26
+ ##FILTER=<ID=lowDP,Description="low depth variant">
27
+ ##FILTER=<ID=Benign,Description="Benign variant">
28
+ ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
29
+ ##FORMAT=<ID=AF,Number=1,Type=String,Description="Variant Allele Frequency">
30
+ #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT """
31
+ + prefix
32
+ + """
33
+ """
34
+ )
35
+
36
+ with gzip.open(vcf_gzip_path, "wb") as f:
37
+ f.write(content.encode("utf-8"))
38
+
39
+
16
40
  # This is done in next step, we are just adding to yaml
17
- def extract_sv(prefix, include_somatic: bool, include_germline: bool):
41
+ def extract_sv(prefix, include_somatic: bool, include_germline: bool, include_empty: bool):
18
42
  vcfs = []
19
43
 
20
44
  # Hard-code genome reference for Caris VCFs
@@ -40,6 +64,17 @@ def extract_sv(prefix, include_somatic: bool, include_germline: bool):
40
64
  }
41
65
  )
42
66
 
67
+ if not vcfs and include_empty:
68
+ create_empty_vcf_zip(prefix)
69
+ vcfs.append(
70
+ {
71
+ "fileName": f".lifeomic/caris/{prefix}/{prefix}.modified.somatic.nrm.filtered.vcf.gz",
72
+ "sequenceType": "somatic",
73
+ "type": "shortVariant",
74
+ "reference": genome_reference,
75
+ }
76
+ )
77
+
43
78
  return vcfs
44
79
 
45
80
 
@@ -1,6 +1,5 @@
1
1
  import gzip
2
2
  import xmltodict
3
- from natsort import natsorted
4
3
  from logging import Logger
5
4
  import re
6
5
  import os
@@ -1,19 +1,20 @@
1
1
  [project]
2
2
  name = "phc-ingestion"
3
- version = "0.8.37"
3
+ version = "0.8.38"
4
4
  description = "Functions for LifeOmic PHC genomic ingestions"
5
5
  authors = [
6
6
  { name = "LifeOmic Development", email = "development@lifeomic.com" },
7
7
  ]
8
8
  dependencies = [
9
9
  "lifeomic-logging>=0.3.2,<0.4.0",
10
- "xmltodict==0.13.0",
10
+ "xmltodict>=0.14.2",
11
11
  "natsort==7.1.1",
12
12
  "ruamel.yaml==0.17.21",
13
- "pandas>=1.5.0,<1.6.0",
13
+ "pandas>=2.2.3",
14
14
  "jsonschema>=4.16.0,<5.0.0",
15
15
  "schema>=0.7.5",
16
16
  "packaging>=23.1",
17
+ "numpy>=2.1.2",
17
18
  ]
18
19
  requires-python = ">=3.11"
19
20
  readme = "PYPI.md"
File without changes