phc-ingestion 0.8.37__tar.gz → 0.8.38__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/PKG-INFO +1 -1
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/json.py +10 -2
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/metadata.py +9 -1
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/vcf.py +42 -7
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/vcf_etl.py +0 -1
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/pyproject.toml +4 -3
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/PYPI.md +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/process.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/cnv.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/detect_genome_ref.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/hla.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/ihc.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/interpretation.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/specimen_details.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/structural.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tar.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tests.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tmb.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/caris/util/tsv.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/process.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/cnv.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/fnv.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/foundation/util/interpretation.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/generic/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/generic/process.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/generic/utils.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/process.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/alteration_table.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/interpretation.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/manifest_helpers.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_cnv.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_manifest.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_structural.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/process_vcf.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/types.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/resources/GRCh37_map.csv.gz +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/resources/GRCh38_map.csv.gz +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/coords_to_genes.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/gene_to_coords.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/shared_util/types.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/Variant.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/standardize.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/__init__.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
- {phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/read_write.py +0 -0
|
@@ -84,11 +84,17 @@ def process_caris_json(infile: str, outpath: str, file_name: str, source_file_id
|
|
|
84
84
|
]
|
|
85
85
|
|
|
86
86
|
# Get patient
|
|
87
|
-
metadata = extract_metadata(
|
|
87
|
+
metadata, is_test_cancelled_permit_vcf_skip = extract_metadata(
|
|
88
|
+
data, file_name, files, source_file_id, log
|
|
89
|
+
)
|
|
88
90
|
structural_results = extract_structural(file_name, data, log)
|
|
89
91
|
cnv_results = extract_cnv(file_name, data, log)
|
|
90
92
|
rgel_results = convert_tsv_to_rgel(file_name, files, log)
|
|
91
|
-
|
|
93
|
+
|
|
94
|
+
include_empty = metadata["ihcTests"] and is_test_cancelled_permit_vcf_skip
|
|
95
|
+
vcf_results = extract_sv(
|
|
96
|
+
file_name, bool(somatic_filename), bool(germline_filename), include_empty
|
|
97
|
+
)
|
|
92
98
|
|
|
93
99
|
# We might not have any of these files but we need an empty json object here.
|
|
94
100
|
file_genome_references = {}
|
|
@@ -117,5 +123,7 @@ def process_caris_json(infile: str, outpath: str, file_name: str, source_file_id
|
|
|
117
123
|
result["somatic_vcf"] = f"{outpath}/{somatic_filename}"
|
|
118
124
|
if germline_filename is not None:
|
|
119
125
|
result["germline_vcf"] = f"{outpath}/{germline_filename}"
|
|
126
|
+
if not germline_filename and not somatic_filename and include_empty:
|
|
127
|
+
result["somatic_vcf"] = f"{outpath}/{file_name}.modified.somatic.vcf.gz"
|
|
120
128
|
|
|
121
129
|
return (result, germline_case_id, file_genome_references, data)
|
|
@@ -80,6 +80,7 @@ def is_valid_test_entry(test: dict):
|
|
|
80
80
|
|
|
81
81
|
# Build up the manifest iteratively because almost everything is optional
|
|
82
82
|
def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
|
|
83
|
+
is_test_cancelled_permit_vcf_skip = False
|
|
83
84
|
metadata = {}
|
|
84
85
|
|
|
85
86
|
test_details = data["testDetails"]
|
|
@@ -158,6 +159,13 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
|
|
|
158
159
|
# if not sufficient quantity we won't have test results
|
|
159
160
|
if test_details["reportType"] != "QNS":
|
|
160
161
|
for test in tests:
|
|
162
|
+
if "test_cancellation_reason" in test:
|
|
163
|
+
if test["test_cancellation_reason"] == "Quantitation quantity not sufficient":
|
|
164
|
+
# capture cancellation reason before bailing
|
|
165
|
+
# this is so we can generate an empty vcf so present biomarkers are
|
|
166
|
+
# still ingested: https://lifeomic.atlassian.net/browse/PHC-5748
|
|
167
|
+
is_test_cancelled_permit_vcf_skip = True
|
|
168
|
+
|
|
161
169
|
if not is_valid_test_entry(test):
|
|
162
170
|
continue
|
|
163
171
|
# Sometimes, if there is only a single test result,
|
|
@@ -244,4 +252,4 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
|
|
|
244
252
|
)
|
|
245
253
|
|
|
246
254
|
active_metadata = {k: v for k, v in metadata.items() if v is not None}
|
|
247
|
-
return active_metadata
|
|
255
|
+
return (active_metadata, is_test_cancelled_permit_vcf_skip)
|
|
@@ -1,11 +1,5 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import gzip
|
|
3
|
-
import io
|
|
4
|
-
import os
|
|
5
|
-
import re
|
|
6
|
-
import subprocess
|
|
7
|
-
import sys
|
|
8
|
-
import zipfile
|
|
9
3
|
|
|
10
4
|
from logging import Logger
|
|
11
5
|
|
|
@@ -13,8 +7,38 @@ from ingestion.caris.util.tests import safely_extract_tests_from_json_data
|
|
|
13
7
|
from ingestion.vcf_standardization.standardize import standardize_vcf
|
|
14
8
|
|
|
15
9
|
|
|
10
|
+
def create_empty_vcf_zip(prefix):
|
|
11
|
+
vcf_gzip_path = f"{prefix}.modified.somatic.vcf.gz"
|
|
12
|
+
content = (
|
|
13
|
+
"""##fileformat=VCFv4.1
|
|
14
|
+
##filedate="""
|
|
15
|
+
+ datetime.datetime.now().isoformat()
|
|
16
|
+
+ """
|
|
17
|
+
##FILTER=<ID=PASS,Description="All filters passed">
|
|
18
|
+
##FILTER=<ID=R8,Description="IndelRepeatLength is greater than 8">
|
|
19
|
+
##FILTER=<ID=R8.1,Description="IndelRepeatLength of a monomer is greater than 8">
|
|
20
|
+
##FILTER=<ID=R8.2,Description="IndelRepeatLength of a dimer is greater than 8">
|
|
21
|
+
##FILTER=<ID=sb,Description="Variant strand bias high">
|
|
22
|
+
##FILTER=<ID=sb.s,Description="Variant strand bias significantly high (only for SNV)">
|
|
23
|
+
##FILTER=<ID=rs,Description="Variant with rs (dbSNP) number in a non-core gene">
|
|
24
|
+
##FILTER=<ID=FP,Description="Possibly false positives due to high similarity to off-target regions">
|
|
25
|
+
##FILTER=<ID=NC,Description="Noncoding INDELs on non-core genes">
|
|
26
|
+
##FILTER=<ID=lowDP,Description="low depth variant">
|
|
27
|
+
##FILTER=<ID=Benign,Description="Benign variant">
|
|
28
|
+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
|
|
29
|
+
##FORMAT=<ID=AF,Number=1,Type=String,Description="Variant Allele Frequency">
|
|
30
|
+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT """
|
|
31
|
+
+ prefix
|
|
32
|
+
+ """
|
|
33
|
+
"""
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
with gzip.open(vcf_gzip_path, "wb") as f:
|
|
37
|
+
f.write(content.encode("utf-8"))
|
|
38
|
+
|
|
39
|
+
|
|
16
40
|
# This is done in next step, we are just adding to yaml
|
|
17
|
-
def extract_sv(prefix, include_somatic: bool, include_germline: bool):
|
|
41
|
+
def extract_sv(prefix, include_somatic: bool, include_germline: bool, include_empty: bool):
|
|
18
42
|
vcfs = []
|
|
19
43
|
|
|
20
44
|
# Hard-code genome reference for Caris VCFs
|
|
@@ -40,6 +64,17 @@ def extract_sv(prefix, include_somatic: bool, include_germline: bool):
|
|
|
40
64
|
}
|
|
41
65
|
)
|
|
42
66
|
|
|
67
|
+
if not vcfs and include_empty:
|
|
68
|
+
create_empty_vcf_zip(prefix)
|
|
69
|
+
vcfs.append(
|
|
70
|
+
{
|
|
71
|
+
"fileName": f".lifeomic/caris/{prefix}/{prefix}.modified.somatic.nrm.filtered.vcf.gz",
|
|
72
|
+
"sequenceType": "somatic",
|
|
73
|
+
"type": "shortVariant",
|
|
74
|
+
"reference": genome_reference,
|
|
75
|
+
}
|
|
76
|
+
)
|
|
77
|
+
|
|
43
78
|
return vcfs
|
|
44
79
|
|
|
45
80
|
|
|
@@ -1,19 +1,20 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "phc-ingestion"
|
|
3
|
-
version = "0.8.37"
|
|
3
|
+
version = "0.8.38"
|
|
4
4
|
description = "Functions for LifeOmic PHC genomic ingestions"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "LifeOmic Development", email = "development@lifeomic.com" },
|
|
7
7
|
]
|
|
8
8
|
dependencies = [
|
|
9
9
|
"lifeomic-logging>=0.3.2,<0.4.0",
|
|
10
|
-
"xmltodict
|
|
10
|
+
"xmltodict>=0.14.2",
|
|
11
11
|
"natsort==7.1.1",
|
|
12
12
|
"ruamel.yaml==0.17.21",
|
|
13
|
-
"pandas>=
|
|
13
|
+
"pandas>=2.2.3",
|
|
14
14
|
"jsonschema>=4.16.0,<5.0.0",
|
|
15
15
|
"schema>=0.7.5",
|
|
16
16
|
"packaging>=23.1",
|
|
17
|
+
"numpy>=2.1.2",
|
|
17
18
|
]
|
|
18
19
|
requires-python = ">=3.11"
|
|
19
20
|
readme = "PYPI.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/nextgen_specific_genes.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/nextgen/util/pre_filter_somatic_vcf.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/__init__.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/af_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/dp_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.37 → phc-ingestion-0.8.38}/ingestion/vcf_standardization/util/read_write.py
RENAMED
|
File without changes
|