phc-ingestion 0.8.35__tar.gz → 0.8.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/PKG-INFO +1 -1
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/process.py +1 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/interpretation.py +4 -3
- phc-ingestion-0.8.37/ingestion/nextgen/util/manifest_helpers.py +88 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/process_manifest.py +9 -9
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/pyproject.toml +1 -1
- phc-ingestion-0.8.35/ingestion/nextgen/util/manifest_helpers.py +0 -42
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/PYPI.md +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/process.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/cnv.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/detect_genome_ref.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/hla.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/ihc.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/interpretation.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/json.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/metadata.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/specimen_details.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/structural.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/tar.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/tests.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/tmb.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/tsv.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/caris/util/vcf.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/process.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/util/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/util/cnv.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/util/fnv.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/util/interpretation.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/foundation/util/vcf_etl.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/generic/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/generic/process.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/generic/utils.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/alteration_table.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/process_cnv.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/process_structural.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/process_vcf.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/types.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/resources/GRCh37_map.csv.gz +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/resources/GRCh38_map.csv.gz +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/shared_util/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/shared_util/coords_to_genes.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/shared_util/gene_to_coords.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/shared_util/types.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/Variant.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/standardize.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/__init__.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
- {phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/read_write.py +0 -0
|
@@ -18,11 +18,12 @@ def map_vendsig(vendsig: str) -> str:
|
|
|
18
18
|
"""
|
|
19
19
|
Map vendor significance for short variants
|
|
20
20
|
"""
|
|
21
|
-
|
|
21
|
+
vendsig_lower = vendsig.lower()
|
|
22
|
+
if vendsig_lower in ["pathogenic"]:
|
|
22
23
|
return "VENDSIG=Pathogenic"
|
|
23
|
-
elif
|
|
24
|
+
elif vendsig_lower in ["likely pathogenic", "likelypathogenic"]:
|
|
24
25
|
return "VENDSIG=Likely pathogenic"
|
|
25
|
-
elif
|
|
26
|
+
elif vendsig_lower in ["vus"]:
|
|
26
27
|
return "VENDSIG=Uncertain significance"
|
|
27
28
|
else:
|
|
28
29
|
raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from logging import Logger
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def parse_pattern(pattern: str, line: str, name: str) -> str:
    """
    Return the first capture group of `pattern` matched against `line`.

    The pattern is anchored at the start of `line` (`re.match` semantics) and is
    expected to contain at least one capture group. The captured text is returned
    with leading and trailing whitespace removed.

    `name` only labels the field in the error message. The message never echoes
    `line` itself (presumably because these lines carry patient details - confirm
    before adding the line to any message or log).

    Raises:
        ValueError: if the pattern does not match, or if it matches but the first
            capture group did not participate in the match.
    """
    match = re.match(pattern, line)

    # `group(1)` is None when an optional group is skipped; treat that like a
    # failed match so callers only ever have to handle ValueError.
    captured = match.group(1) if match else None
    if captured is None:
        raise ValueError(f"Could not parse {name} from line")

    return captured.strip()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def parse_indication(line: str) -> str:
    """
    Return the text that follows "Reason for Referral:" on `line`.

    The value ends at the next "Patient" or "<", and is stripped of surrounding
    whitespace by `parse_pattern`, which raises ValueError when nothing matches.
    """
    indication_pattern = r"^.*Reason for Referral:(.*?)(Patient|<).*$"
    return parse_pattern(indication_pattern, line, "indication")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def parse_ordering_md(line: str) -> str:
    """
    Return the text that follows "Physician Name:" on `line`.

    The value ends at the next "Reason" or "<", and is stripped of surrounding
    whitespace by `parse_pattern`, which raises ValueError when nothing matches.
    """
    physician_pattern = r"^.*Physician Name:(.*?)(Reason|<).*$"
    return parse_pattern(physician_pattern, line, "ordering MD")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def parse_patient_name(line: str) -> str:
    """
    Return the text that follows "Patient Name: " on `line`.

    The label must be followed by a space. The value ends at the next "Accession"
    or "<", and is stripped of surrounding whitespace by `parse_pattern`, which
    raises ValueError when nothing matches.
    """
    patient_name_pattern = r"^.*Patient Name: (.*?)(Accession|<).*$"
    return parse_pattern(patient_name_pattern, line, "patient name")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_sample_number(line: str) -> str:
    """
    Return the run of digits that follows "Specimen #: " on `line`.

    The digits must be followed by a space. Because the pattern uses `\\d*`, the
    result can be an empty string when the label is followed directly by a space.
    `parse_pattern` raises ValueError when nothing matches.
    """
    sample_number_pattern = r"^.*Specimen #: (\d*) .*$"
    return parse_pattern(sample_number_pattern, line, "sample number")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_body_site(line: str) -> str:
    """
    Return the text that follows "Specimen:" on `line`.

    The value ends at the next "Age", "Birthdate" or "<", and is stripped of
    surrounding whitespace by `parse_pattern`, which raises ValueError when nothing
    matches.
    """
    body_site_pattern = r"^.*Specimen:(.*?)(Age|Birthdate|<).*$"
    return parse_pattern(body_site_pattern, line, "body site")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def parse_report_id(line: str) -> str:
    """
    Return the token that follows "Accession #: " on `line`.

    The value runs up to the next space, so a space must follow it on the line.
    `parse_pattern` raises ValueError when nothing matches.
    """
    report_id_pattern = r"^.*Accession #: (.*?) .*$"
    return parse_pattern(report_id_pattern, line, "report ID")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def parse_report_date_single_line(line: str) -> str:
    """
    Return the NN/NN/NNNN date found after "Diagnostic Genomics Laboratory" on `line`.

    The wildcard before the date group is greedy, so when several dates follow the
    label the last one is returned. `parse_pattern` raises ValueError when the
    label or the date is missing.
    """
    single_line_pattern = r"^.*Diagnostic Genomics Laboratory.*(\d{2}\/\d{2}\/\d{4}).*$"
    return parse_pattern(single_line_pattern, line, "report date")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def parse_report_date_multiline(patient_info_lines: list[str]) -> str:
    """
    Return the report date when it sits on a line after the "Laboratory" line.

    Lines are ignored until one containing "Laboratory" has been seen. After that,
    each line has tags such as `<TD>` or `</TR>` removed; lines that end up empty,
    and any further lines containing "Laboratory", are skipped. The first line left
    over must contain an NN/NN/NNNN date, otherwise `parse_pattern` raises
    ValueError.

    Raises:
        ValueError: if no usable line follows a "Laboratory" line, or if the first
            usable line holds no date.
    """
    seen_laboratory = False

    for raw_line in patient_info_lines:
        if "Laboratory" in raw_line:
            seen_laboratory = True
        elif seen_laboratory:
            candidate = re.sub(r"<\/?T.\/?>", "", raw_line).strip()
            if candidate:
                # Only the first non-empty candidate is ever tried.
                return parse_pattern(
                    r"^.*(\d{2}\/\d{2}\/\d{4}).*$", candidate, "report date from multiline"
                )

    raise ValueError("Could not parse report date from lines")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def parse_report_date(patient_info_lines: list[str], log: Logger) -> str:
    """
    Find the report date in the patient info lines.

    The date normally shares a line with the laboratory name:
    ```
    Diagnostic Genomics Laboratory 01/01/2021
    ```

    Occasionally it is pushed onto a later line instead:
    ```
    Diagnostic Genomics Laboratory
    ...random empty lines or lines with only tags...
    01/01/2021
    ```

    Only the first line containing "Laboratory" is tried as a single-line date. If
    that fails a warning is logged and the multiline parser takes over; the
    multiline parser is also used when no line contains "Laboratory" at all.

    Raises:
        ValueError: propagated from the multiline parser when no date is found.
    """
    first_laboratory_line = next(
        (candidate for candidate in patient_info_lines if "Laboratory" in candidate),
        None,
    )

    if first_laboratory_line is not None:
        try:
            return parse_report_date_single_line(first_laboratory_line)
        except ValueError:
            log.warning("Could not parse report date from single line")

    return parse_report_date_multiline(patient_info_lines)
|
|
@@ -17,7 +17,7 @@ def search_and_grab(array: list, search_item: str, grab_index: int):
|
|
|
17
17
|
return array[array.index([i for i in array if re.search(search_item, i)][0]) + grab_index]
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
def extract_xml_text(xml_in_file: str):
|
|
20
|
+
def extract_xml_text(xml_in_file: str) -> list[str]:
|
|
21
21
|
with open(xml_in_file, "r") as f:
|
|
22
22
|
xml_lines = f.readlines()
|
|
23
23
|
|
|
@@ -35,7 +35,7 @@ def extract_xml_text(xml_in_file: str):
|
|
|
35
35
|
return patient_info_lines
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
def extract_interpretation_text(xml_in_file: str):
|
|
38
|
+
def extract_interpretation_text(xml_in_file: str) -> list[str]:
|
|
39
39
|
with open(xml_in_file, "r") as f:
|
|
40
40
|
xml_lines = f.readlines()
|
|
41
41
|
|
|
@@ -111,7 +111,7 @@ def extract_patient_data(patient_info_lines: list[str]):
|
|
|
111
111
|
return patient_data
|
|
112
112
|
|
|
113
113
|
|
|
114
|
-
def extract_test_data(patient_info_lines: list, interpretation_lines: list):
|
|
114
|
+
def extract_test_data(patient_info_lines: list[str], interpretation_lines: list[str], log: Logger):
|
|
115
115
|
# Initialize manifest and hard-code some values
|
|
116
116
|
manifest: dict[str, Any] = {}
|
|
117
117
|
manifest["testType"] = "Plasma Cell Myeloma Panel"
|
|
@@ -128,12 +128,11 @@ def extract_test_data(patient_info_lines: list, interpretation_lines: list):
|
|
|
128
128
|
manifest["medFacilID"] = ""
|
|
129
129
|
manifest["medFacilName"] = "IU Health"
|
|
130
130
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
manifest["reportDate"] = transform_date(report_date)
|
|
135
|
-
manifest["indexedDate"] = manifest["reportDate"]
|
|
131
|
+
report_date = manifest_helpers.parse_report_date(patient_info_lines, log)
|
|
132
|
+
manifest["reportDate"] = transform_date(report_date)
|
|
133
|
+
manifest["indexedDate"] = manifest["reportDate"]
|
|
136
134
|
|
|
135
|
+
for line in patient_info_lines:
|
|
137
136
|
if "collDate" not in manifest and "Collected" in line:
|
|
138
137
|
collArray = line.split(" ")
|
|
139
138
|
coll_date = search_and_grab(collArray, "Collected", 1)
|
|
@@ -178,10 +177,11 @@ def process_manifest(
|
|
|
178
177
|
include_structural: bool,
|
|
179
178
|
somatic_translocations: list[str],
|
|
180
179
|
hyperdiploidy_chromosomes: list[str] | None,
|
|
180
|
+
log: Logger,
|
|
181
181
|
):
|
|
182
182
|
test_text = extract_xml_text(xml_in_file)
|
|
183
183
|
interpretation_text = extract_interpretation_text(xml_in_file)
|
|
184
|
-
manifest = extract_test_data(test_text, interpretation_text)
|
|
184
|
+
manifest = extract_test_data(test_text, interpretation_text, log)
|
|
185
185
|
manifest.update(extract_patient_data(test_text))
|
|
186
186
|
|
|
187
187
|
file_prefix = f".lifeomic/nextgen/{case_id}/{case_id}"
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
from logging import Logger
|
|
2
|
-
import re
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def parse_pattern(pattern: str, line: str, name: str) -> str:
|
|
6
|
-
regex = re.compile(pattern)
|
|
7
|
-
match = regex.match(line)
|
|
8
|
-
|
|
9
|
-
if not match:
|
|
10
|
-
raise ValueError(f"Could not parse {name} from line")
|
|
11
|
-
|
|
12
|
-
return match.group(1).strip()
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def parse_indication(line: str) -> str:
|
|
16
|
-
return parse_pattern(r"^.*Reason for Referral:(.*?)(Patient|<).*$", line, "indication")
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def parse_ordering_md(line: str) -> str:
|
|
20
|
-
return parse_pattern(r"^.*Physician Name:(.*?)(Reason|<).*$", line, "ordering MD")
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def parse_patient_name(line: str) -> str:
|
|
24
|
-
return parse_pattern(r"^.*Patient Name: (.*?)(Accession|<).*$", line, "patient name")
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def parse_sample_number(line: str) -> str:
|
|
28
|
-
return parse_pattern(r"^.*Specimen #: (\d*) .*$", line, "sample number")
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def parse_body_site(line: str) -> str:
|
|
32
|
-
return parse_pattern(r"^.*Specimen:(.*?)(Age|Birthdate|<).*$", line, "body site")
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def parse_report_id(line: str) -> str:
|
|
36
|
-
return parse_pattern(r"^.*Accession #: (.*?) .*$", line, "report ID")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def parse_report_date(line: str) -> str:
|
|
40
|
-
return parse_pattern(
|
|
41
|
-
r"^.*Diagnostic Genomics Laboratory.*(\d{2}\/\d{2}\/\d{4}).*$", line, "report date"
|
|
42
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/nextgen_specific_genes.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/nextgen/util/pre_filter_somatic_vcf.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/__init__.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/af_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/dp_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.35 → phc-ingestion-0.8.37}/ingestion/vcf_standardization/util/read_write.py
RENAMED
|
File without changes
|