phc-ingestion 0.8.35__py3-none-any.whl → 0.8.37__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -68,6 +68,7 @@ def process(
68
68
  bool(structural_path_name),
69
69
  translocations,
70
70
  hyperdiploidy_chromosomes,
71
+ log,
71
72
  )
72
73
  pre_filtered_somatic_vcf_path = pre_filter_somatic_vcf(
73
74
  vendor_files["somaticVcfFile"],
@@ -18,11 +18,12 @@ def map_vendsig(vendsig: str) -> str:
18
18
  """
19
19
  Map vendor significance for short variants
20
20
  """
21
- if vendsig in ["Pathogenic"]:
21
+ vendsig_lower = vendsig.lower()
22
+ if vendsig_lower in ["pathogenic"]:
22
23
  return "VENDSIG=Pathogenic"
23
- elif vendsig in ["Likely Pathogenic", "LikelyPathogenic"]:
24
+ elif vendsig_lower in ["likely pathogenic", "likelypathogenic"]:
24
25
  return "VENDSIG=Likely pathogenic"
25
- elif vendsig in ["VUS"]:
26
+ elif vendsig_lower in ["vus"]:
26
27
  return "VENDSIG=Uncertain significance"
27
28
  else:
28
29
  raise RuntimeError(f"Unable to map vendor significance: {vendsig}")
@@ -36,7 +36,53 @@ def parse_report_id(line: str) -> str:
36
36
  return parse_pattern(r"^.*Accession #: (.*?) .*$", line, "report ID")
37
37
 
38
38
 
39
- def parse_report_date(line: str) -> str:
39
+ def parse_report_date_single_line(line: str) -> str:
40
40
  return parse_pattern(
41
41
  r"^.*Diagnostic Genomics Laboratory.*(\d{2}\/\d{2}\/\d{4}).*$", line, "report date"
42
42
  )
43
+
44
+
45
+ def parse_report_date_multiline(patient_info_lines: list[str]) -> str:
46
+ in_range_trigger = False
47
+
48
+ for line in patient_info_lines:
49
+ if "Laboratory" in line:
50
+ in_range_trigger = True
51
+ continue
52
+ if in_range_trigger:
53
+ formatted_line = re.sub(r"<\/?T.\/?>", "", line).strip()
54
+ if not formatted_line:
55
+ continue
56
+ return parse_pattern(
57
+ r"^.*(\d{2}\/\d{2}\/\d{4}).*$", formatted_line, "report date from multiline"
58
+ )
59
+
60
+ raise ValueError("Could not parse report date from lines")
61
+
62
+
63
+ def parse_report_date(patient_info_lines: list[str], log: Logger) -> str:
64
+ """
65
+ Typically, the report date is in a form like:
66
+ ```
67
+ Diagnostic Genomics Laboratory 01/01/2021
68
+ ```
69
+
70
+ However, sometimes the date is split across multiple lines, like:
71
+ ```
72
+ Diagnostic Genomics Laboratory
73
+ ...random empty lines or lines with only tags...
74
+ 01/01/2021
75
+ ```
76
+ This function attempts to first parse the date from a single line, and if that fails,
77
+ it will attempt to parse it from multiple lines.
78
+ """
79
+ for line in patient_info_lines:
80
+ if "Laboratory" in line:
81
+ try:
82
+ report_date = parse_report_date_single_line(line)
83
+ return report_date
84
+ except ValueError:
85
+ log.warning("Could not parse report date from single line")
86
+ break
87
+
88
+ return parse_report_date_multiline(patient_info_lines)
@@ -17,7 +17,7 @@ def search_and_grab(array: list, search_item: str, grab_index: int):
17
17
  return array[array.index([i for i in array if re.search(search_item, i)][0]) + grab_index]
18
18
 
19
19
 
20
- def extract_xml_text(xml_in_file: str):
20
+ def extract_xml_text(xml_in_file: str) -> list[str]:
21
21
  with open(xml_in_file, "r") as f:
22
22
  xml_lines = f.readlines()
23
23
 
@@ -35,7 +35,7 @@ def extract_xml_text(xml_in_file: str):
35
35
  return patient_info_lines
36
36
 
37
37
 
38
- def extract_interpretation_text(xml_in_file: str):
38
+ def extract_interpretation_text(xml_in_file: str) -> list[str]:
39
39
  with open(xml_in_file, "r") as f:
40
40
  xml_lines = f.readlines()
41
41
 
@@ -111,7 +111,7 @@ def extract_patient_data(patient_info_lines: list[str]):
111
111
  return patient_data
112
112
 
113
113
 
114
- def extract_test_data(patient_info_lines: list, interpretation_lines: list):
114
+ def extract_test_data(patient_info_lines: list[str], interpretation_lines: list[str], log: Logger):
115
115
  # Initialize manifest and hard-code some values
116
116
  manifest: dict[str, Any] = {}
117
117
  manifest["testType"] = "Plasma Cell Myeloma Panel"
@@ -128,12 +128,11 @@ def extract_test_data(patient_info_lines: list, interpretation_lines: list):
128
128
  manifest["medFacilID"] = ""
129
129
  manifest["medFacilName"] = "IU Health"
130
130
 
131
- for line in patient_info_lines:
132
- if "reportDate" not in manifest and "Laboratory" in line:
133
- report_date = manifest_helpers.parse_report_date(line)
134
- manifest["reportDate"] = transform_date(report_date)
135
- manifest["indexedDate"] = manifest["reportDate"]
131
+ report_date = manifest_helpers.parse_report_date(patient_info_lines, log)
132
+ manifest["reportDate"] = transform_date(report_date)
133
+ manifest["indexedDate"] = manifest["reportDate"]
136
134
 
135
+ for line in patient_info_lines:
137
136
  if "collDate" not in manifest and "Collected" in line:
138
137
  collArray = line.split(" ")
139
138
  coll_date = search_and_grab(collArray, "Collected", 1)
@@ -178,10 +177,11 @@ def process_manifest(
178
177
  include_structural: bool,
179
178
  somatic_translocations: list[str],
180
179
  hyperdiploidy_chromosomes: list[str] | None,
180
+ log: Logger,
181
181
  ):
182
182
  test_text = extract_xml_text(xml_in_file)
183
183
  interpretation_text = extract_interpretation_text(xml_in_file)
184
- manifest = extract_test_data(test_text, interpretation_text)
184
+ manifest = extract_test_data(test_text, interpretation_text, log)
185
185
  manifest.update(extract_patient_data(test_text))
186
186
 
187
187
  file_prefix = f".lifeomic/nextgen/{case_id}/{case_id}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.35
3
+ Version: 0.8.37
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -29,14 +29,14 @@ ingestion/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
29
29
  ingestion/generic/process.py,sha256=WJHV_-SKhrDZ3JS3fm9DVMoW3Zs2t50GiraSV3vlLHE,1548
30
30
  ingestion/generic/utils.py,sha256=1MEIru7uq38IjUdL8lcHqDH0oTki9uWrz1f2e-pmRoU,2814
31
31
  ingestion/nextgen/__init__.py,sha256=7LQ-h_Bvc5P1QcHMdzsqi1Qm4fTJn04-ozar2ty9wSc,59
32
- ingestion/nextgen/process.py,sha256=kDCnU685v7aqJ3i4HpFdb7HqgHRSBKqtYPpuyN7qWmM,3976
32
+ ingestion/nextgen/process.py,sha256=5Z0RfclwTAYZruGDiLPutjPCYFh1DJpoWY9dnttghT4,3993
33
33
  ingestion/nextgen/util/alteration_table.py,sha256=bvTrXEhIRye5BzSXjZEBy1AvXLZgG0rNkNt3e3rcvv0,6127
34
- ingestion/nextgen/util/interpretation.py,sha256=tQ3qzAUwBzwK1tQzH9ujZk_VCQ2wP8HzSZY0fImJ5Wo,818
35
- ingestion/nextgen/util/manifest_helpers.py,sha256=2xrpEtHbCb1Kea1wJeObkDfTiBklmffQt_o2hMgOSOE,1208
34
+ ingestion/nextgen/util/interpretation.py,sha256=56wVk9j-w59gM-11iODXbKuUtZcEwe8zJQSXpjyCguw,872
35
+ ingestion/nextgen/util/manifest_helpers.py,sha256=LH5em0xsu9Hrs175vfx6SX8W1Ww2FFRp2wIBSfIEMUM,2725
36
36
  ingestion/nextgen/util/nextgen_specific_genes.py,sha256=1jFcqvtYAlJ7eBwOBm1UC2TzAbjHjdlvPBUzxr1G8dY,1206
37
37
  ingestion/nextgen/util/pre_filter_somatic_vcf.py,sha256=mIaUihmGLbS38D4Gy_Qtf1lFAfW0A-LgAgQmsrEiI-M,3529
38
38
  ingestion/nextgen/util/process_cnv.py,sha256=MIirc8e0k6lsaTZkRM3U3L3IvbrcHmKQ4xlIu585514,2430
39
- ingestion/nextgen/util/process_manifest.py,sha256=EnV9I90vnanDvuoErbMfz6yAfjzM5LdhhUF4q5DJd8w,8428
39
+ ingestion/nextgen/util/process_manifest.py,sha256=RsHzDGL1OBea2raoEHACo8owRodIFpX3xVE-aFOoyrg,8428
40
40
  ingestion/nextgen/util/process_structural.py,sha256=FKjkK7BkIlocnLs8rFCjrMC39FCQnD0nQCeWvi7cRoA,7539
41
41
  ingestion/nextgen/util/process_vcf.py,sha256=ZZURSMnZhHDpFahzijZ4MvCfSWTPdIktzmnCKVVUbGs,7768
42
42
  ingestion/nextgen/util/types.py,sha256=SSzt5gv-kss1PR45eQUelypWrGI-dAfQMO3GSD-T-Wg,22
@@ -54,6 +54,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
54
54
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
55
55
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
56
56
  ingestion/vcf_standardization/util/read_write.py,sha256=x3Pf6Dq8tmolblbCS5CrNmrcHS3FGfqBSFpFgvFGC4g,2526
57
- phc_ingestion-0.8.35.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
58
- phc_ingestion-0.8.35.dist-info/METADATA,sha256=PctzE287ytgM7HHKcZO5VnMScgiX3bgpmgPNpF-ntWI,552
59
- phc_ingestion-0.8.35.dist-info/RECORD,,
57
+ phc_ingestion-0.8.37.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
58
+ phc_ingestion-0.8.37.dist-info/METADATA,sha256=06qKld0MIJchM-NfUQKojMeKDo1nnYHXPDpwLrbcJLk,552
59
+ phc_ingestion-0.8.37.dist-info/RECORD,,