phc-ingestion 0.10.11__py3-none-any.whl → 0.10.13__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,10 @@
1
- from typing import Optional, TypedDict, Any
2
- from datetime import datetime
1
+ import re
2
+ import gzip
3
+ from typing import Optional, TypedDict, Any, Callable
4
+ from datetime import datetime, timezone
5
+ from dateutil import parser
3
6
  from ingestion.shared_util.lambda_client import LambdaClient
7
+ from ingestion.vcf_standardization.util.read_write import read_headers
4
8
  from ingestion.nebula.constants import (
5
9
  DATASET_SYSTEM,
6
10
  NEBULA_KIT_ID_SYSTEM,
@@ -9,7 +13,6 @@ from ingestion.nebula.constants import (
9
13
  NEBULA_BODY_SITE,
10
14
  NEBULA_INDICATION,
11
15
  )
12
- import re
13
16
 
14
17
 
15
18
  class HumanName(TypedDict):
@@ -49,11 +52,14 @@ class Practitioner(Resource):
49
52
 
50
53
 
51
54
  class ManifestAssembler:
52
- def __init__(self, ingestion_id: str, account_id: str, project_id: str, kit_id: str):
55
+ def __init__(
56
+ self, ingestion_id: str, account_id: str, project_id: str, kit_id: str, vcf_file_path: str
57
+ ):
53
58
  self.ingestion_id = ingestion_id
54
59
  self.account_id = account_id
55
60
  self.project_id = project_id
56
61
  self.kit_id = kit_id
62
+ self.vcf_file_path = vcf_file_path
57
63
  self.client = LambdaClient(
58
64
  "patient-service",
59
65
  {
@@ -142,6 +148,37 @@ class ManifestAssembler:
142
148
  "fullName": f"{first_name} {last_name}",
143
149
  }
144
150
 
151
+ def _safe(self, lambda_func: Callable[[], Any]) -> Any:
152
+ try:
153
+ return lambda_func()
154
+ except Exception as e:
155
+ return None
156
+
157
+ def __extract_report_date(self) -> dict[str, str]:
158
+ in_file = self.vcf_file_path
159
+ with gzip.open(in_file, "rt") if in_file.endswith(".gz") else open(in_file, "r") as f:
160
+ headers = read_headers(f)
161
+ for header in headers:
162
+ epoch_parts = re.search(r"Epoch=(\d+)", header)
163
+ epoch_time_raw = epoch_parts.group(1) if epoch_parts else None
164
+ epoch_time = self._safe(
165
+ lambda: datetime.fromtimestamp(int(epoch_time_raw) / 1000.0, tz=timezone.utc)
166
+ )
167
+ date_parts = re.search(r"Date=\"(.*)\"", header)
168
+ date_string_raw = date_parts.group(1) if date_parts else None
169
+ date_string = self._safe(
170
+ lambda: parser.parse(date_string_raw, tzinfos={"GST": "UTC+4"})
171
+ )
172
+
173
+ report_date = epoch_time or date_string
174
+ if report_date:
175
+ return {
176
+ "reportDate": report_date.astimezone(tz=timezone.utc)
177
+ .isoformat(timespec="milliseconds")
178
+ .replace("+00:00", "Z")
179
+ }
180
+ return {}
181
+
145
182
  def create_manifest(self) -> dict[str, Any]:
146
183
  patient = self.__fetch_patient_by_kit_id()
147
184
 
@@ -167,6 +204,7 @@ class ManifestAssembler:
167
204
  return {
168
205
  "name": "Nebula",
169
206
  "indexedDate": datetime.now().strftime("%Y-%m-%d"),
207
+ **self.__extract_report_date(),
170
208
  "reference": "GRCh38",
171
209
  "patientId": patient.get("id"),
172
210
  "mrn": self.__extract_elation_mrn(patient),
@@ -15,8 +15,9 @@ def process(vcf_file, out_path, file_name, source_file_id, ingestion_id, account
15
15
  )
16
16
 
17
17
  case_id = file_name
18
- manifest_assembler = ManifestAssembler(ingestion_id, account_id, project_id, case_id)
19
- manifest = manifest_assembler.create_manifest()
18
+ manifest = ManifestAssembler(
19
+ ingestion_id, account_id, project_id, case_id, vcf_file
20
+ ).create_manifest()
20
21
  base_vcf_file = os.path.basename(vcf_file)
21
22
  vcf_out = base_vcf_file.replace(".vcf", ".modified.vcf")
22
23
  vcf_final = base_vcf_file.replace(".vcf", ".modified.nrm.filtered.vcf")
@@ -35,10 +36,10 @@ def process(vcf_file, out_path, file_name, source_file_id, ingestion_id, account
35
36
  manifest["testType"] = NEBULA_TEST_TYPE
36
37
  manifest["reportID"] = case_id
37
38
  manifest["sourceFileId"] = source_file_id
38
- manifest["resources"] = [{"fileName": f".lifeomic/vcf-ingest/{case_id}/{base_vcf_file}"}]
39
+ manifest["resources"] = [{"fileName": f".lifeomic/nebula/{case_id}/{base_vcf_file}"}]
39
40
  manifest["files"] = [
40
41
  {
41
- "fileName": f".lifeomic/vcf-ingest/{case_id}/{vcf_final}",
42
+ "fileName": f".lifeomic/nebula/{case_id}/{vcf_final}",
42
43
  "sequenceType": "germline",
43
44
  "type": "shortVariant",
44
45
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.10.11
3
+ Version: 0.10.13
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -28,8 +28,8 @@ ingestion/generic/process.py,sha256=ZaVnZ_gx9faDUsuresI1A0oCegTa-dPQT7DBFMeZGyY,
28
28
  ingestion/generic/utils.py,sha256=1MEIru7uq38IjUdL8lcHqDH0oTki9uWrz1f2e-pmRoU,2814
29
29
  ingestion/nebula/__init__.py,sha256=VauK-rup_N8ZXVohx3HYqHX_PE_WoPyMUhdv2R7al4o,45
30
30
  ingestion/nebula/constants.py,sha256=thKqSwemdaAwAmKvF4FEVI9l1Ph5ergsnMlx6nWte7E,357
31
- ingestion/nebula/manifest_assembler.py,sha256=Og1g_HD0cR9osH5eWdttgFEEayimaZRO7QckC9U9YKM,6383
32
- ingestion/nebula/process.py,sha256=vQZIyQH3iL2PG1xyyKLv5PsnnVsQ_fBBGzrP8oRJ5FA,2347
31
+ ingestion/nebula/manifest_assembler.py,sha256=4lVZGiUc8DBkxb2W6zWOoX3azJf0VUMWYC20-T7wDNA,7989
32
+ ingestion/nebula/process.py,sha256=N9OuipynGV_XgEL3nO5I_-di1tk9szOy8LqsyNTw0E0,2323
33
33
  ingestion/nextgen/__init__.py,sha256=7LQ-h_Bvc5P1QcHMdzsqi1Qm4fTJn04-ozar2ty9wSc,59
34
34
  ingestion/nextgen/process.py,sha256=5Z0RfclwTAYZruGDiLPutjPCYFh1DJpoWY9dnttghT4,3993
35
35
  ingestion/nextgen/util/alteration_table.py,sha256=JTWBL1Fqj_pGsH5vwuVEnCUJle2wOBk6VYImHYCF9vg,6129
@@ -59,6 +59,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
59
59
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
60
60
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
61
61
  ingestion/vcf_standardization/util/read_write.py,sha256=x3Pf6Dq8tmolblbCS5CrNmrcHS3FGfqBSFpFgvFGC4g,2526
62
- phc_ingestion-0.10.11.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
63
- phc_ingestion-0.10.11.dist-info/METADATA,sha256=tRkIKGt0Yqk_NmBaE0qtD-tko63m3LozUr2V7ycoYzs,678
64
- phc_ingestion-0.10.11.dist-info/RECORD,,
62
+ phc_ingestion-0.10.13.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
63
+ phc_ingestion-0.10.13.dist-info/METADATA,sha256=7CQQcYy4OIzsmQ-0mv5ZiO1KV9npZF62M61s2633IS0,678
64
+ phc_ingestion-0.10.13.dist-info/RECORD,,