phc-ingestion 0.10.3__py3-none-any.whl → 0.10.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
# Value written to the manifest's "testType" field for Nebula ingestions.
NEBULA_TEST_TYPE = "WGS-30x"

# Common root of the LifeOmic FHIR code-system URIs defined below.
_LIFEOMIC_FHIR_ROOT = "http://lifeomic.com/fhir"

# Code systems used to tag/search resources and to label manifest codings.
DATASET_SYSTEM = f"{_LIFEOMIC_FHIR_ROOT}/dataset"
NEBULA_KIT_ID_SYSTEM = f"{_LIFEOMIC_FHIR_ROOT}/nebula-kit-id"
BODY_SITE_SYSTEM = f"{_LIFEOMIC_FHIR_ROOT}/sequence-body-site"
INDICATION_SYSTEM = f"{_LIFEOMIC_FHIR_ROOT}/sequence-indication"

# Fixed body site and indication reported for every Nebula sample.
NEBULA_BODY_SITE = "Buccal Swab"
NEBULA_INDICATION = "Genetic Health Screening"
@@ -0,0 +1,181 @@
1
+ from typing import Optional, TypedDict, Any
2
+ from datetime import datetime
3
+ from ingestion.shared_util.lambda_client import LambdaClient
4
+ from ingestion.nebula.constants import (
5
+ DATASET_SYSTEM,
6
+ NEBULA_KIT_ID_SYSTEM,
7
+ BODY_SITE_SYSTEM,
8
+ INDICATION_SYSTEM,
9
+ NEBULA_BODY_SITE,
10
+ NEBULA_INDICATION,
11
+ )
12
+
13
+
14
class HumanName(TypedDict):
    """Name entry as carried on Patient and Practitioner resources."""

    use: str
    # Given names; ManifestAssembler takes the first entry as the first name.
    given: list[str]
    # Family name; ManifestAssembler emits it as the last name.
    family: str
18
+
19
+
20
class Identifier(TypedDict):
    """A system/value pair identifying a resource."""

    # URI naming the identifier's namespace.
    system: str
    # The identifier itself; ManifestAssembler reads this from a resource's first identifier.
    value: str
23
+
24
+
25
class Reference(TypedDict):
    """Pointer from one resource to another."""

    # Reference string; ManifestAssembler splits it on "/" to obtain the target id.
    reference: Optional[str]
27
+
28
+
29
class Resource(TypedDict):
    """Fields shared by every resource type modelled in this module."""

    id: Optional[str]
    # Business identifiers; only the first entry is consulted by ManifestAssembler.
    identifier: Optional[list[Identifier]]
32
+
33
+
34
class Patient(Resource):
    """Patient resource fields consumed when building a manifest."""

    name: Optional[list[HumanName]]
    gender: Optional[str]
    # ISO-format date string; create_manifest refuses to proceed without it.
    birthDate: Optional[str]
    # Looked up as an Organization resource by create_manifest.
    managingOrganization: Optional[Reference]
    # Looked up as a Practitioner resource by create_manifest.
    generalPractitioner: Optional[Reference]
40
+
41
+
42
class Organization(Resource):
    """Organization resource; its name is reported as the medical facility name."""

    name: Optional[str]
44
+
45
+
46
class Practitioner(Resource):
    """Practitioner resource; its first name entry is reported as the ordering MD."""

    name: Optional[list[HumanName]]
48
+
49
+
50
class ManifestAssembler:
    """Assemble the ingestion manifest for a Nebula kit from patient-service data.

    The patient is located by kit id within the project; the patient's managing
    organization and general practitioner are then fetched (best effort) to fill
    in the facility and ordering-physician fields.
    """

    def __init__(self, ingestion_id: str, account_id: str, project_id: str, kit_id: str):
        """Store the lookup keys and create the patient-service client.

        Args:
            ingestion_id: Sent as the ``LifeOmic-Correlation-Id`` header.
            account_id: Sent as the ``LifeOmic-Account`` header and used in request paths.
            project_id: Project the patient search is restricted to via ``_tag``.
            kit_id: Nebula kit id the patient is searched by.
        """
        self.ingestion_id = ingestion_id
        self.account_id = account_id
        self.project_id = project_id
        self.kit_id = kit_id
        self.client = LambdaClient(
            "patient-service",
            {
                "Content-Type": "application/json",
                "LifeOmic-Account": self.account_id,
                "LifeOmic-Correlation-Id": self.ingestion_id,
            },
        )

    def __fetch_patient_by_kit_id(self) -> Optional[Patient]:
        """Return the first patient in the project tagged with this kit id, or None."""
        path = f"/{self.account_id}/dstu3/Patient"
        params = {
            "_tag": f"{DATASET_SYSTEM}|{self.project_id}",
            "identifier": f"{NEBULA_KIT_ID_SYSTEM}|{self.kit_id}",
        }

        response = self.client.invoke(path, "get", None, params)
        # `or []` also covers a response that carries an explicit null "entry".
        entries = response.get("entry") or []
        return entries[0].get("resource") if entries else None

    def __fetch_resource_by_type_and_reference(
        self,
        resource_type: str,
        reference: Reference | list[Reference] | None,
    ) -> Any:
        """Fetch the resource a reference points at; None when absent or the call fails.

        Args:
            resource_type: Resource type segment of the request path.
            reference: A single reference, or a list of references of which
                only the first is used.
        """
        # NOTE(review): FHIR STU3 defines Patient.generalPractitioner as a list of
        # references; accept that shape as well as the single-reference shape the
        # type hints describe. Confirm which one patient-service actually returns.
        if isinstance(reference, list):
            reference = reference[0] if reference else None
        if not reference:
            return None

        resource_id = self.__extract_id_from_reference(reference)
        if not resource_id:
            # Without an id the path would end in "/<type>/" and address the
            # collection rather than a single resource.
            return None

        path = f"/{self.account_id}/dstu3/{resource_type}/{resource_id}"
        try:
            return self.client.invoke(path, "get")
        except RuntimeError:
            # Linked resources are optional in the manifest, so lookup failures
            # are deliberately treated as "not available".
            return None

    def __extract_identifier_from_resource(self, resource: Resource | None) -> str:
        """Return the value of the resource's first identifier, or "" if there is none."""
        if not resource:
            return ""

        identifiers = resource.get("identifier") or []
        return identifiers[0].get("value", "") if identifiers else ""

    def __extract_id_from_reference(self, reference: Reference) -> str:
        """Return the id portion of a "Type/id" reference string ("" when missing).

        A string without "/" is returned unchanged.
        """
        ref_string = reference.get("reference") if reference else None
        if not ref_string:
            return ""

        parts = ref_string.split("/")
        return parts[1] if len(parts) > 1 else parts[0]

    def __parse_human_name(self, human_name: list[HumanName] | None) -> Optional[dict[str, str]]:
        """Reduce the first name entry to lastName / firstName / fullName.

        Returns None when no name entries are supplied. Missing or empty
        ``family`` / ``given`` values become empty strings instead of raising.
        """
        if not human_name:
            return None

        primary = human_name[0]
        last_name = primary.get("family") or ""
        given_names = primary.get("given") or []
        first_name = given_names[0] if given_names else ""

        return {
            "lastName": last_name,
            "firstName": first_name,
            # Join only the parts that exist so a missing part leaves no stray space.
            "fullName": " ".join(part for part in (first_name, last_name) if part),
        }

    def create_manifest(self) -> dict[str, Any]:
        """Build the manifest dictionary for the patient owning this kit.

        Facility keys (``medFacilName``, ``medFacilID``) and ordering-physician
        keys (``orderingMDName``, ``orderingMDNPI``) are included only when the
        corresponding resource could be fetched. Patient name parts and gender
        are emitted as None when the patient record lacks them.

        Raises:
            RuntimeError: If no patient matches the kit id, or the patient has
                no birth date.
        """
        patient = self.__fetch_patient_by_kit_id()
        if not patient:
            raise RuntimeError(f"Patient with kit id {self.kit_id} not found")

        patient_birth_date = patient.get("birthDate")
        if not patient_birth_date:
            raise RuntimeError("Patient birth date is required to create a manifest")

        organization: Organization | None = self.__fetch_resource_by_type_and_reference(
            "Organization", patient.get("managingOrganization")
        )
        general_practitioner: Practitioner | None = self.__fetch_resource_by_type_and_reference(
            "Practitioner", patient.get("generalPractitioner")
        )

        # An absent name yields None from the parser; fall back to an empty
        # mapping so the lookups below produce None instead of raising.
        patient_info = self.__parse_human_name(patient.get("name")) or {}
        practitioner_info = (
            self.__parse_human_name(
                general_practitioner.get("name") if general_practitioner else None
            )
            or {}
        )

        manifest: dict[str, Any] = {
            "name": "Nebula",
            "indexedDate": datetime.now().strftime("%Y-%m-%d"),
            "reference": "GRCh38",
            "patientId": patient.get("id"),
            "mrn": patient.get("id"),
            "bodySite": NEBULA_BODY_SITE,
            "bodySiteDisplay": NEBULA_BODY_SITE,
            "bodySiteSystem": BODY_SITE_SYSTEM,
            "indicationSystem": INDICATION_SYSTEM,
            "indication": NEBULA_INDICATION,
            "indicationDisplay": NEBULA_INDICATION,
            "patientInfo": {
                "lastName": patient_info.get("lastName"),
                "dob": datetime.fromisoformat(patient_birth_date).strftime("%Y-%m-%d"),
                "firstName": patient_info.get("firstName"),
                "gender": patient.get("gender"),
            },
        }

        if organization:
            manifest["medFacilName"] = organization.get("name")
            manifest["medFacilID"] = self.__extract_identifier_from_resource(organization)

        if general_practitioner:
            manifest["orderingMDName"] = practitioner_info.get("fullName")
            manifest["orderingMDNPI"] = self.__extract_identifier_from_resource(
                general_practitioner
            )

        return manifest
@@ -1,18 +1,21 @@
1
1
  import os
2
2
 
3
+ from ingestion.nebula.constants import NEBULA_TEST_TYPE
3
4
  from ingestion.vcf_standardization.standardize import standardize_vcf
4
5
  from lifeomic_logging import scoped_logger
6
+ from ingestion.nebula.manifest_assembler import ManifestAssembler
5
7
 
6
8
 
7
- def process(vcf_file, out_path, file_name, source_file_id):
9
+ def process(vcf_file, out_path, file_name, source_file_id, ingestion_id, account_id, project_id):
8
10
 
9
11
  with scoped_logger(__name__) as log:
10
12
  log.info(
11
13
  f"Beginning Nebula ingestion for vcf_file: {vcf_file}, file_name: {file_name}, out_path: {out_path}, source_file_id: {source_file_id}"
12
14
  )
13
15
 
14
- manifest = {}
15
16
  case_id = file_name
17
+ manifest_assembler = ManifestAssembler(ingestion_id, account_id, project_id, case_id)
18
+ manifest = manifest_assembler.create_manifest()
16
19
  base_vcf_file = os.path.basename(vcf_file)
17
20
  vcf_out = base_vcf_file.replace(".vcf", ".modified.vcf")
18
21
  vcf_final = base_vcf_file.replace(".vcf", ".modified.nrm.filtered.vcf")
@@ -28,7 +31,7 @@ def process(vcf_file, out_path, file_name, source_file_id):
28
31
  )
29
32
 
30
33
  # Add to manifest
31
- manifest["testType"] = "WGS-30x"
34
+ manifest["testType"] = NEBULA_TEST_TYPE
32
35
  manifest["reportID"] = case_id
33
36
  manifest["sourceFileId"] = source_file_id
34
37
  manifest["resources"] = [{"fileName": f".lifeomic/vcf-ingest/{case_id}/{base_vcf_file}"}]
@@ -0,0 +1,81 @@
1
+ import boto3
2
+ from mypy_boto3_lambda.type_defs import InvocationResponseTypeDef
3
+ import json
4
+ from typing import Optional, Any, cast
5
+ from urllib3.util.url import parse_url
6
+
7
+
8
class LambdaHandler:
    """Invoke a Lambda function with an HTTP-proxy-style event and decode its reply."""

    # Status codes treated as success, both for the invocation itself and for
    # the "statusCode" reported inside the function's response payload.
    ACCEPTABLE_STATUS_CODES = [200, 201, 204]

    def __init__(self) -> None:
        """Create a handler; no state is kept between invocations."""

    def invoke(
        self,
        url: str,
        header: dict,
        method: str,
        body: Optional[dict] = None,
        query_params: Optional[dict] = None,
    ) -> Any:
        """Synchronously invoke the Lambda named by ``url`` and return its decoded body.

        Args:
            url: ``<function-name>/<path>``; the network-location part selects
                the function and the remainder is sent as the event's ``path``.
            header: Sent as the event's ``headers``.
            method: HTTP verb; upper-cased before sending.
            body: Sent as the event's ``body``.
            query_params: Sent as the event's ``queryStringParameters``.

        Raises:
            RuntimeError: If the response reports a failure (see ``parse_response``).
        """
        parsed_url = parse_url(url)
        payload = {
            "headers": header,
            "path": parsed_url.path,
            "httpMethod": method.upper(),
            "body": body,
            "queryStringParameters": query_params,
        }

        client = boto3.client("lambda")
        try:
            raw_response = client.invoke(
                FunctionName=cast(str, parsed_url.netloc),
                InvocationType="RequestResponse",
                Payload=json.dumps(payload),
            )
            return self.parse_response(raw_response)

        # NOTE(review): only RuntimeError is wrapped here; errors raised by the
        # AWS client itself presumably propagate unchanged — confirm that is intended.
        except RuntimeError as e:
            raise RuntimeError(f"Error invoking lambda for payload {str(payload)}") from e

    def parse_response(self, response: InvocationResponseTypeDef) -> Any:
        """Validate an invocation response and return the JSON-decoded ``body``.

        Returns an empty dict when the function's response has no body (absent,
        null, or empty string).

        Raises:
            RuntimeError: If the invocation status or the function's own
                ``statusCode`` is not acceptable, or if the payload is missing
                or is not a JSON object.
        """
        raw_payload = response["Payload"].read() if "Payload" in response else None
        invocation_status_code = response.get("StatusCode", None)
        if invocation_status_code not in self.ACCEPTABLE_STATUS_CODES:
            raise RuntimeError(
                f"Error invoking lambda. Invocation status code: {invocation_status_code}. Response: {str(raw_payload)}"
            )

        # A missing/empty payload would otherwise surface as a TypeError or
        # JSON decode error that callers catching RuntimeError do not handle.
        if not raw_payload:
            raise RuntimeError("Error invoking lambda. Invocation returned an empty payload")

        payload = json.loads(raw_payload)
        if not isinstance(payload, dict):
            raise RuntimeError(
                f"Error invoking lambda. Unexpected response payload: {str(payload)}"
            )

        lambda_status_code = payload.get("statusCode", None)
        if lambda_status_code not in self.ACCEPTABLE_STATUS_CODES:
            raise RuntimeError(
                f"Error invoking lambda. Lambda status code: {lambda_status_code}. Response: {str(payload)}"
            )

        # The body may be explicitly null or "" (e.g. on a 204); treat both like
        # an absent body rather than passing them to json.loads.
        body = payload.get("body")
        if body is None or body == "":
            return {}
        return json.loads(body)
59
+
60
+
61
class LambdaClient:
    """Client bound to one service host that sends requests through LambdaHandler."""

    def __init__(self, host: str, default_header: dict):
        """Remember the target host and the headers sent with every request."""
        self.host = host
        self.default_header = default_header
        self.handler = LambdaHandler()

    def invoke(
        self,
        path: str,
        method: str,
        body: Optional[dict] = None,
        query_params: Optional[dict] = None,
    ):
        """Send a request to ``path`` on the host and return the handler's result.

        One leading "/" on ``path`` is dropped before it is joined to the host.

        Raises:
            RuntimeError: Wrapping any RuntimeError raised by the handler.
        """
        target = f"{self.host}/{path.removeprefix('/')}"
        try:
            result = self.handler.invoke(target, self.default_header, method, body, query_params)
        except RuntimeError as request_error:
            raise RuntimeError(
                f"Error invoking API. Request error: {str(request_error)}"
            ) from request_error
        return result
@@ -1,18 +1,21 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.10.3
3
+ Version: 0.10.5
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
7
7
  Requires-Python: >=3.11
8
+ Requires-Dist: boto3>=1.28.34
8
9
  Requires-Dist: jsonschema<5.0.0,>=4.16.0
9
10
  Requires-Dist: lifeomic-logging<0.4.0,>=0.3.2
11
+ Requires-Dist: mypy-boto3-lambda>=1.28.19
10
12
  Requires-Dist: natsort==7.1.1
11
13
  Requires-Dist: numpy>=2.1.2
12
14
  Requires-Dist: packaging>=23.1
13
15
  Requires-Dist: pandas>=2.2.3
14
16
  Requires-Dist: ruamel.yaml==0.17.21
15
17
  Requires-Dist: schema>=0.7.5
18
+ Requires-Dist: urllib3>=1.26.16
16
19
  Requires-Dist: xmltodict>=0.14.2
17
20
  Description-Content-Type: text/markdown
18
21
 
@@ -27,7 +27,9 @@ ingestion/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
27
27
  ingestion/generic/process.py,sha256=ZaVnZ_gx9faDUsuresI1A0oCegTa-dPQT7DBFMeZGyY,1777
28
28
  ingestion/generic/utils.py,sha256=1MEIru7uq38IjUdL8lcHqDH0oTki9uWrz1f2e-pmRoU,2814
29
29
  ingestion/nebula/__init__.py,sha256=VauK-rup_N8ZXVohx3HYqHX_PE_WoPyMUhdv2R7al4o,45
30
- ingestion/nebula/process.py,sha256=x0nip0-99iYIOMItaaduAsx-b0EXStdwm50EtFNdJLo,1942
30
+ ingestion/nebula/constants.py,sha256=thKqSwemdaAwAmKvF4FEVI9l1Ph5ergsnMlx6nWte7E,357
31
+ ingestion/nebula/manifest_assembler.py,sha256=sGZdeMpMT4osOBXlxY9CPZKUJSQXaShT77JJVB30I1s,5704
32
+ ingestion/nebula/process.py,sha256=D2ct9tF60ZIP_jZdjvgjfTkhEAkNNEhxaSqa04CtNR8,2237
31
33
  ingestion/nextgen/__init__.py,sha256=7LQ-h_Bvc5P1QcHMdzsqi1Qm4fTJn04-ozar2ty9wSc,59
32
34
  ingestion/nextgen/process.py,sha256=5Z0RfclwTAYZruGDiLPutjPCYFh1DJpoWY9dnttghT4,3993
33
35
  ingestion/nextgen/util/alteration_table.py,sha256=JTWBL1Fqj_pGsH5vwuVEnCUJle2wOBk6VYImHYCF9vg,6129
@@ -46,6 +48,7 @@ ingestion/shared_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
46
48
  ingestion/shared_util/coords_to_genes.py,sha256=vz9EfgFm3BS6pEPnslbEka8cJKlQZtHJdH2WRCCUMdE,1669
47
49
  ingestion/shared_util/ga4gh.py,sha256=-jNQj79zspxG67MxHzOfwAhLbb9je55M1h4-i5ri-tU,507
48
50
  ingestion/shared_util/gene_to_coords.py,sha256=M-q5ateLSQ4fCF0uMk5TX2uBLRrcZzXqXEf05TPaLsU,876
51
+ ingestion/shared_util/lambda_client.py,sha256=0EdV5nOqe_w-OoDyi72w1P0lk30g1vlTW2sD3ci_Qqw,2695
49
52
  ingestion/shared_util/open_maybe_gzipped.py,sha256=FrOPJ4OgfpQGyT3f1Su1rFeuuYYu6QJ-nVIBIosbfhw,232
50
53
  ingestion/shared_util/tar.py,sha256=BGR_2vBbxyMgF-GzJ3SrihsPdOzII4SFVz9tvKV5vo0,482
51
54
  ingestion/shared_util/types.py,sha256=u9AD2OrTQWMBtK_7VXHsD8Rv6HFs-7ZUItNl4KXdL7k,68
@@ -56,6 +59,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
56
59
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
57
60
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
58
61
  ingestion/vcf_standardization/util/read_write.py,sha256=x3Pf6Dq8tmolblbCS5CrNmrcHS3FGfqBSFpFgvFGC4g,2526
59
- phc_ingestion-0.10.3.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
60
- phc_ingestion-0.10.3.dist-info/METADATA,sha256=BbHHrV-H3nYEQ1XGyYlonO2KRtUWkrGRgSTbtrlsnC4,573
61
- phc_ingestion-0.10.3.dist-info/RECORD,,
62
+ phc_ingestion-0.10.5.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
63
+ phc_ingestion-0.10.5.dist-info/METADATA,sha256=qf_dLgOkjhiTmAL2LjoyPJWs-38ggs82qXsc8wc66eg,677
64
+ phc_ingestion-0.10.5.dist-info/RECORD,,