phc-ingestion 0.10.2__py3-none-any.whl → 0.10.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
# Test type label for Nebula ingestions; the Nebula `process` function writes
# this value to the manifest's "testType" field.
NEBULA_TEST_TYPE = "WGS-30x"
@@ -0,0 +1,172 @@
1
+ from typing import Optional, TypedDict, Any
2
+ from datetime import datetime
3
+ from ingestion.shared_util.lambda_client import LambdaClient
4
+
5
+
6
class HumanName(TypedDict):
    """Name entry of a patient or practitioner resource.

    NOTE(review): all keys are declared as required here, but the assembler
    reads them defensively with ``.get`` -- presumably they may be absent in
    real responses; confirm against the patient service.
    """

    use: str
    given: list[str]  # given names; the assembler uses the first entry as first name
    family: str  # used by the assembler as the last name


class Identifier(TypedDict):
    """System/value pair identifying a resource."""

    system: str
    value: str


class Reference(TypedDict):
    """Pointer to another resource."""

    # The assembler splits this on "/" and uses the second segment as the id,
    # i.e. it expects a "<ResourceType>/<id>" style string.
    reference: str | None


class Resource(TypedDict):
    """Fields shared by every resource type declared in this module."""

    id: str | None
    identifier: list[Identifier] | None


class Patient(Resource):
    """Patient resource as read from the patient service."""

    name: list[HumanName] | None
    gender: str | None
    birthDate: str | None  # parsed with datetime.fromisoformat by the assembler
    managingOrganization: Reference | None
    # NOTE(review): FHIR STU3 defines generalPractitioner as a list of
    # references (0..*) -- confirm the shape the patient service returns.
    generalPractitioner: Reference | None


class Organization(Resource):
    """Organization resource; only its name and identifiers are used."""

    name: str | None


class Practitioner(Resource):
    """Practitioner resource; only its name and identifiers are used."""

    name: list[HumanName] | None
40
+
41
+
42
class ManifestAssembler:
    """Assemble the Nebula manifest for the patient linked to a kit id.

    The patient is looked up in the "patient-service" lambda by project
    (dataset tag) and Nebula kit id; the patient's managing organization and
    general practitioner are fetched on a best-effort basis and only add
    their manifest fields when they could be resolved.
    """

    def __init__(self, ingestion_id: str, account_id: str, project_id: str, kit_id: str):
        """Store the lookup keys and create the patient-service client.

        Args:
            ingestion_id: Sent as the ``LifeOmic-Correlation-Id`` header.
            account_id: Sent as the ``LifeOmic-Account`` header and used in
                every request path.
            project_id: Dataset the patient search is restricted to.
            kit_id: Nebula kit id used to find the patient.
        """
        self.ingestion_id = ingestion_id
        self.account_id = account_id
        self.project_id = project_id
        self.kit_id = kit_id
        self.client = LambdaClient(
            "patient-service",
            {
                "Content-Type": "application/json",
                "LifeOmic-Account": self.account_id,
                "LifeOmic-Correlation-Id": self.ingestion_id,
            },
        )

    def __fetch_patient_by_kit_id(self) -> Optional[Patient]:
        """Return the first patient matching the project and kit id, or None."""
        path = f"/{self.account_id}/dstu3/Patient"
        params = {
            "_tag": f"http://lifeomic.com/fhir/dataset|{self.project_id}",
            "identifier": f"http://lifeomic.com/fhir/nebula-kit-id|{self.kit_id}",
        }

        response = self.client.invoke(path, "get", None, params)
        # Tolerate a missing or null "entry" as well as an empty search result.
        entries = (response or {}).get("entry") or []
        return entries[0].get("resource") if entries else None

    def __fetch_resource_by_type_and_reference(
        self,
        resource_type: str,
        reference: Reference | list[Reference] | None,
    ) -> Any:
        """Fetch the resource a reference points at; None when unavailable.

        Args:
            resource_type: Resource type segment of the request path,
                e.g. "Organization".
            reference: A single reference, or a list of references of which
                the first is used (FHIR STU3 declares
                ``Patient.generalPractitioner`` as 0..*).

        Returns:
            The response body of the read request, or None when there is no
            usable reference or the request fails.
        """
        if isinstance(reference, list):
            reference = reference[0] if reference else None
        if not reference:
            return None

        resource_id = self.__extract_id_from_reference(reference)
        if not resource_id:
            # An empty id would turn the read into a request on the bare
            # resource-type path instead of a single-resource read.
            return None

        path = f"/{self.account_id}/dstu3/{resource_type}/{resource_id}"

        try:
            return self.client.invoke(path, "get")
        except RuntimeError:
            # Deliberate best effort: the manifest simply omits the fields
            # derived from this resource.
            return None

    def __extract_identifier_from_resource(self, resource: Resource | None) -> str:
        """Return the value of the resource's first identifier, or ""."""
        if not resource:
            return ""

        identifiers = resource.get("identifier") or []
        if not identifiers:
            return ""
        return identifiers[0].get("value") or ""

    def __extract_id_from_reference(self, reference: Reference) -> str:
        """Return the id part of a "<ResourceType>/<id>" reference string.

        A string without "/" is returned unchanged; a missing or empty
        reference yields "".
        """
        ref_string = reference.get("reference") if reference else None
        if not ref_string:
            return ""

        parts = ref_string.split("/")
        return parts[1] if len(parts) > 1 else parts[0]

    def __parse_human_name(self, human_name: list[HumanName] | None) -> dict[str, str] | None:
        """Flatten the first name entry into lastName/firstName/fullName.

        Returns None when there is no name entry at all. Missing ``family``
        or ``given`` parts become empty strings instead of raising.
        """
        if not human_name:
            return None

        primary = human_name[0]
        last_name = primary.get("family") or ""
        given_names = primary.get("given") or []
        first_name = given_names[0] if given_names else ""

        return {
            "lastName": last_name,
            "firstName": first_name,
            # strip() avoids a dangling space when one of the parts is empty.
            "fullName": f"{first_name} {last_name}".strip(),
        }

    def create_manifest(self) -> dict[str, Any]:
        """Build the manifest dictionary for the configured kit id.

        Returns:
            The manifest with the fixed Nebula fields, ``patientInfo`` and,
            when the corresponding resources could be fetched, the
            ``medFacil*`` and ``orderingMD*`` fields.

        Raises:
            RuntimeError: If no patient matches the kit id, or the patient
                has no birth date or no name.
            ValueError: If the birth date is not accepted by
                ``datetime.fromisoformat``.
        """
        patient = self.__fetch_patient_by_kit_id()

        if not patient:
            raise RuntimeError(f"Patient with kit id {self.kit_id} not found")
        patient_birth_date = patient.get("birthDate")
        if not patient_birth_date:
            raise RuntimeError("Patient birth date is a required to create a manifest")

        patient_info = self.__parse_human_name(patient.get("name"))
        if not patient_info:
            # Fail with an explicit error instead of a KeyError/AttributeError.
            raise RuntimeError("Patient name is required to create a manifest")

        organization: Organization | None = self.__fetch_resource_by_type_and_reference(
            "Organization", patient.get("managingOrganization")
        )

        general_practitioner: Practitioner | None = self.__fetch_resource_by_type_and_reference(
            "Practitioner", patient.get("generalPractitioner")
        )

        practitioner_info = self.__parse_human_name(
            general_practitioner.get("name") if general_practitioner else None
        )

        return {
            "name": "Nebula",
            "indexedDate": datetime.now().strftime("%Y-%m-%d"),
            "reference": "GRCh38",  # TODO: Get from project?
            "mrn": patient.get("id"),
            "bodySite": "Buccal Swab",
            "bodySiteDisplay": "Buccal Swab",
            "bodySiteSystem": "http://lifeomic.com/fhir/sequence-body-site",
            "indicationSystem": "http://lifeomic.com/fhir/sequence-indication",
            "indication": "Genetic Health Screening",
            "indicationDisplay": "Genetic Health Screening",
            "patientInfo": {
                "lastName": patient_info.get("lastName"),
                "dob": datetime.fromisoformat(patient_birth_date).strftime("%Y-%m-%d"),
                "firstName": patient_info.get("firstName"),
                "gender": patient.get("gender"),
            },
            **(
                {
                    "medFacilName": organization.get("name"),
                    "medFacilID": self.__extract_identifier_from_resource(organization),
                }
                if organization
                else {}
            ),
            **(
                {
                    # A practitioner without a name still contributes its NPI.
                    "orderingMDName": (
                        practitioner_info.get("fullName") if practitioner_info else None
                    ),
                    "orderingMDNPI": self.__extract_identifier_from_resource(general_practitioner),
                }
                if general_practitioner
                else {}
            ),
        }
@@ -1,18 +1,21 @@
1
1
  import os
2
2
 
3
+ from ingestion.nebula.constants import NEBULA_TEST_TYPE
3
4
  from ingestion.vcf_standardization.standardize import standardize_vcf
4
5
  from lifeomic_logging import scoped_logger
6
+ from ingestion.nebula.manifest_assembler import ManifestAssembler
5
7
 
6
8
 
7
- def process(vcf_file, out_path, file_name, source_file_id):
9
+ def process(vcf_file, out_path, file_name, source_file_id, ingestion_id, account_id, project_id):
8
10
 
9
11
  with scoped_logger(__name__) as log:
10
12
  log.info(
11
13
  f"Beginning Nebula ingestion for vcf_file: {vcf_file}, file_name: {file_name}, out_path: {out_path}, source_file_id: {source_file_id}"
12
14
  )
13
15
 
14
- manifest = {}
15
16
  case_id = file_name
17
+ manifest_assembler = ManifestAssembler(ingestion_id, account_id, project_id, case_id)
18
+ manifest = manifest_assembler.create_manifest()
16
19
  base_vcf_file = os.path.basename(vcf_file)
17
20
  vcf_out = base_vcf_file.replace(".vcf", ".modified.vcf")
18
21
  vcf_final = base_vcf_file.replace(".vcf", ".modified.nrm.filtered.vcf")
@@ -28,6 +31,8 @@ def process(vcf_file, out_path, file_name, source_file_id):
28
31
  )
29
32
 
30
33
  # Add to manifest
34
+ manifest["testType"] = NEBULA_TEST_TYPE
35
+ manifest["reportID"] = case_id
31
36
  manifest["sourceFileId"] = source_file_id
32
37
  manifest["resources"] = [{"fileName": f".lifeomic/vcf-ingest/{case_id}/{base_vcf_file}"}]
33
38
  manifest["files"] = [
@@ -44,7 +49,7 @@ def process(vcf_file, out_path, file_name, source_file_id):
44
49
  case_metadata = {
45
50
  "test_type": manifest["testType"],
46
51
  "vcf_line_count": vcf_line_count,
47
- "case_id": manifest["reportID"],
52
+ "germline_case_id": manifest["reportID"],
48
53
  "germline_genome_reference": genome_reference,
49
54
  }
50
55
 
@@ -0,0 +1,81 @@
1
+ import boto3
2
+ from mypy_boto3_lambda.type_defs import InvocationResponseTypeDef
3
+ import json
4
+ from typing import Optional, Any, cast
5
+ from urllib3.util.url import parse_url
6
+
7
+
8
class LambdaHandler:
    """Invoke a lambda with an HTTP-style request payload and unwrap the reply."""

    # Status codes treated as success, both for the invocation itself and for
    # the "statusCode" reported inside the lambda's response payload.
    ACCEPTABLE_STATUS_CODES = [200, 201, 204]

    def __init__(self) -> None:
        return

    def invoke(
        self,
        url: str,
        header: dict,
        method: str,
        body: Optional[dict] = None,
        query_params: Optional[dict] = None,
    ) -> Any:
        """Invoke the lambda named by the URL's host part and return its body.

        Args:
            url: ``<function-name>/<path>``; the network location selects the
                lambda function and the path is forwarded in the payload.
            header: Headers forwarded in the payload.
            method: HTTP method; upper-cased before sending.
            body: Optional request body forwarded in the payload.
            query_params: Optional query string parameters.

        Returns:
            The decoded response body (see ``parse_response``).

        Raises:
            RuntimeError: On any failure to invoke the lambda or to parse an
                acceptable response; the original error is chained.
        """
        parsed_url = parse_url(url)
        payload = {
            "headers": header,
            "path": parsed_url.path,
            "httpMethod": method.upper(),
            "body": body,
            "queryStringParameters": query_params,
        }

        try:
            client = boto3.client("lambda")
            raw_response = client.invoke(
                FunctionName=cast(str, parsed_url.netloc),
                InvocationType="RequestResponse",
                Payload=json.dumps(payload),
            )
            return self.parse_response(raw_response)

        # boto3/botocore errors derive from Exception, not RuntimeError, so a
        # narrower clause would let them escape unwrapped and bypass the
        # RuntimeError handling callers rely on.
        except Exception as e:
            raise RuntimeError(f"Error invoking lambda for payload {str(payload)}") from e

    def parse_response(self, response: InvocationResponseTypeDef) -> Any:
        """Validate an invocation response and return the decoded body.

        Args:
            response: Mapping with the invocation ``StatusCode`` and a
                readable ``Payload`` holding a JSON document with
                ``statusCode`` and ``body``.

        Returns:
            The JSON-decoded ``body``; ``{}`` when the body is missing or
            empty; the body unchanged when it is not a string.

        Raises:
            RuntimeError: If either status code is not acceptable or the
                payload is empty or not a JSON object.
        """
        raw_payload = response["Payload"].read() if "Payload" in response else None
        invocation_status_code = response.get("StatusCode", None)
        if invocation_status_code not in self.ACCEPTABLE_STATUS_CODES:
            raise RuntimeError(
                f"Error invoking lambda. Invocation status code: {invocation_status_code}. Response: {str(raw_payload)}"
            )

        if not raw_payload:
            raise RuntimeError("Error invoking lambda. The response payload is empty")

        payload = json.loads(raw_payload)
        if not isinstance(payload, dict):
            raise RuntimeError(
                f"Error invoking lambda. Unexpected response payload: {str(payload)}"
            )

        lambda_status_code = payload.get("statusCode", None)
        if lambda_status_code not in self.ACCEPTABLE_STATUS_CODES:
            raise RuntimeError(
                f"Error invoking lambda. Lambda status code: {lambda_status_code}. Response: {str(payload)}"
            )

        body = payload.get("body")
        if not body:
            # e.g. a 204 reply: nothing to decode.
            return {}
        if isinstance(body, (str, bytes, bytearray)):
            return json.loads(body)
        return body
59
+
60
+
61
class LambdaClient:
    """Client bound to one lambda host and a set of default headers."""

    def __init__(self, host: str, default_header: dict):
        """Remember the target host and the headers sent with every request.

        Args:
            host: Name placed in front of each request path.
            default_header: Headers passed to the handler on every call.
        """
        self.host = host
        self.default_header = default_header
        self.handler = LambdaHandler()

    def invoke(
        self,
        path: str,
        method: str,
        body: Optional[dict] = None,
        query_params: Optional[dict] = None,
    ) -> Any:
        """Send a request for ``path`` to the host and return the response body.

        Args:
            path: Request path; one leading "/" is dropped before it is
                joined to the host.
            method: HTTP method name.
            body: Optional request body.
            query_params: Optional query string parameters.

        Raises:
            RuntimeError: If the handler reports an error; the original
                error is chained and its message is included.
        """
        endpoint = f"{self.host}/{path.removeprefix('/')}"
        try:
            return self.handler.invoke(endpoint, self.default_header, method, body, query_params)

        except RuntimeError as e:
            raise RuntimeError(f"Error invoking API. Request error: {str(e)}") from e
@@ -1,18 +1,21 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.10.2
3
+ Version: 0.10.4
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
7
7
  Requires-Python: >=3.11
8
+ Requires-Dist: boto3>=1.28.34
8
9
  Requires-Dist: jsonschema<5.0.0,>=4.16.0
9
10
  Requires-Dist: lifeomic-logging<0.4.0,>=0.3.2
11
+ Requires-Dist: mypy-boto3-lambda>=1.28.19
10
12
  Requires-Dist: natsort==7.1.1
11
13
  Requires-Dist: numpy>=2.1.2
12
14
  Requires-Dist: packaging>=23.1
13
15
  Requires-Dist: pandas>=2.2.3
14
16
  Requires-Dist: ruamel.yaml==0.17.21
15
17
  Requires-Dist: schema>=0.7.5
18
+ Requires-Dist: urllib3>=1.26.16
16
19
  Requires-Dist: xmltodict>=0.14.2
17
20
  Description-Content-Type: text/markdown
18
21
 
@@ -27,7 +27,9 @@ ingestion/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
27
27
  ingestion/generic/process.py,sha256=ZaVnZ_gx9faDUsuresI1A0oCegTa-dPQT7DBFMeZGyY,1777
28
28
  ingestion/generic/utils.py,sha256=1MEIru7uq38IjUdL8lcHqDH0oTki9uWrz1f2e-pmRoU,2814
29
29
  ingestion/nebula/__init__.py,sha256=VauK-rup_N8ZXVohx3HYqHX_PE_WoPyMUhdv2R7al4o,45
30
- ingestion/nebula/process.py,sha256=kIFhndcXFUICKMg4PfFhEsBxb_0oPkGPnpVNHhHFJ7k,1853
30
+ ingestion/nebula/constants.py,sha256=015BkvYnu5JTzFCS3zjqsaFb0emzH4I8ZewpBjMJQfo,29
31
+ ingestion/nebula/manifest_assembler.py,sha256=2nXMfFESs00NeetpizIJlFa8SR0KKqYgK_Xnusdjb58,5616
32
+ ingestion/nebula/process.py,sha256=D2ct9tF60ZIP_jZdjvgjfTkhEAkNNEhxaSqa04CtNR8,2237
31
33
  ingestion/nextgen/__init__.py,sha256=7LQ-h_Bvc5P1QcHMdzsqi1Qm4fTJn04-ozar2ty9wSc,59
32
34
  ingestion/nextgen/process.py,sha256=5Z0RfclwTAYZruGDiLPutjPCYFh1DJpoWY9dnttghT4,3993
33
35
  ingestion/nextgen/util/alteration_table.py,sha256=JTWBL1Fqj_pGsH5vwuVEnCUJle2wOBk6VYImHYCF9vg,6129
@@ -46,6 +48,7 @@ ingestion/shared_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
46
48
  ingestion/shared_util/coords_to_genes.py,sha256=vz9EfgFm3BS6pEPnslbEka8cJKlQZtHJdH2WRCCUMdE,1669
47
49
  ingestion/shared_util/ga4gh.py,sha256=-jNQj79zspxG67MxHzOfwAhLbb9je55M1h4-i5ri-tU,507
48
50
  ingestion/shared_util/gene_to_coords.py,sha256=M-q5ateLSQ4fCF0uMk5TX2uBLRrcZzXqXEf05TPaLsU,876
51
+ ingestion/shared_util/lambda_client.py,sha256=0EdV5nOqe_w-OoDyi72w1P0lk30g1vlTW2sD3ci_Qqw,2695
49
52
  ingestion/shared_util/open_maybe_gzipped.py,sha256=FrOPJ4OgfpQGyT3f1Su1rFeuuYYu6QJ-nVIBIosbfhw,232
50
53
  ingestion/shared_util/tar.py,sha256=BGR_2vBbxyMgF-GzJ3SrihsPdOzII4SFVz9tvKV5vo0,482
51
54
  ingestion/shared_util/types.py,sha256=u9AD2OrTQWMBtK_7VXHsD8Rv6HFs-7ZUItNl4KXdL7k,68
@@ -56,6 +59,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
56
59
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
57
60
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
58
61
  ingestion/vcf_standardization/util/read_write.py,sha256=x3Pf6Dq8tmolblbCS5CrNmrcHS3FGfqBSFpFgvFGC4g,2526
59
- phc_ingestion-0.10.2.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
60
- phc_ingestion-0.10.2.dist-info/METADATA,sha256=HuebNLig9ccPPhZrF9It2bxP7p4ZWf0LGiz_rt__1Zk,573
61
- phc_ingestion-0.10.2.dist-info/RECORD,,
62
+ phc_ingestion-0.10.4.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
63
+ phc_ingestion-0.10.4.dist-info/METADATA,sha256=2bo_7YXbyMWyhO2pJDyGSbMWb_IYdPWgihQDWuB9b6s,677
64
+ phc_ingestion-0.10.4.dist-info/RECORD,,