phc-ingestion 0.10.12__tar.gz → 0.10.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/PKG-INFO +1 -1
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nebula/manifest_assembler.py +42 -4
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nebula/process.py +3 -2
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/pyproject.toml +1 -1
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/PYPI.md +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/process.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/cnv.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/detect_genome_ref.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/hla.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/ihc.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/interpretation.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/json.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/metadata.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/specimen_details.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/structural.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/tests.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/tmb.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/tsv.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/caris/util/vcf.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/process.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/util/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/util/cnv.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/util/fnv.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/util/ga4gh.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/util/interpretation.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/foundation/util/vcf_etl.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/generic/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/generic/process.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/generic/utils.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nebula/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nebula/constants.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/process.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/alteration_table.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/interpretation.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/manifest_helpers.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/process_cnv.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/process_manifest.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/process_structural.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/process_vcf.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/types.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/resources/GRCh37_map.csv.gz +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/resources/GRCh38_map.csv.gz +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/coords_to_genes.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/ga4gh.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/gene_to_coords.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/lambda_client.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/tar.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/shared_util/types.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/Variant.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/standardize.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/__init__.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
- {phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/read_write.py +0 -0
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import re
|
|
2
|
+
import gzip
|
|
3
|
+
from typing import Optional, TypedDict, Any, Callable
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from dateutil import parser
|
|
3
6
|
from ingestion.shared_util.lambda_client import LambdaClient
|
|
7
|
+
from ingestion.vcf_standardization.util.read_write import read_headers
|
|
4
8
|
from ingestion.nebula.constants import (
|
|
5
9
|
DATASET_SYSTEM,
|
|
6
10
|
NEBULA_KIT_ID_SYSTEM,
|
|
@@ -9,7 +13,6 @@ from ingestion.nebula.constants import (
|
|
|
9
13
|
NEBULA_BODY_SITE,
|
|
10
14
|
NEBULA_INDICATION,
|
|
11
15
|
)
|
|
12
|
-
import re
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
class HumanName(TypedDict):
|
|
@@ -49,11 +52,14 @@ class Practitioner(Resource):
|
|
|
49
52
|
|
|
50
53
|
|
|
51
54
|
class ManifestAssembler:
|
|
52
|
-
def __init__(
|
|
55
|
+
def __init__(
|
|
56
|
+
self, ingestion_id: str, account_id: str, project_id: str, kit_id: str, vcf_file_path: str
|
|
57
|
+
):
|
|
53
58
|
self.ingestion_id = ingestion_id
|
|
54
59
|
self.account_id = account_id
|
|
55
60
|
self.project_id = project_id
|
|
56
61
|
self.kit_id = kit_id
|
|
62
|
+
self.vcf_file_path = vcf_file_path
|
|
57
63
|
self.client = LambdaClient(
|
|
58
64
|
"patient-service",
|
|
59
65
|
{
|
|
@@ -142,6 +148,37 @@ class ManifestAssembler:
|
|
|
142
148
|
"fullName": f"{first_name} {last_name}",
|
|
143
149
|
}
|
|
144
150
|
|
|
151
|
+
def _safe(self, lambda_func: Callable[[], Any]) -> Any:
|
|
152
|
+
try:
|
|
153
|
+
return lambda_func()
|
|
154
|
+
except Exception as e:
|
|
155
|
+
return None
|
|
156
|
+
|
|
157
|
+
def __extract_collection_date(self) -> dict[str, str]:
|
|
158
|
+
in_file = self.vcf_file_path
|
|
159
|
+
with gzip.open(in_file, "rt") if in_file.endswith(".gz") else open(in_file, "r") as f:
|
|
160
|
+
headers = read_headers(f)
|
|
161
|
+
for header in headers:
|
|
162
|
+
epoch_parts = re.search(r"Epoch=(\d+)", header)
|
|
163
|
+
epoch_time_raw = epoch_parts.group(1) if epoch_parts else None
|
|
164
|
+
epoch_time = self._safe(
|
|
165
|
+
lambda: datetime.fromtimestamp(int(epoch_time_raw) / 1000.0, tz=timezone.utc)
|
|
166
|
+
)
|
|
167
|
+
date_parts = re.search(r"Date=\"(.*)\"", header)
|
|
168
|
+
date_string_raw = date_parts.group(1) if date_parts else None
|
|
169
|
+
date_string = self._safe(
|
|
170
|
+
lambda: parser.parse(date_string_raw, tzinfos={"GST": "UTC+4"})
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
report_date = epoch_time or date_string
|
|
174
|
+
if report_date:
|
|
175
|
+
return {
|
|
176
|
+
"collDate": report_date.astimezone(tz=timezone.utc)
|
|
177
|
+
.isoformat(timespec="milliseconds")
|
|
178
|
+
.replace("+00:00", "Z")
|
|
179
|
+
}
|
|
180
|
+
return {}
|
|
181
|
+
|
|
145
182
|
def create_manifest(self) -> dict[str, Any]:
|
|
146
183
|
patient = self.__fetch_patient_by_kit_id()
|
|
147
184
|
|
|
@@ -167,6 +204,7 @@ class ManifestAssembler:
|
|
|
167
204
|
return {
|
|
168
205
|
"name": "Nebula",
|
|
169
206
|
"indexedDate": datetime.now().strftime("%Y-%m-%d"),
|
|
207
|
+
**self.__extract_collection_date(),
|
|
170
208
|
"reference": "GRCh38",
|
|
171
209
|
"patientId": patient.get("id"),
|
|
172
210
|
"mrn": self.__extract_elation_mrn(patient),
|
|
@@ -15,8 +15,9 @@ def process(vcf_file, out_path, file_name, source_file_id, ingestion_id, account
|
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
case_id = file_name
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
manifest = ManifestAssembler(
|
|
19
|
+
ingestion_id, account_id, project_id, case_id, vcf_file
|
|
20
|
+
).create_manifest()
|
|
20
21
|
base_vcf_file = os.path.basename(vcf_file)
|
|
21
22
|
vcf_out = base_vcf_file.replace(".vcf", ".modified.vcf")
|
|
22
23
|
vcf_final = base_vcf_file.replace(".vcf", ".modified.nrm.filtered.vcf")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/nextgen_specific_genes.py
RENAMED
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/pre_filter_somatic_vcf.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/nextgen/util/process_structural.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/standardize.py
RENAMED
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/__init__.py
RENAMED
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/af_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/dp_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.10.12 → phc-ingestion-0.10.14}/ingestion/vcf_standardization/util/read_write.py
RENAMED
|
File without changes
|