phc-ingestion 0.8.42__tar.gz → 0.9.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/PKG-INFO +1 -1
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/metadata.py +5 -1
- phc-ingestion-0.9.1/ingestion/caris/util/specimen_details.py +86 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/ga4gh.py +18 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/pyproject.toml +1 -1
- phc-ingestion-0.8.42/ingestion/caris/util/specimen_details.py +0 -67
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/PYPI.md +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/process.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/cnv.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/detect_genome_ref.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/ga4gh.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/hla.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/ihc.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/interpretation.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/json.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/structural.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tar.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tests.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tmb.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tsv.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/vcf.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/process.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/cnv.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/fnv.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/interpretation.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/vcf_etl.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/generic/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/generic/process.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/generic/utils.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/process.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/alteration_table.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/interpretation.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/manifest_helpers.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_cnv.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_manifest.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_structural.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_vcf.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/types.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/resources/GRCh37_map.csv.gz +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/resources/GRCh38_map.csv.gz +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/coords_to_genes.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/gene_to_coords.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/types.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/Variant.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/standardize.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/__init__.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
- {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/read_write.py +0 -0
|
@@ -94,9 +94,13 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
|
|
|
94
94
|
# Get date of collected and received for the specimen
|
|
95
95
|
metadata["receivedDate"] = specimen_details["receivedDate"]
|
|
96
96
|
metadata["collDate"] = specimen_details["collDate"]
|
|
97
|
+
if specimen_details["specimenId"]:
|
|
98
|
+
metadata["specimenIds"] = [specimen_details["specimenId"]]
|
|
99
|
+
if specimen_details["specimenTypes"]:
|
|
100
|
+
metadata["specimenTypes"] = specimen_details["specimenTypes"]
|
|
97
101
|
metadata["reportDate"] = get_report_date(test_details, log)
|
|
98
102
|
|
|
99
|
-
#
|
|
103
|
+
# Get the date without the time
|
|
100
104
|
metadata["indexedDate"] = metadata["reportDate"]
|
|
101
105
|
|
|
102
106
|
patient = data["patientInformation"]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from logging import Logger
|
|
2
|
+
from typing import TypedDict, cast, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class SpecimenDetails(TypedDict, total=False):
|
|
6
|
+
"""A partial representation of the specimen details in the Caris JSON file"""
|
|
7
|
+
|
|
8
|
+
specimenReceivedDate: str
|
|
9
|
+
specimenCollectionDate: str
|
|
10
|
+
specimenSite: str
|
|
11
|
+
specimenType: str
|
|
12
|
+
specimenID: str | int
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ParsedSpecimenDetails(TypedDict):
|
|
16
|
+
bodySite: str
|
|
17
|
+
receivedDate: str
|
|
18
|
+
collDate: str
|
|
19
|
+
specimenId: Optional[str]
|
|
20
|
+
specimenTypes: Optional[list[str]]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def parse_specimen_details(specimen_details: list[SpecimenDetails]) -> ParsedSpecimenDetails:
|
|
24
|
+
specimen_types: list[str] = []
|
|
25
|
+
for specimen in specimen_details:
|
|
26
|
+
specimen_type = specimen.get("specimenType")
|
|
27
|
+
if specimen_type and specimen_type not in specimen_types:
|
|
28
|
+
specimen_types.append(specimen_type)
|
|
29
|
+
maybe_specimen_id = specimen_details[0].get("specimenID")
|
|
30
|
+
|
|
31
|
+
return {
|
|
32
|
+
"bodySite": specimen_details[0].get("specimenSite", ""),
|
|
33
|
+
"receivedDate": specimen_details[0].get("specimenReceivedDate", ""),
|
|
34
|
+
"collDate": specimen_details[0].get("specimenCollectionDate", ""),
|
|
35
|
+
"specimenId": str(maybe_specimen_id) if maybe_specimen_id else None,
|
|
36
|
+
"specimenTypes": specimen_types if specimen_types else None,
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def standardize_specimen_details(
|
|
41
|
+
specimen_details: SpecimenDetails | list[SpecimenDetails],
|
|
42
|
+
log: Logger,
|
|
43
|
+
) -> list[SpecimenDetails]:
|
|
44
|
+
"""
|
|
45
|
+
Specimen details can be a single dictionary or a list of dictionaries
|
|
46
|
+
This function standardizes the input to always be a list of dictionaries
|
|
47
|
+
|
|
48
|
+
If it is a list, we expect all the specimens to have the same site and warn otherwise.
|
|
49
|
+
"""
|
|
50
|
+
if isinstance(specimen_details, dict):
|
|
51
|
+
return [specimen_details]
|
|
52
|
+
|
|
53
|
+
# Sometimes, we have multiple specimen details
|
|
54
|
+
# In this case, we expect them to all be the same and warn otherwise
|
|
55
|
+
sites = {specimen.get("specimenSite") for specimen in specimen_details}
|
|
56
|
+
|
|
57
|
+
if len(sites) > 1:
|
|
58
|
+
log.warning("Multiple specimen sites found")
|
|
59
|
+
|
|
60
|
+
return specimen_details
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
|
|
64
|
+
specimen_information = data["specimenInformation"]
|
|
65
|
+
specimen_details: SpecimenDetails | list[SpecimenDetails] | None = None
|
|
66
|
+
|
|
67
|
+
# The key for the specimen details varies based on the test type
|
|
68
|
+
potential_keys = [
|
|
69
|
+
# Tissue case
|
|
70
|
+
"tumorSpecimenInformation",
|
|
71
|
+
# Liquid case
|
|
72
|
+
"liquidBiopsySpecimenInformation",
|
|
73
|
+
]
|
|
74
|
+
for key in potential_keys:
|
|
75
|
+
if key in specimen_information:
|
|
76
|
+
specimen_details = cast(
|
|
77
|
+
SpecimenDetails | list[SpecimenDetails], specimen_information[key]
|
|
78
|
+
)
|
|
79
|
+
break
|
|
80
|
+
|
|
81
|
+
if not specimen_details:
|
|
82
|
+
raise ValueError("No specimen details found in data")
|
|
83
|
+
|
|
84
|
+
specimen_details = standardize_specimen_details(specimen_details, log)
|
|
85
|
+
|
|
86
|
+
return parse_specimen_details(specimen_details)
|
|
@@ -190,6 +190,19 @@ def get_test_yml(
|
|
|
190
190
|
reportDate = str(get_date(signatures.get("Signature").get("ServerTime")))[0:10]
|
|
191
191
|
indexedDate = reportDate
|
|
192
192
|
|
|
193
|
+
sample_id = (sample.get("SampleId") or "").strip()
|
|
194
|
+
block_id = (sample.get("BlockId") or "").strip()
|
|
195
|
+
if block_id == "N/A":
|
|
196
|
+
block_id = ""
|
|
197
|
+
|
|
198
|
+
specimen_ids = []
|
|
199
|
+
if sample_id:
|
|
200
|
+
specimen_ids.append(sample_id)
|
|
201
|
+
if block_id:
|
|
202
|
+
specimen_ids.append(block_id)
|
|
203
|
+
|
|
204
|
+
specimenType = sample.get("SpecFormat", "").strip()
|
|
205
|
+
|
|
193
206
|
yaml_file = {
|
|
194
207
|
"name": "Foundation Medicine",
|
|
195
208
|
"reference": "GRCh37",
|
|
@@ -216,6 +229,11 @@ def get_test_yml(
|
|
|
216
229
|
"files": [],
|
|
217
230
|
}
|
|
218
231
|
|
|
232
|
+
if specimen_ids:
|
|
233
|
+
yaml_file["specimenIds"] = specimen_ids
|
|
234
|
+
if specimenType:
|
|
235
|
+
yaml_file["specimenTypes"] = [specimenType]
|
|
236
|
+
|
|
219
237
|
if write_to_manifest["cnv"]:
|
|
220
238
|
yaml_file["files"].append(
|
|
221
239
|
{
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
from logging import Logger
|
|
2
|
-
from typing import TypedDict, cast
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class SpecimenDetails(TypedDict, total=False):
|
|
6
|
-
"""A partial representation of the specimen details in the Caris JSON file"""
|
|
7
|
-
|
|
8
|
-
specimenReceivedDate: str
|
|
9
|
-
specimenCollectionDate: str
|
|
10
|
-
specimenSite: str
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class ParsedSpecimenDetails(TypedDict):
|
|
14
|
-
bodySite: str
|
|
15
|
-
receivedDate: str
|
|
16
|
-
collDate: str
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def parse_specimen_details(specimen_details: SpecimenDetails) -> ParsedSpecimenDetails:
|
|
20
|
-
return {
|
|
21
|
-
"bodySite": specimen_details.get("specimenSite", ""),
|
|
22
|
-
"receivedDate": specimen_details.get("specimenReceivedDate", ""),
|
|
23
|
-
"collDate": specimen_details.get("specimenCollectionDate", ""),
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def ensure_single_specimen_details(
|
|
28
|
-
specimen_details: SpecimenDetails | list[SpecimenDetails],
|
|
29
|
-
log: Logger,
|
|
30
|
-
) -> SpecimenDetails:
|
|
31
|
-
if isinstance(specimen_details, dict):
|
|
32
|
-
return specimen_details
|
|
33
|
-
|
|
34
|
-
# Sometimes, we have multiple specimen details
|
|
35
|
-
# In this case, we expect them to all be the same and warn otherwise
|
|
36
|
-
sites = {specimen["specimenSite"] for specimen in specimen_details}
|
|
37
|
-
|
|
38
|
-
if len(sites) > 1:
|
|
39
|
-
log.warn(f"Multiple specimen sites found")
|
|
40
|
-
|
|
41
|
-
return specimen_details[0]
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
|
|
45
|
-
specimen_information = data["specimenInformation"]
|
|
46
|
-
specimen_details: SpecimenDetails | list[SpecimenDetails] | None = None
|
|
47
|
-
|
|
48
|
-
# The key for the specimen details varies based on the test type
|
|
49
|
-
potential_keys = [
|
|
50
|
-
# Tissue case
|
|
51
|
-
"tumorSpecimenInformation",
|
|
52
|
-
# Liquid case
|
|
53
|
-
"liquidBiopsySpecimenInformation",
|
|
54
|
-
]
|
|
55
|
-
for key in potential_keys:
|
|
56
|
-
if key in specimen_information:
|
|
57
|
-
specimen_details = cast(
|
|
58
|
-
SpecimenDetails | list[SpecimenDetails], specimen_information[key]
|
|
59
|
-
)
|
|
60
|
-
break
|
|
61
|
-
|
|
62
|
-
if not specimen_details:
|
|
63
|
-
raise ValueError("No specimen details found in data")
|
|
64
|
-
|
|
65
|
-
specimen_details = ensure_single_specimen_details(specimen_details, log)
|
|
66
|
-
|
|
67
|
-
return parse_specimen_details(specimen_details)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/nextgen_specific_genes.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/pre_filter_somatic_vcf.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/af_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/dp_helpers.py
RENAMED
|
File without changes
|
{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/read_write.py
RENAMED
|
File without changes
|