PyPI - phc-ingestion - Versions diffs - 0.8.42__tar.gz → 0.9.1__tar.gz - Mend

phc-ingestion 0.8.42 (tar.gz) → 0.9.1 (tar.gz)

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: phc-ingestion
-Version: 0.8.42
+Version: 0.9.1
 Summary: Functions for LifeOmic PHC genomic ingestions
 License: MIT
 Author-email: LifeOmic Development <development@lifeomic.com>

{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/metadata.py RENAMED Viewed

@@ -94,9 +94,13 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
     # Get date of collected and received for the specimen
     metadata["receivedDate"] = specimen_details["receivedDate"]
     metadata["collDate"] = specimen_details["collDate"]
+    if specimen_details["specimenId"]:
+        metadata["specimenIds"] = [specimen_details["specimenId"]]
+    if specimen_details["specimenTypes"]:
+        metadata["specimenTypes"] = specimen_details["specimenTypes"]
     metadata["reportDate"] = get_report_date(test_details, log)
-    #  Get the date without the time
+    # Get the date without the time
     metadata["indexedDate"] = metadata["reportDate"]
     patient = data["patientInformation"]

phc-ingestion-0.9.1/ingestion/caris/util/specimen_details.py ADDED Viewed

@@ -0,0 +1,86 @@
+from logging import Logger
+from typing import TypedDict, cast, Optional
+class SpecimenDetails(TypedDict, total=False):
+    """A partial representation of the specimen details in the Caris JSON file"""
+    specimenReceivedDate: str
+    specimenCollectionDate: str
+    specimenSite: str
+    specimenType: str
+    specimenID: str | int
+class ParsedSpecimenDetails(TypedDict):
+    bodySite: str
+    receivedDate: str
+    collDate: str
+    specimenId: Optional[str]
+    specimenTypes: Optional[list[str]]
+def parse_specimen_details(specimen_details: list[SpecimenDetails]) -> ParsedSpecimenDetails:
+    specimen_types: list[str] = []
+    for specimen in specimen_details:
+        specimen_type = specimen.get("specimenType")
+        if specimen_type and specimen_type not in specimen_types:
+            specimen_types.append(specimen_type)
+    maybe_specimen_id = specimen_details[0].get("specimenID")
+    return {
+        "bodySite": specimen_details[0].get("specimenSite", ""),
+        "receivedDate": specimen_details[0].get("specimenReceivedDate", ""),
+        "collDate": specimen_details[0].get("specimenCollectionDate", ""),
+        "specimenId": str(maybe_specimen_id) if maybe_specimen_id else None,
+        "specimenTypes": specimen_types if specimen_types else None,
+    }
+def standardize_specimen_details(
+    specimen_details: SpecimenDetails | list[SpecimenDetails],
+    log: Logger,
+) -> list[SpecimenDetails]:
+    """
+    Specimen details can be a single dictionary or a list of dictionaries
+    This function standardizes the input to always be a list of dictionaries
+    If it is a list, we expect all the specimens to have the same site and warn otherwise.
+    """
+    if isinstance(specimen_details, dict):
+        return [specimen_details]
+    # Sometimes, we have multiple specimen details
+    # In this case, we expect them to all be the same and warn otherwise
+    sites = {specimen.get("specimenSite") for specimen in specimen_details}
+    if len(sites) > 1:
+        log.warning("Multiple specimen sites found")
+    return specimen_details
+def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
+    specimen_information = data["specimenInformation"]
+    specimen_details: SpecimenDetails | list[SpecimenDetails] | None = None
+    # The key for the specimen details varies based on the test type
+    potential_keys = [
+        # Tissue case
+        "tumorSpecimenInformation",
+        # Liquid case
+        "liquidBiopsySpecimenInformation",
+    ]
+    for key in potential_keys:
+        if key in specimen_information:
+            specimen_details = cast(
+                SpecimenDetails | list[SpecimenDetails], specimen_information[key]
+            )
+            break
+    if not specimen_details:
+        raise ValueError("No specimen details found in data")
+    specimen_details = standardize_specimen_details(specimen_details, log)
+    return parse_specimen_details(specimen_details)

{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/ga4gh.py RENAMED Viewed

@@ -190,6 +190,19 @@ def get_test_yml(
     reportDate = str(get_date(signatures.get("Signature").get("ServerTime")))[0:10]
     indexedDate = reportDate
+    sample_id = (sample.get("SampleId") or "").strip()
+    block_id = (sample.get("BlockId") or "").strip()
+    if block_id == "N/A":
+        block_id = ""
+    specimen_ids = []
+    if sample_id:
+        specimen_ids.append(sample_id)
+    if block_id:
+        specimen_ids.append(block_id)
+    specimenType = sample.get("SpecFormat", "").strip()
     yaml_file = {
         "name": "Foundation Medicine",
         "reference": "GRCh37",
@@ -216,6 +229,11 @@ def get_test_yml(
         "files": [],
     }
+    if specimen_ids:
+        yaml_file["specimenIds"] = specimen_ids
+    if specimenType:
+        yaml_file["specimenTypes"] = [specimenType]
     if write_to_manifest["cnv"]:
         yaml_file["files"].append(
             {

{phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "phc-ingestion"
-version = "0.8.42"
+version = "0.9.1"
 description = "Functions for LifeOmic PHC genomic ingestions"
 authors = [
     { name = "LifeOmic Development", email = "development@lifeomic.com" },

phc-ingestion-0.8.42/ingestion/caris/util/specimen_details.py DELETED Viewed

@@ -1,67 +0,0 @@
-from logging import Logger
-from typing import TypedDict, cast
-class SpecimenDetails(TypedDict, total=False):
-    """A partial representation of the specimen details in the Caris JSON file"""
-    specimenReceivedDate: str
-    specimenCollectionDate: str
-    specimenSite: str
-class ParsedSpecimenDetails(TypedDict):
-    bodySite: str
-    receivedDate: str
-    collDate: str
-def parse_specimen_details(specimen_details: SpecimenDetails) -> ParsedSpecimenDetails:
-    return {
-        "bodySite": specimen_details.get("specimenSite", ""),
-        "receivedDate": specimen_details.get("specimenReceivedDate", ""),
-        "collDate": specimen_details.get("specimenCollectionDate", ""),
-    }
-def ensure_single_specimen_details(
-    specimen_details: SpecimenDetails | list[SpecimenDetails],
-    log: Logger,
-) -> SpecimenDetails:
-    if isinstance(specimen_details, dict):
-        return specimen_details
-    # Sometimes, we have multiple specimen details
-    # In this case, we expect them to all be the same and warn otherwise
-    sites = {specimen["specimenSite"] for specimen in specimen_details}
-    if len(sites) > 1:
-        log.warn(f"Multiple specimen sites found")
-    return specimen_details[0]
-def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
-    specimen_information = data["specimenInformation"]
-    specimen_details: SpecimenDetails | list[SpecimenDetails] | None = None
-    # The key for the specimen details varies based on the test type
-    potential_keys = [
-        # Tissue case
-        "tumorSpecimenInformation",
-        # Liquid case
-        "liquidBiopsySpecimenInformation",
-    ]
-    for key in potential_keys:
-        if key in specimen_information:
-            specimen_details = cast(
-                SpecimenDetails | list[SpecimenDetails], specimen_information[key]
-            )
-            break
-    if not specimen_details:
-        raise ValueError("No specimen details found in data")
-    specimen_details = ensure_single_specimen_details(specimen_details, log)
-    return parse_specimen_details(specimen_details)