phc-ingestion 0.8.42__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,9 +94,13 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
94
94
  # Get date of collected and received for the specimen
95
95
  metadata["receivedDate"] = specimen_details["receivedDate"]
96
96
  metadata["collDate"] = specimen_details["collDate"]
97
+ if specimen_details["specimenId"]:
98
+ metadata["specimenIds"] = [specimen_details["specimenId"]]
99
+ if specimen_details["specimenTypes"]:
100
+ metadata["specimenTypes"] = specimen_details["specimenTypes"]
97
101
  metadata["reportDate"] = get_report_date(test_details, log)
98
102
 
99
- # Get the date without the time
103
+ # Get the date without the time
100
104
  metadata["indexedDate"] = metadata["reportDate"]
101
105
 
102
106
  patient = data["patientInformation"]
@@ -1,5 +1,5 @@
1
1
  from logging import Logger
2
- from typing import TypedDict, cast
2
+ from typing import TypedDict, cast, Optional
3
3
 
4
4
 
5
5
  class SpecimenDetails(TypedDict, total=False):
@@ -8,37 +8,55 @@ class SpecimenDetails(TypedDict, total=False):
8
8
  specimenReceivedDate: str
9
9
  specimenCollectionDate: str
10
10
  specimenSite: str
11
+ specimenType: str
12
+ specimenID: str
11
13
 
12
14
 
13
15
  class ParsedSpecimenDetails(TypedDict):
14
16
  bodySite: str
15
17
  receivedDate: str
16
18
  collDate: str
19
+ specimenId: Optional[str]
20
+ specimenTypes: Optional[list[str]]
17
21
 
18
22
 
19
- def parse_specimen_details(specimen_details: SpecimenDetails) -> ParsedSpecimenDetails:
23
+ def parse_specimen_details(specimen_details: list[SpecimenDetails]) -> ParsedSpecimenDetails:
24
+ specimen_types: list[str] = []
25
+ for specimen in specimen_details:
26
+ specimen_type = specimen.get("specimenType")
27
+ if specimen_type and specimen_type not in specimen_types:
28
+ specimen_types.append(specimen_type)
29
+
20
30
  return {
21
- "bodySite": specimen_details.get("specimenSite", ""),
22
- "receivedDate": specimen_details.get("specimenReceivedDate", ""),
23
- "collDate": specimen_details.get("specimenCollectionDate", ""),
31
+ "bodySite": specimen_details[0].get("specimenSite", ""),
32
+ "receivedDate": specimen_details[0].get("specimenReceivedDate", ""),
33
+ "collDate": specimen_details[0].get("specimenCollectionDate", ""),
34
+ "specimenId": specimen_details[0].get("specimenID"),
35
+ "specimenTypes": specimen_types if specimen_types else None,
24
36
  }
25
37
 
26
38
 
27
- def ensure_single_specimen_details(
39
+ def standardize_specimen_details(
28
40
  specimen_details: SpecimenDetails | list[SpecimenDetails],
29
41
  log: Logger,
30
- ) -> SpecimenDetails:
42
+ ) -> list[SpecimenDetails]:
43
+ """
44
+ Specimen details can be a single dictionary or a list of dictionaries
45
+ This function standardizes the input to always be a list of dictionaries
46
+
47
+ If it is a list, we expect all the specimens to have the same site and warn otherwise.
48
+ """
31
49
  if isinstance(specimen_details, dict):
32
- return specimen_details
50
+ return [specimen_details]
33
51
 
34
52
  # Sometimes, we have multiple specimen details
35
53
  # In this case, we expect them to all be the same and warn otherwise
36
- sites = {specimen["specimenSite"] for specimen in specimen_details}
54
+ sites = {specimen.get("specimenSite") for specimen in specimen_details}
37
55
 
38
56
  if len(sites) > 1:
39
- log.warn(f"Multiple specimen sites found")
57
+ log.warning("Multiple specimen sites found")
40
58
 
41
- return specimen_details[0]
59
+ return specimen_details
42
60
 
43
61
 
44
62
  def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
@@ -62,6 +80,6 @@ def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecime
62
80
  if not specimen_details:
63
81
  raise ValueError("No specimen details found in data")
64
82
 
65
- specimen_details = ensure_single_specimen_details(specimen_details, log)
83
+ specimen_details = standardize_specimen_details(specimen_details, log)
66
84
 
67
85
  return parse_specimen_details(specimen_details)
@@ -190,6 +190,19 @@ def get_test_yml(
190
190
  reportDate = str(get_date(signatures.get("Signature").get("ServerTime")))[0:10]
191
191
  indexedDate = reportDate
192
192
 
193
+ sample_id = (sample.get("SampleId") or "").strip()
194
+ block_id = (sample.get("BlockId") or "").strip()
195
+ if block_id == "N/A":
196
+ block_id = ""
197
+
198
+ specimen_ids = []
199
+ if sample_id:
200
+ specimen_ids.append(sample_id)
201
+ if block_id:
202
+ specimen_ids.append(block_id)
203
+
204
+ specimenType = sample.get("SpecFormat", "").strip()
205
+
193
206
  yaml_file = {
194
207
  "name": "Foundation Medicine",
195
208
  "reference": "GRCh37",
@@ -216,6 +229,11 @@ def get_test_yml(
216
229
  "files": [],
217
230
  }
218
231
 
232
+ if specimen_ids:
233
+ yaml_file["specimenIds"] = specimen_ids
234
+ if specimenType:
235
+ yaml_file["specimenTypes"] = [specimenType]
236
+
219
237
  if write_to_manifest["cnv"]:
220
238
  yaml_file["files"].append(
221
239
  {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.42
3
+ Version: 0.9.0
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -9,8 +9,8 @@ ingestion/caris/util/hla.py,sha256=X_t6ngBRvmdG3m4I2_KnPFeWn3BaH-3IWHtOvDbS32A,7
9
9
  ingestion/caris/util/ihc.py,sha256=vegxudxHj7tLihrXGbEx_ptwkSsu3YCCB1nZVwoiYXg,12312
10
10
  ingestion/caris/util/interpretation.py,sha256=CghNurqeVA5VTBBorU8-ZTN-PVNPnR8wrmTwKCH3568,555
11
11
  ingestion/caris/util/json.py,sha256=HBU3Tf-XSi9fGHANYUtD8maXNYqmmnpncGh0KCDaPEU,5018
12
- ingestion/caris/util/metadata.py,sha256=C50e5a6zqYeUG_RcZvFvN-UEXWNJb0q03dMOGWkDgO0,10070
13
- ingestion/caris/util/specimen_details.py,sha256=R3uKHlLR056XcQbUPI6IO2dLr-z5Z5AJi866DJ379Qw,2105
12
+ ingestion/caris/util/metadata.py,sha256=a6NToMtGtIRrlMd3CQwq4IRjGGmIiBA9JFwsATjNEoQ,10287
13
+ ingestion/caris/util/specimen_details.py,sha256=5Tew0uhSa_SGBFEWjHp8wOKmQdeAYsY3-yTB-dEbtE4,2879
14
14
  ingestion/caris/util/structural.py,sha256=EUcMIea_WnafoVmFLIyEqlJ_HtYIj_g6qkekXa7QNQs,4628
15
15
  ingestion/caris/util/tar.py,sha256=BGR_2vBbxyMgF-GzJ3SrihsPdOzII4SFVz9tvKV5vo0,482
16
16
  ingestion/caris/util/tests.py,sha256=mcG3A8TW81_sn2Bfoa-Gd6Q1sR3_R4FX2BNskD4DkJk,372
@@ -22,7 +22,7 @@ ingestion/foundation/process.py,sha256=T8YTvXRiThqE1LTERhrzvvD69mP4qJ7soJ1ZIbu8Y
22
22
  ingestion/foundation/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  ingestion/foundation/util/cnv.py,sha256=YSKCaOBhjZDNXth_GxC-50crDURpTNCMefoHo0uO7lQ,4215
24
24
  ingestion/foundation/util/fnv.py,sha256=-VstGsBKXM0duC-IpwUkektoTZ9yQUR0IQcDb1HibY0,5937
25
- ingestion/foundation/util/ga4gh.py,sha256=dn7uoQdnKHfIVCSHVEyPpR0jc1uRp1YB4P_9tLMOxMs,11134
25
+ ingestion/foundation/util/ga4gh.py,sha256=R9qfFTjucOXntlPYq37g8-NYygk8HuCxz1vih_2iD24,11626
26
26
  ingestion/foundation/util/interpretation.py,sha256=LVVUmMyD6Un1rIKXqiyQDUC6oIJUd8cU3I9YHD5fsXg,405
27
27
  ingestion/foundation/util/vcf_etl.py,sha256=ZBrX1XGRz-ymLUEiVcjjqmPZPb-AfD9On8UkZJDa1Dk,2133
28
28
  ingestion/generic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -54,6 +54,6 @@ ingestion/vcf_standardization/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQe
54
54
  ingestion/vcf_standardization/util/af_helpers.py,sha256=dpTzoeIQVeBRt0ETF3a9rp5ojZqznHg4x_hCZ8OPcOg,1061
55
55
  ingestion/vcf_standardization/util/dp_helpers.py,sha256=Nq8oLOLObu4_pv16qwwgpALRlUoJVCULrd9cFOD-eoI,823
56
56
  ingestion/vcf_standardization/util/read_write.py,sha256=x3Pf6Dq8tmolblbCS5CrNmrcHS3FGfqBSFpFgvFGC4g,2526
57
- phc_ingestion-0.8.42.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
58
- phc_ingestion-0.8.42.dist-info/METADATA,sha256=LB_E096gDykQT0yFc3_AP28VNff1Zi91Ltj971FZ8c0,573
59
- phc_ingestion-0.8.42.dist-info/RECORD,,
57
+ phc_ingestion-0.9.0.dist-info/WHEEL,sha256=B19PGBCYhWaz2p_UjAoRVh767nYQfk14Sn4TpIZ-nfU,87
58
+ phc_ingestion-0.9.0.dist-info/METADATA,sha256=4sKbowdUIImMSp2a_3tm8WPyS7fq8hp-_96HRIZi_H0,572
59
+ phc_ingestion-0.9.0.dist-info/RECORD,,