phc-ingestion 0.8.42__tar.gz → 0.9.1__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (60):
  1. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/PKG-INFO +1 -1
  2. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/metadata.py +5 -1
  3. phc-ingestion-0.9.1/ingestion/caris/util/specimen_details.py +86 -0
  4. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/ga4gh.py +18 -0
  5. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/pyproject.toml +1 -1
  6. phc-ingestion-0.8.42/ingestion/caris/util/specimen_details.py +0 -67
  7. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/PYPI.md +0 -0
  8. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/__init__.py +0 -0
  9. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/__init__.py +0 -0
  10. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/process.py +0 -0
  11. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/__init__.py +0 -0
  12. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/cnv.py +0 -0
  13. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/detect_genome_ref.py +0 -0
  14. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/ga4gh.py +0 -0
  15. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/hla.py +0 -0
  16. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/ihc.py +0 -0
  17. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/interpretation.py +0 -0
  18. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/json.py +0 -0
  19. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/structural.py +0 -0
  20. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tar.py +0 -0
  21. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tests.py +0 -0
  22. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tmb.py +0 -0
  23. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/tsv.py +0 -0
  24. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/caris/util/vcf.py +0 -0
  25. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/__init__.py +0 -0
  26. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/process.py +0 -0
  27. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/__init__.py +0 -0
  28. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/cnv.py +0 -0
  29. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/fnv.py +0 -0
  30. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/interpretation.py +0 -0
  31. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/foundation/util/vcf_etl.py +0 -0
  32. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/generic/__init__.py +0 -0
  33. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/generic/process.py +0 -0
  34. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/generic/utils.py +0 -0
  35. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/__init__.py +0 -0
  36. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/process.py +0 -0
  37. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/alteration_table.py +0 -0
  38. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/interpretation.py +0 -0
  39. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/manifest_helpers.py +0 -0
  40. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
  41. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
  42. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_cnv.py +0 -0
  43. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_manifest.py +0 -0
  44. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_structural.py +0 -0
  45. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/process_vcf.py +0 -0
  46. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/nextgen/util/types.py +0 -0
  47. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/resources/GRCh37_map.csv.gz +0 -0
  48. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/resources/GRCh38_map.csv.gz +0 -0
  49. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/__init__.py +0 -0
  50. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/coords_to_genes.py +0 -0
  51. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/gene_to_coords.py +0 -0
  52. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
  53. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/shared_util/types.py +0 -0
  54. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/Variant.py +0 -0
  55. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/__init__.py +0 -0
  56. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/standardize.py +0 -0
  57. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/__init__.py +0 -0
  58. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
  59. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
  60. {phc-ingestion-0.8.42 → phc-ingestion-0.9.1}/ingestion/vcf_standardization/util/read_write.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.42
3
+ Version: 0.9.1
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -94,9 +94,13 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
94
94
  # Get date of collected and received for the specimen
95
95
  metadata["receivedDate"] = specimen_details["receivedDate"]
96
96
  metadata["collDate"] = specimen_details["collDate"]
97
+ if specimen_details["specimenId"]:
98
+ metadata["specimenIds"] = [specimen_details["specimenId"]]
99
+ if specimen_details["specimenTypes"]:
100
+ metadata["specimenTypes"] = specimen_details["specimenTypes"]
97
101
  metadata["reportDate"] = get_report_date(test_details, log)
98
102
 
99
- # Get the date without the time
103
+ # Get the date without the time
100
104
  metadata["indexedDate"] = metadata["reportDate"]
101
105
 
102
106
  patient = data["patientInformation"]
@@ -0,0 +1,86 @@
1
+ from logging import Logger
2
+ from typing import TypedDict, cast, Optional
3
+
4
+
5
+ class SpecimenDetails(TypedDict, total=False):
6
+ """A partial representation of the specimen details in the Caris JSON file"""
7
+
8
+ specimenReceivedDate: str
9
+ specimenCollectionDate: str
10
+ specimenSite: str
11
+ specimenType: str
12
+ specimenID: str | int
13
+
14
+
15
+ class ParsedSpecimenDetails(TypedDict):
16
+ bodySite: str
17
+ receivedDate: str
18
+ collDate: str
19
+ specimenId: Optional[str]
20
+ specimenTypes: Optional[list[str]]
21
+
22
+
23
+ def parse_specimen_details(specimen_details: list[SpecimenDetails]) -> ParsedSpecimenDetails:
24
+ specimen_types: list[str] = []
25
+ for specimen in specimen_details:
26
+ specimen_type = specimen.get("specimenType")
27
+ if specimen_type and specimen_type not in specimen_types:
28
+ specimen_types.append(specimen_type)
29
+ maybe_specimen_id = specimen_details[0].get("specimenID")
30
+
31
+ return {
32
+ "bodySite": specimen_details[0].get("specimenSite", ""),
33
+ "receivedDate": specimen_details[0].get("specimenReceivedDate", ""),
34
+ "collDate": specimen_details[0].get("specimenCollectionDate", ""),
35
+ "specimenId": str(maybe_specimen_id) if maybe_specimen_id else None,
36
+ "specimenTypes": specimen_types if specimen_types else None,
37
+ }
38
+
39
+
40
+ def standardize_specimen_details(
41
+ specimen_details: SpecimenDetails | list[SpecimenDetails],
42
+ log: Logger,
43
+ ) -> list[SpecimenDetails]:
44
+ """
45
+ Specimen details can be a single dictionary or a list of dictionaries
46
+ This function standardizes the input to always be a list of dictionaries
47
+
48
+ If it is a list, we expect all the specimens to have the same site and warn otherwise.
49
+ """
50
+ if isinstance(specimen_details, dict):
51
+ return [specimen_details]
52
+
53
+ # Sometimes, we have multiple specimen details
54
+ # In this case, we expect them to all be the same and warn otherwise
55
+ sites = {specimen.get("specimenSite") for specimen in specimen_details}
56
+
57
+ if len(sites) > 1:
58
+ log.warning("Multiple specimen sites found")
59
+
60
+ return specimen_details
61
+
62
+
63
+ def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
64
+ specimen_information = data["specimenInformation"]
65
+ specimen_details: SpecimenDetails | list[SpecimenDetails] | None = None
66
+
67
+ # The key for the specimen details varies based on the test type
68
+ potential_keys = [
69
+ # Tissue case
70
+ "tumorSpecimenInformation",
71
+ # Liquid case
72
+ "liquidBiopsySpecimenInformation",
73
+ ]
74
+ for key in potential_keys:
75
+ if key in specimen_information:
76
+ specimen_details = cast(
77
+ SpecimenDetails | list[SpecimenDetails], specimen_information[key]
78
+ )
79
+ break
80
+
81
+ if not specimen_details:
82
+ raise ValueError("No specimen details found in data")
83
+
84
+ specimen_details = standardize_specimen_details(specimen_details, log)
85
+
86
+ return parse_specimen_details(specimen_details)
@@ -190,6 +190,19 @@ def get_test_yml(
190
190
  reportDate = str(get_date(signatures.get("Signature").get("ServerTime")))[0:10]
191
191
  indexedDate = reportDate
192
192
 
193
+ sample_id = (sample.get("SampleId") or "").strip()
194
+ block_id = (sample.get("BlockId") or "").strip()
195
+ if block_id == "N/A":
196
+ block_id = ""
197
+
198
+ specimen_ids = []
199
+ if sample_id:
200
+ specimen_ids.append(sample_id)
201
+ if block_id:
202
+ specimen_ids.append(block_id)
203
+
204
+ specimenType = sample.get("SpecFormat", "").strip()
205
+
193
206
  yaml_file = {
194
207
  "name": "Foundation Medicine",
195
208
  "reference": "GRCh37",
@@ -216,6 +229,11 @@ def get_test_yml(
216
229
  "files": [],
217
230
  }
218
231
 
232
+ if specimen_ids:
233
+ yaml_file["specimenIds"] = specimen_ids
234
+ if specimenType:
235
+ yaml_file["specimenTypes"] = [specimenType]
236
+
219
237
  if write_to_manifest["cnv"]:
220
238
  yaml_file["files"].append(
221
239
  {
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "phc-ingestion"
3
- version = "0.8.42"
3
+ version = "0.9.1"
4
4
  description = "Functions for LifeOmic PHC genomic ingestions"
5
5
  authors = [
6
6
  { name = "LifeOmic Development", email = "development@lifeomic.com" },
@@ -1,67 +0,0 @@
1
- from logging import Logger
2
- from typing import TypedDict, cast
3
-
4
-
5
- class SpecimenDetails(TypedDict, total=False):
6
- """A partial representation of the specimen details in the Caris JSON file"""
7
-
8
- specimenReceivedDate: str
9
- specimenCollectionDate: str
10
- specimenSite: str
11
-
12
-
13
- class ParsedSpecimenDetails(TypedDict):
14
- bodySite: str
15
- receivedDate: str
16
- collDate: str
17
-
18
-
19
- def parse_specimen_details(specimen_details: SpecimenDetails) -> ParsedSpecimenDetails:
20
- return {
21
- "bodySite": specimen_details.get("specimenSite", ""),
22
- "receivedDate": specimen_details.get("specimenReceivedDate", ""),
23
- "collDate": specimen_details.get("specimenCollectionDate", ""),
24
- }
25
-
26
-
27
- def ensure_single_specimen_details(
28
- specimen_details: SpecimenDetails | list[SpecimenDetails],
29
- log: Logger,
30
- ) -> SpecimenDetails:
31
- if isinstance(specimen_details, dict):
32
- return specimen_details
33
-
34
- # Sometimes, we have multiple specimen details
35
- # In this case, we expect them to all be the same and warn otherwise
36
- sites = {specimen["specimenSite"] for specimen in specimen_details}
37
-
38
- if len(sites) > 1:
39
- log.warn(f"Multiple specimen sites found")
40
-
41
- return specimen_details[0]
42
-
43
-
44
- def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
45
- specimen_information = data["specimenInformation"]
46
- specimen_details: SpecimenDetails | list[SpecimenDetails] | None = None
47
-
48
- # The key for the specimen details varies based on the test type
49
- potential_keys = [
50
- # Tissue case
51
- "tumorSpecimenInformation",
52
- # Liquid case
53
- "liquidBiopsySpecimenInformation",
54
- ]
55
- for key in potential_keys:
56
- if key in specimen_information:
57
- specimen_details = cast(
58
- SpecimenDetails | list[SpecimenDetails], specimen_information[key]
59
- )
60
- break
61
-
62
- if not specimen_details:
63
- raise ValueError("No specimen details found in data")
64
-
65
- specimen_details = ensure_single_specimen_details(specimen_details, log)
66
-
67
- return parse_specimen_details(specimen_details)
File without changes