phc-ingestion 0.8.41__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/PKG-INFO +1 -1
  2. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/metadata.py +5 -1
  3. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/specimen_details.py +30 -12
  4. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/util/ga4gh.py +18 -0
  5. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/generic/process.py +5 -1
  6. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/pyproject.toml +1 -1
  7. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/PYPI.md +0 -0
  8. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/__init__.py +0 -0
  9. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/__init__.py +0 -0
  10. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/process.py +0 -0
  11. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/__init__.py +0 -0
  12. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/cnv.py +0 -0
  13. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/detect_genome_ref.py +0 -0
  14. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/ga4gh.py +0 -0
  15. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/hla.py +0 -0
  16. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/ihc.py +0 -0
  17. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/interpretation.py +0 -0
  18. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/json.py +0 -0
  19. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/structural.py +0 -0
  20. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/tar.py +0 -0
  21. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/tests.py +0 -0
  22. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/tmb.py +0 -0
  23. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/tsv.py +0 -0
  24. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/caris/util/vcf.py +0 -0
  25. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/__init__.py +0 -0
  26. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/process.py +0 -0
  27. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/util/__init__.py +0 -0
  28. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/util/cnv.py +0 -0
  29. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/util/fnv.py +0 -0
  30. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/util/interpretation.py +0 -0
  31. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/foundation/util/vcf_etl.py +0 -0
  32. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/generic/__init__.py +0 -0
  33. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/generic/utils.py +0 -0
  34. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/__init__.py +0 -0
  35. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/process.py +0 -0
  36. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/alteration_table.py +0 -0
  37. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/interpretation.py +0 -0
  38. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/manifest_helpers.py +0 -0
  39. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
  40. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
  41. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/process_cnv.py +0 -0
  42. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/process_manifest.py +0 -0
  43. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/process_structural.py +0 -0
  44. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/process_vcf.py +0 -0
  45. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/nextgen/util/types.py +0 -0
  46. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/resources/GRCh37_map.csv.gz +0 -0
  47. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/resources/GRCh38_map.csv.gz +0 -0
  48. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/shared_util/__init__.py +0 -0
  49. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/shared_util/coords_to_genes.py +0 -0
  50. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/shared_util/gene_to_coords.py +0 -0
  51. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
  52. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/shared_util/types.py +0 -0
  53. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/vcf_standardization/Variant.py +0 -0
  54. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/vcf_standardization/__init__.py +0 -0
  55. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/vcf_standardization/standardize.py +0 -0
  56. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/vcf_standardization/util/__init__.py +0 -0
  57. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
  58. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
  59. {phc-ingestion-0.8.41 → phc-ingestion-0.9.0}/ingestion/vcf_standardization/util/read_write.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.8.41
3
+ Version: 0.9.0
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -94,9 +94,13 @@ def extract_metadata(data, prefix, files, source_file_id, log: Logger) -> dict:
94
94
  # Get date of collected and received for the specimen
95
95
  metadata["receivedDate"] = specimen_details["receivedDate"]
96
96
  metadata["collDate"] = specimen_details["collDate"]
97
+ if specimen_details["specimenId"]:
98
+ metadata["specimenIds"] = [specimen_details["specimenId"]]
99
+ if specimen_details["specimenTypes"]:
100
+ metadata["specimenTypes"] = specimen_details["specimenTypes"]
97
101
  metadata["reportDate"] = get_report_date(test_details, log)
98
102
 
99
- # Get the date without the time
103
+ # Get the date without the time
100
104
  metadata["indexedDate"] = metadata["reportDate"]
101
105
 
102
106
  patient = data["patientInformation"]
@@ -1,5 +1,5 @@
1
1
  from logging import Logger
2
- from typing import TypedDict, cast
2
+ from typing import TypedDict, cast, Optional
3
3
 
4
4
 
5
5
  class SpecimenDetails(TypedDict, total=False):
@@ -8,37 +8,55 @@ class SpecimenDetails(TypedDict, total=False):
8
8
  specimenReceivedDate: str
9
9
  specimenCollectionDate: str
10
10
  specimenSite: str
11
+ specimenType: str
12
+ specimenID: str
11
13
 
12
14
 
13
15
  class ParsedSpecimenDetails(TypedDict):
14
16
  bodySite: str
15
17
  receivedDate: str
16
18
  collDate: str
19
+ specimenId: Optional[str]
20
+ specimenTypes: Optional[list[str]]
17
21
 
18
22
 
19
- def parse_specimen_details(specimen_details: SpecimenDetails) -> ParsedSpecimenDetails:
23
+ def parse_specimen_details(specimen_details: list[SpecimenDetails]) -> ParsedSpecimenDetails:
24
+ specimen_types: list[str] = []
25
+ for specimen in specimen_details:
26
+ specimen_type = specimen.get("specimenType")
27
+ if specimen_type and specimen_type not in specimen_types:
28
+ specimen_types.append(specimen_type)
29
+
20
30
  return {
21
- "bodySite": specimen_details.get("specimenSite", ""),
22
- "receivedDate": specimen_details.get("specimenReceivedDate", ""),
23
- "collDate": specimen_details.get("specimenCollectionDate", ""),
31
+ "bodySite": specimen_details[0].get("specimenSite", ""),
32
+ "receivedDate": specimen_details[0].get("specimenReceivedDate", ""),
33
+ "collDate": specimen_details[0].get("specimenCollectionDate", ""),
34
+ "specimenId": specimen_details[0].get("specimenID"),
35
+ "specimenTypes": specimen_types if specimen_types else None,
24
36
  }
25
37
 
26
38
 
27
- def ensure_single_specimen_details(
39
+ def standardize_specimen_details(
28
40
  specimen_details: SpecimenDetails | list[SpecimenDetails],
29
41
  log: Logger,
30
- ) -> SpecimenDetails:
42
+ ) -> list[SpecimenDetails]:
43
+ """
44
+ Specimen details can be a single dictionary or a list of dictionaries
45
+ This function standardizes the input to always be a list of dictionaries
46
+
47
+ If it is a list, we expect all the specimens to have the same site and warn otherwise.
48
+ """
31
49
  if isinstance(specimen_details, dict):
32
- return specimen_details
50
+ return [specimen_details]
33
51
 
34
52
  # Sometimes, we have multiple specimen details
35
53
  # In this case, we expect them to all be the same and warn otherwise
36
- sites = {specimen["specimenSite"] for specimen in specimen_details}
54
+ sites = {specimen.get("specimenSite") for specimen in specimen_details}
37
55
 
38
56
  if len(sites) > 1:
39
- log.warn(f"Multiple specimen sites found")
57
+ log.warning("Multiple specimen sites found")
40
58
 
41
- return specimen_details[0]
59
+ return specimen_details
42
60
 
43
61
 
44
62
  def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecimenDetails:
@@ -62,6 +80,6 @@ def extract_and_parse_specimen_details(data: dict, log: Logger) -> ParsedSpecime
62
80
  if not specimen_details:
63
81
  raise ValueError("No specimen details found in data")
64
82
 
65
- specimen_details = ensure_single_specimen_details(specimen_details, log)
83
+ specimen_details = standardize_specimen_details(specimen_details, log)
66
84
 
67
85
  return parse_specimen_details(specimen_details)
@@ -190,6 +190,19 @@ def get_test_yml(
190
190
  reportDate = str(get_date(signatures.get("Signature").get("ServerTime")))[0:10]
191
191
  indexedDate = reportDate
192
192
 
193
+ sample_id = (sample.get("SampleId") or "").strip()
194
+ block_id = (sample.get("BlockId") or "").strip()
195
+ if block_id == "N/A":
196
+ block_id = ""
197
+
198
+ specimen_ids = []
199
+ if sample_id:
200
+ specimen_ids.append(sample_id)
201
+ if block_id:
202
+ specimen_ids.append(block_id)
203
+
204
+ specimenType = sample.get("SpecFormat", "").strip()
205
+
193
206
  yaml_file = {
194
207
  "name": "Foundation Medicine",
195
208
  "reference": "GRCh37",
@@ -216,6 +229,11 @@ def get_test_yml(
216
229
  "files": [],
217
230
  }
218
231
 
232
+ if specimen_ids:
233
+ yaml_file["specimenIds"] = specimen_ids
234
+ if specimenType:
235
+ yaml_file["specimenTypes"] = [specimenType]
236
+
219
237
  if write_to_manifest["cnv"]:
220
238
  yaml_file["files"].append(
221
239
  {
@@ -19,8 +19,12 @@ def process(
19
19
  vcf_final = base_vcf_file.replace(".vcf", ".modified.nrm.filtered.vcf")
20
20
  if not vcf_final.endswith(".gz"):
21
21
  vcf_final = vcf_final + ".gz"
22
+ # All generic VCF ingestions are germline, so ensure the
23
+ # sample name is prefixed with "germline_". This matches
24
+ # the downstream logic in genomic-manifest
25
+ sample_name = f"germline_{case_id}"
22
26
  vcf_line_count = standardize_vcf(
23
- vcf_file, vcf_out, out_path, case_id, log, compression=True
27
+ vcf_file, vcf_out, out_path, sample_name, log, compression=True
24
28
  )
25
29
 
26
30
  # Add to manifest
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "phc-ingestion"
3
- version = "0.8.41"
3
+ version = "0.9.0"
4
4
  description = "Functions for LifeOmic PHC genomic ingestions"
5
5
  authors = [
6
6
  { name = "LifeOmic Development", email = "development@lifeomic.com" },
File without changes