phc-ingestion 0.9.1__tar.gz → 0.9.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/PKG-INFO +1 -1
  2. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/process.py +1 -21
  3. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/util/ga4gh.py +12 -21
  4. phc-ingestion-0.9.3/ingestion/nebula/__init__.py +1 -0
  5. phc-ingestion-0.9.3/ingestion/nebula/process.py +46 -0
  6. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/pyproject.toml +1 -1
  7. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/PYPI.md +0 -0
  8. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/__init__.py +0 -0
  9. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/__init__.py +0 -0
  10. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/process.py +0 -0
  11. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/__init__.py +0 -0
  12. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/cnv.py +0 -0
  13. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/detect_genome_ref.py +0 -0
  14. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/ga4gh.py +0 -0
  15. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/hla.py +0 -0
  16. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/ihc.py +0 -0
  17. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/interpretation.py +0 -0
  18. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/json.py +0 -0
  19. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/metadata.py +0 -0
  20. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/specimen_details.py +0 -0
  21. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/structural.py +0 -0
  22. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/tar.py +0 -0
  23. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/tests.py +0 -0
  24. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/tmb.py +0 -0
  25. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/tsv.py +0 -0
  26. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/caris/util/vcf.py +0 -0
  27. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/__init__.py +0 -0
  28. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/util/__init__.py +0 -0
  29. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/util/cnv.py +0 -0
  30. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/util/fnv.py +0 -0
  31. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/util/interpretation.py +0 -0
  32. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/foundation/util/vcf_etl.py +0 -0
  33. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/generic/__init__.py +0 -0
  34. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/generic/process.py +0 -0
  35. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/generic/utils.py +0 -0
  36. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/__init__.py +0 -0
  37. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/process.py +0 -0
  38. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/alteration_table.py +0 -0
  39. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/interpretation.py +0 -0
  40. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/manifest_helpers.py +0 -0
  41. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/nextgen_specific_genes.py +0 -0
  42. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/pre_filter_somatic_vcf.py +0 -0
  43. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/process_cnv.py +0 -0
  44. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/process_manifest.py +0 -0
  45. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/process_structural.py +0 -0
  46. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/process_vcf.py +0 -0
  47. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/nextgen/util/types.py +0 -0
  48. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/resources/GRCh37_map.csv.gz +0 -0
  49. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/resources/GRCh38_map.csv.gz +0 -0
  50. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/shared_util/__init__.py +0 -0
  51. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/shared_util/coords_to_genes.py +0 -0
  52. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/shared_util/gene_to_coords.py +0 -0
  53. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/shared_util/open_maybe_gzipped.py +0 -0
  54. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/shared_util/types.py +0 -0
  55. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/vcf_standardization/Variant.py +0 -0
  56. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/vcf_standardization/__init__.py +0 -0
  57. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/vcf_standardization/standardize.py +0 -0
  58. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/vcf_standardization/util/__init__.py +0 -0
  59. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/vcf_standardization/util/af_helpers.py +0 -0
  60. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/vcf_standardization/util/dp_helpers.py +0 -0
  61. {phc-ingestion-0.9.1 → phc-ingestion-0.9.3}/ingestion/vcf_standardization/util/read_write.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phc-ingestion
3
- Version: 0.9.1
3
+ Version: 0.9.3
4
4
  Summary: Functions for LifeOmic PHC genomic ingestions
5
5
  License: MIT
6
6
  Author-email: LifeOmic Development <development@lifeomic.com>
@@ -11,25 +11,7 @@ from ingestion.foundation.util.vcf_etl import vcf_etl
11
11
 
12
12
  def read_xml(xml_file: str) -> dict:
13
13
  with open(xml_file) as fd:
14
- return xmltodict.parse(fd.read())
15
-
16
-
17
- def get_specimen_name(results_payload_dict: dict) -> str:
18
- specimen_name = None
19
- if isinstance(results_payload_dict["variant-report"]["samples"]["sample"], list):
20
- found = list(
21
- filter(
22
- lambda x: x["@nucleic-acid-type"] == "DNA",
23
- results_payload_dict["variant-report"]["samples"]["sample"],
24
- )
25
- )
26
- if len(found) > 0:
27
- specimen_name = found[0]["@name"]
28
- else:
29
- specimen_name = results_payload_dict["variant-report"]["samples"]["sample"]["@name"]
30
- if not specimen_name:
31
- raise RuntimeError("Failed to find specimen name")
32
- return specimen_name # type: ignore
14
+ return xmltodict.parse(fd.read(), force_list=("reportProperty", "non-human", "Gene"))
33
15
 
34
16
 
35
17
  def process(
@@ -45,8 +27,6 @@ def process(
45
27
  customer_info_dict = xml_dict["rr:ResultsReport"]["rr:CustomerInformation"]
46
28
  results_payload_dict = xml_dict["rr:ResultsReport"]["rr:ResultsPayload"]
47
29
 
48
- sample_name = get_specimen_name(results_payload_dict)
49
-
50
30
  base_xml_name = Path(xml_file).stem
51
31
 
52
32
  vcf_name = f"{local_output_dir}/{base_xml_name}/{base_xml_name}.modified.vcf"
@@ -47,25 +47,15 @@ def get_ordering_md(pmi, npi) -> list:
47
47
  return ordering_md_info
48
48
 
49
49
 
50
- def get_non_human_content(nhc_dicts) -> list:
50
+ def get_non_human_content(nhc_dicts: list[dict]) -> list:
51
51
  nhc_final = []
52
52
 
53
- if isinstance(nhc_dicts, list):
54
- for sub_dict in nhc_dicts:
55
- nhc_final.append(
56
- {
57
- "nhcOrganism": sub_dict.get("@organism", "unknown"),
58
- "nhcReadsPerMil": float(sub_dict.get("@reads-per-million", 0.0)),
59
- "nhcStatus": sub_dict.get("@status", "unknown"),
60
- }
61
- )
62
-
63
- else:
53
+ for sub_dict in nhc_dicts:
64
54
  nhc_final.append(
65
55
  {
66
- "nhcOrganism": nhc_dicts.get("@organism", "unknown"),
67
- "nhcReadsPerMil": float(nhc_dicts.get("@reads-per-million", 0.0)),
68
- "nhcStatus": nhc_dicts.get("@status", "unknown"),
56
+ "nhcOrganism": sub_dict.get("@organism", "unknown"),
57
+ "nhcReadsPerMil": float(sub_dict.get("@reads-per-million", 0.0)),
58
+ "nhcStatus": sub_dict.get("@status", "unknown"),
69
59
  }
70
60
  )
71
61
 
@@ -90,6 +80,8 @@ def get_plasma_tumor_fraction(genes) -> Optional[str]:
90
80
  ptf_entry = next((entry for entry in entries if entry.get("Name", "") in ptf_entry_names), None)
91
81
  if ptf_entry:
92
82
  ptf_val = ptf_entry.get("Alterations", {}).get("Alteration", {}).get("Name", "")
83
+ if ptf_val in ["> 0%", ">0%"]:
84
+ return "Elevated"
93
85
  try:
94
86
  # See https://github.com/lifeomic/phc-ingestion/pull/129
95
87
  float_val = float(ptf_val.strip("%"))
@@ -150,11 +142,11 @@ def get_test_yml(
150
142
  biomarkers = variant_report.get("biomarkers", {})
151
143
  non_human_content = variant_report.get("non-human-content", {})
152
144
  report_properties = results_payload_dict.get("FinalReport", {}).get("reportProperties", {})
153
- report_property = report_properties.get("reportProperty", [])
154
- genes = results_payload_dict.get("FinalReport", {}).get("Genes", {})
145
+ properties = report_properties.get("reportProperty", [])
146
+ # `Genes` may exist as a key but have a value of None, so we need to ensure it defaults to an empty dict
147
+ genes = results_payload_dict.get("FinalReport", {}).get("Genes", {}) or {}
155
148
  plasma_tumor_fraction = get_plasma_tumor_fraction(genes)
156
149
  hrd_status = get_hrd_status(genes)
157
- properties = report_property if isinstance(report_property, list) else [report_property]
158
150
  gw_loh = next(
159
151
  (prop for prop in properties if prop.get("@key") == "LossOfHeterozygosityScore"),
160
152
  None,
@@ -283,8 +275,7 @@ def get_test_yml(
283
275
  if gw_loh:
284
276
  values = {"loh-high": "high", "loh-low": "low"}
285
277
  alterations = []
286
- gene = genes.get("Gene", [])
287
- genes_list = gene if isinstance(gene, list) else [gene]
278
+ genes_list = genes.get("Gene", [])
288
279
  for gene in genes_list:
289
280
  alt = gene.get("Alterations", {}).get("Alteration", [])
290
281
  alterations.extend(alt) if isinstance(alt, list) else alterations.append(alt)
@@ -317,7 +308,7 @@ def get_test_yml(
317
308
 
318
309
  # adding non-human content fields
319
310
  if non_human_content != None:
320
- yaml_file["nonHumanContent"] = get_non_human_content(non_human_content.get("non-human", ""))
311
+ yaml_file["nonHumanContent"] = get_non_human_content(non_human_content.get("non-human", []))
321
312
 
322
313
  if report_file:
323
314
  copyfile(report_file, f"{local_output_dir}/{base_xml_name}/{base_xml_name}.report.pdf")
@@ -0,0 +1 @@
1
+ from ingestion.nebula.process import process
@@ -0,0 +1,46 @@
1
+ import os
2
+ from pathlib import Path
3
+ from ingestion.vcf_standardization.standardize import standardize_vcf
4
+ from lifeomic_logging import scoped_logger
5
+
6
+
7
+ def process(vcf_file: str, source_file_id: str, out_path: str, case_id: str) -> dict:
8
+ with scoped_logger(__name__) as log:
9
+ # TODO: do we need to take in + process the manifest file here?
10
+
11
+ # Process VCF
12
+ base_vcf_file = os.path.basename(vcf_file)
13
+ vcf_out = base_vcf_file.replace(".vcf", ".modified.vcf")
14
+ vcf_final = base_vcf_file.replace(".vcf", ".modified.nrm.filtered.vcf")
15
+ if not vcf_final.endswith(".gz"):
16
+ vcf_final = vcf_final + ".gz"
17
+
18
+ # Assuming Nebula VCFs are germline
19
+ sample_name = f"germline_{case_id}"
20
+ vcf_line_count = standardize_vcf(
21
+ vcf_file, vcf_out, out_path, sample_name, log, compression=True
22
+ )
23
+
24
+ # Create a basic manifest for the Nebula VCF
25
+ manifest = {
26
+ "testType": "Nebula",
27
+ "sourceFileId": source_file_id,
28
+ "reference": "GRCh38", # Assuming GRCh38, adjust as needed
29
+ "resources": [{"fileName": f".lifeomic/nebula/{case_id}/{base_vcf_file}"}],
30
+ "files": [
31
+ {
32
+ "fileName": f".lifeomic/nebula/{case_id}/{vcf_final}",
33
+ "sequenceType": "germline",
34
+ "type": "shortVariant",
35
+ }
36
+ ],
37
+ }
38
+
39
+ case_metadata = {
40
+ "test_type": "Nebula",
41
+ "vcf_line_count": vcf_line_count,
42
+ "case_id": case_id,
43
+ "germline_genome_reference": manifest["reference"],
44
+ }
45
+
46
+ return case_metadata, manifest
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "phc-ingestion"
3
- version = "0.9.1"
3
+ version = "0.9.3"
4
4
  description = "Functions for LifeOmic PHC genomic ingestions"
5
5
  authors = [
6
6
  { name = "LifeOmic Development", email = "development@lifeomic.com" },
File without changes