nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -273,16 +273,6 @@ details_dict = {
273
273
  "alignment: index file for deduplicated bam",
274
274
  "Bam index file for deduplicated bam file generated by the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
275
275
  ),
276
- "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
277
- "analysis",
278
- "germline: vcf of haplotype variants in targeted regions",
279
- "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
280
- ),
281
- "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
282
- "analysis",
283
- "germline: germline variants",
284
- "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
285
- ),
286
276
  "/wes/analysis/normal/haplotyper_targets.vcf.gz": FileDetails(
287
277
  "analysis",
288
278
  "germline: vcf of haplotype variants in targeted regions",
@@ -447,16 +437,6 @@ details_dict = {
447
437
  "germline: germline variants",
448
438
  "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
449
439
  ),
450
- "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
451
- "analysis",
452
- "germline: vcf of haplotype variants in targeted regions",
453
- "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
454
- ),
455
- "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
456
- "analysis",
457
- "germline: germline variants",
458
- "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
459
- ),
460
440
  "/wes_tumor_only/analysis/tumor/coverage_metrics.txt": FileDetails(
461
441
  "analysis",
462
442
  "plain-text genome-wide coverage file from tumor sample, from Sentieon's CoverageMetrics",
@@ -794,6 +774,16 @@ details_dict = {
794
774
  "stained image file that is the result of an H&E experiment",
795
775
  "An image file stained with hematoxylin and eosin, generated from an H&E experiment.",
796
776
  ),
777
+ "/hande/annotated_image.svs": FileDetails(
778
+ "source",
779
+ "stained and annotated image file that is the result of an H&E experiment",
780
+ "An SVS image file stained and annotated with hematoxylin and eosin, generated from an H&E experiment.",
781
+ ),
782
+ "/hande/annotated_image.": FileDetails(
783
+ "source",
784
+ "stained and annotated image file that is the result of an H&E experiment",
785
+ "An annotated image file stained with hematoxylin and eosin, generated from an H&E experiment.",
786
+ ),
797
787
  # ELISA
798
788
  "/elisa/assay.xlsx": FileDetails(
799
789
  "source",
@@ -801,17 +791,17 @@ details_dict = {
801
791
  "An XML-based Excel file that contains the results of a single run in arbitrary units. Each row is a sample, though not all have CIMAC IDs, and each column is an antigen.",
802
792
  ),
803
793
  # CyTOF analysis
804
- f"/cytof_analysis/cell_counts_assignment.csv": FileDetails(
794
+ "/cytof_analysis/cell_counts_assignment.csv": FileDetails(
805
795
  "miscellaneous",
806
796
  "comma-separated two-column table with cell counts for each assigned cell type",
807
797
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
808
798
  ),
809
- f"/cytof_analysis/cell_counts_compartment.csv": FileDetails(
799
+ "/cytof_analysis/cell_counts_compartment.csv": FileDetails(
810
800
  "miscellaneous",
811
801
  "comma-separated two-column table with cell counts for each broad compartment assigned",
812
802
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
813
803
  ),
814
- f"/cytof_analysis/cell_counts_profiling.csv": FileDetails(
804
+ "/cytof_analysis/cell_counts_profiling.csv": FileDetails(
815
805
  "miscellaneous",
816
806
  "comma-separated two-column table with cell counts for each profiled subset of assigned cell types",
817
807
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
@@ -821,62 +811,62 @@ details_dict = {
821
811
  "comma-separated two-column table with cell counts for each assigned cell type",
822
812
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
823
813
  ),
824
- f"csv|cell counts compartment": FileDetails(
814
+ "csv|cell counts compartment": FileDetails(
825
815
  "analysis",
826
816
  "comma-separated two-column table with cell counts for each broad compartment assigned",
827
817
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
828
818
  ),
829
- f"csv|cell counts profiling": FileDetails(
819
+ "csv|cell counts profiling": FileDetails(
830
820
  "analysis",
831
821
  "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
832
822
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
833
823
  ),
834
- f"/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
835
- f"/cytof_analysis/assignment.csv": FileDetails(
824
+ "/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
825
+ "/cytof_analysis/assignment.csv": FileDetails(
836
826
  "miscellaneous",
837
827
  "comma-separated table of marker expression for each assigned cell type",
838
828
  "A plain-text, comma-separated table with a column for each assigned cell type, where rows are the signal on each channel for every cell type assigned.",
839
829
  ),
840
- f"/cytof_analysis/compartment.csv": FileDetails(
830
+ "/cytof_analysis/compartment.csv": FileDetails(
841
831
  "miscellaneous",
842
832
  "comma-separated table of marker expression for each broad compartment assigned",
843
833
  "A plain-text, comma-separated table with a column for each broad compartment of the called cell types, where rows are the signal on each channel for every compartment.",
844
834
  ),
845
- f"/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
846
- f"/cytof_analysis/profiling.csv": FileDetails(
835
+ "/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
836
+ "/cytof_analysis/profiling.csv": FileDetails(
847
837
  "miscellaneous",
848
838
  "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
849
839
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
850
840
  ),
851
- f"/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
852
- f"/cytof_analysis/source.fcs": FileDetails(
841
+ "/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
842
+ "/cytof_analysis/source.fcs": FileDetails(
853
843
  "source",
854
844
  "fcs data used as the input for this analysis",
855
845
  "The analysis-ready FCS file used as the input for this analysis. After normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
856
846
  ),
857
847
  # CyTOF assay
858
- f"/cytof/spike_in.fcs": FileDetails(
848
+ "/cytof/spike_in.fcs": FileDetails(
859
849
  "source",
860
850
  "normalized and debarcoded fcs data for a blank spike-in sample",
861
851
  "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
862
852
  ),
863
- f"/cytof/controls/spike_in.fcs": FileDetails(
853
+ "/cytof/controls/spike_in.fcs": FileDetails(
864
854
  "source",
865
855
  "normalized and debarcoded fcs data for a blank spike-in sample",
866
856
  "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
867
857
  ),
868
- f"/cytof/controls/processed.fcs": FileDetails("source", "", ""),
869
- f"/cytof/source_.fcs": FileDetails(
858
+ "/cytof/controls/processed.fcs": FileDetails("source", "", ""),
859
+ "/cytof/source_.fcs": FileDetails(
870
860
  "source",
871
861
  "raw fcs data as generated by the machine, without normalization, debarcoding, or cleaning",
872
862
  "The raw FCS file as generated by the machine, without any normalization, debarcoding, cleaning, etc.",
873
863
  ),
874
- f"/cytof/debarcoding_key.csv": FileDetails(
864
+ "/cytof/debarcoding_key.csv": FileDetails(
875
865
  "source",
876
866
  "",
877
867
  "",
878
868
  ),
879
- f"/cytof/processed.fcs": FileDetails(
869
+ "/cytof/processed.fcs": FileDetails(
880
870
  "source",
881
871
  "fully processed fcs data: normalized, debarcoded, no Veri-Cells, cleaned",
882
872
  "The analysis-ready FCS file after normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
@@ -416,7 +416,13 @@ assay_facets: Facets = {
416
416
  },
417
417
  "H&E": {
418
418
  "Images": FacetConfig(
419
- ["/hande/image_file.svs", "/hande/image_file."], "Stained image file."
419
+ [
420
+ "/hande/image_file.svs",
421
+ "/hande/image_file.",
422
+ "/hande/annotated_image.svs",
423
+ "/hande/annotated_image.",
424
+ ],
425
+ "Stained image file.",
420
426
  )
421
427
  },
422
428
  "TCR": {
@@ -553,23 +559,23 @@ facets_dict: Dict[str, Facets] = {
553
559
  "Analysis Ready": analysis_ready_facets,
554
560
  }
555
561
 
556
-
557
562
  FACET_NAME_DELIM = "|"
558
563
 
559
564
 
560
565
  def _build_facet_groups_to_names():
561
566
  """Map facet_groups to human-readable data categories."""
562
- path_to_name = lambda path: FACET_NAME_DELIM.join(path)
563
567
 
564
568
  facet_names = {}
565
569
  for facet_name, subfacet in facets_dict["Assay Type"].items():
566
570
  for subfacet_name, subsubfacet in subfacet.items():
567
571
  for facet_group in subsubfacet.facet_groups:
568
- facet_names[facet_group] = path_to_name([facet_name, subfacet_name])
572
+ facet_names[facet_group] = FACET_NAME_DELIM.join(
573
+ [facet_name, subfacet_name]
574
+ )
569
575
 
570
576
  for facet_name, subfacet in facets_dict["Clinical Type"].items():
571
577
  for facet_group in subfacet.facet_groups:
572
- facet_names[facet_group] = path_to_name([facet_name])
578
+ facet_names[facet_group] = FACET_NAME_DELIM.join([facet_name])
573
579
 
574
580
  # Note on why we don't use "Analysis Ready": any facet group included in the
575
581
  # "Analysis Ready" facet type will also have an entry in "Assay Type".
@@ -594,26 +600,37 @@ def build_data_category_facets(facet_group_file_counts: Dict[str, int]):
594
600
  }
595
601
  ```
596
602
  """
597
- extract_facet_info = lambda facet_config_entries, prefix: [
598
- {
599
- "label": label,
600
- "description": config.description,
601
- "count": sum(
603
+
604
+ def extract_facet_info(facet_config_entries, _prefix):
605
+ results = []
606
+ for label, config in facet_config_entries.items():
607
+ count = sum(
602
608
  facet_group_file_counts.get(facet_group, 0)
603
609
  for facet_group in config.facet_groups
604
- ),
605
- }
606
- for label, config in facet_config_entries.items()
607
- ]
608
-
609
- return {
610
- "Assay Type": {
611
- assay_name: extract_facet_info(subfacets, assay_name)
612
- for assay_name, subfacets in assay_facets.items()
613
- },
614
- "Clinical Type": extract_facet_info(clinical_facets, None),
615
- "Analysis Ready": extract_facet_info(analysis_ready_facets, None),
616
- }
610
+ )
611
+ if count:
612
+ results.append(
613
+ {"label": label, "description": config.description, "count": count}
614
+ )
615
+ return results
616
+
617
+ assay_types = {}
618
+ for assay_name, subfacets in assay_facets.items():
619
+ assay_names = extract_facet_info(subfacets, assay_name)
620
+ if assay_names:
621
+ assay_types[assay_name] = assay_names
622
+
623
+ results = {}
624
+ if assay_types:
625
+ results["Assay Type"] = assay_types
626
+ clinical_types = extract_facet_info(clinical_facets, None)
627
+ if clinical_types:
628
+ results["Clinical Type"] = clinical_types
629
+ analysis_ready = extract_facet_info(analysis_ready_facets, None)
630
+ if analysis_ready:
631
+ results["Analysis Ready"] = analysis_ready
632
+
633
+ return results
617
634
 
618
635
 
619
636
  def build_trial_facets(trial_file_counts: Dict[str, int]):
@@ -636,7 +653,7 @@ def get_facet_groups_for_paths(paths: List[List[str]]) -> List[str]:
636
653
  facet_config = facet_config[key]
637
654
  assert isinstance(facet_config, FacetConfig)
638
655
  except Exception as e:
639
- raise BadRequest(f"no facet for path {path}")
656
+ raise BadRequest(f"no facet for path {path}") from e
640
657
  facet_groups.extend(facet_config.facet_groups)
641
658
 
642
659
  return facet_groups
@@ -1,4 +1,5 @@
1
- import os, traceback
1
+ import os
2
+ import traceback
2
3
  from contextlib import contextmanager
3
4
  from functools import partial
4
5
  from typing import Callable, List, NamedTuple
@@ -83,8 +84,8 @@ def migration_session():
83
84
  print("Running GCS rollback...")
84
85
  task_queue.rollback()
85
86
  print("GCS rollback succeeded.")
86
- except Exception as e:
87
- print(f"GCS rollback failed: {e.__class__}\n{e}")
87
+ except Exception as e_inner:
88
+ print(f"GCS rollback failed: {e_inner.__class__}\n{e_inner}")
88
89
  raise
89
90
  finally:
90
91
  session.close()
@@ -196,7 +197,7 @@ def _run_metadata_migration(
196
197
  )[1]
197
198
 
198
199
  # If the GCS URI has changed, rename the blob
199
- # makes call to bucket.rename_blob
200
+ # makes call to bucket.rename_blob
200
201
  new_gcs_uri = artifact["object_url"]
201
202
  if old_gcs_uri != new_gcs_uri:
202
203
  print(
@@ -204,10 +205,16 @@ def _run_metadata_migration(
204
205
  )
205
206
  renamer = PieceOfWork(
206
207
  partial(
207
- rename_gcs_blob, GOOGLE_ACL_DATA_BUCKET, old_gcs_uri, new_gcs_uri
208
+ rename_gcs_blob,
209
+ GOOGLE_ACL_DATA_BUCKET,
210
+ old_gcs_uri,
211
+ new_gcs_uri,
208
212
  ),
209
213
  partial(
210
- rename_gcs_blob, GOOGLE_ACL_DATA_BUCKET, new_gcs_uri, old_gcs_uri
214
+ rename_gcs_blob,
215
+ GOOGLE_ACL_DATA_BUCKET,
216
+ new_gcs_uri,
217
+ old_gcs_uri,
211
218
  ),
212
219
  )
213
220
  gcs_tasks.schedule(renamer)
@@ -280,9 +287,9 @@ def _run_metadata_migration(
280
287
  flag_modified(upload, "metadata_patch")
281
288
 
282
289
  # Attempt to make GCS updates
283
- print(f"Running all GCS tasks...")
290
+ print("Running all GCS tasks...")
284
291
  gcs_tasks.run_all()
285
- print(f"GCS tasks succeeded.")
292
+ print("GCS tasks succeeded.")
286
293
 
287
294
 
288
295
  dont_run = os.environ.get("TESTING") or os.environ.get("ENV") == "dev"
@@ -294,7 +301,7 @@ def rename_gcs_blob(bucket, old_name, new_name):
294
301
  message = f"GCS: moving {full_old_uri} to {full_new_uri}"
295
302
  if dont_run:
296
303
  print(f"SKIPPING: {message}")
297
- return
304
+ return None
298
305
 
299
306
  print(message)
300
307