nci-cidc-api-modules 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
@@ -273,16 +273,6 @@ details_dict = {
  "alignment: index file for deduplicated bam",
  "Bam index file for deduplicated bam file generated by the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
  ),
- "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
- "analysis",
- "germline: vcf of haplotype variants in targeted regions",
- "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
- ),
- "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
- "analysis",
- "germline: germline variants",
- "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
- ),
  "/wes/analysis/normal/haplotyper_targets.vcf.gz": FileDetails(
  "analysis",
  "germline: vcf of haplotype variants in targeted regions",
@@ -447,16 +437,6 @@ details_dict = {
  "germline: germline variants",
  "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
  ),
- "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
- "analysis",
- "germline: vcf of haplotype variants in targeted regions",
- "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
- ),
- "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
- "analysis",
- "germline: germline variants",
- "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
- ),
  "/wes_tumor_only/analysis/tumor/coverage_metrics.txt": FileDetails(
  "analysis",
  "plain-text genome-wide coverage file from tumor sample, from Sentieon's CoverageMetrics",
@@ -811,17 +791,17 @@ details_dict = {
  "An XML-based Excel file that contains the results of a single run in arbitrary units. Each row is a sample, though not all have CIMAC IDs, and each column is an antigen.",
  ),
  # CyTOF analysis
- f"/cytof_analysis/cell_counts_assignment.csv": FileDetails(
+ "/cytof_analysis/cell_counts_assignment.csv": FileDetails(
  "miscellaneous",
  "comma-separated two-column table with cell counts for each assigned cell type",
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
  ),
- f"/cytof_analysis/cell_counts_compartment.csv": FileDetails(
+ "/cytof_analysis/cell_counts_compartment.csv": FileDetails(
  "miscellaneous",
  "comma-separated two-column table with cell counts for each broad compartment assigned",
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
  ),
- f"/cytof_analysis/cell_counts_profiling.csv": FileDetails(
+ "/cytof_analysis/cell_counts_profiling.csv": FileDetails(
  "miscellaneous",
  "comma-separated two-column table with cell counts for each profiled subset of assigned cell types",
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
@@ -831,62 +811,62 @@ details_dict = {
  "comma-separated two-column table with cell counts for each assigned cell type",
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
  ),
- f"csv|cell counts compartment": FileDetails(
+ "csv|cell counts compartment": FileDetails(
  "analysis",
  "comma-separated two-column table with cell counts for each broad compartment assigned",
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
  ),
- f"csv|cell counts profiling": FileDetails(
+ "csv|cell counts profiling": FileDetails(
  "analysis",
  "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
  ),
- f"/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
- f"/cytof_analysis/assignment.csv": FileDetails(
+ "/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
+ "/cytof_analysis/assignment.csv": FileDetails(
  "miscellaneous",
  "comma-separated table of marker expression for each assigned cell type",
  "A plain-text, comma-separated table with a column for each assigned cell type, where rows are the signal on each channel for every cell type assigned.",
  ),
- f"/cytof_analysis/compartment.csv": FileDetails(
+ "/cytof_analysis/compartment.csv": FileDetails(
  "miscellaneous",
  "comma-separated table of marker expression for each broad compartment assigned",
  "A plain-text, comma-separated table with a column for each broad compartment of the called cell types, where rows are the signal on each channel for every compartment.",
  ),
- f"/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
- f"/cytof_analysis/profiling.csv": FileDetails(
+ "/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
+ "/cytof_analysis/profiling.csv": FileDetails(
  "miscellaneous",
  "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
  "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
  ),
- f"/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
- f"/cytof_analysis/source.fcs": FileDetails(
+ "/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
+ "/cytof_analysis/source.fcs": FileDetails(
  "source",
  "fcs data used as the input for this analysis",
  "The analysis-ready FCS file used as the input for this analysis. After normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
  ),
  # CyTOF assay
- f"/cytof/spike_in.fcs": FileDetails(
+ "/cytof/spike_in.fcs": FileDetails(
  "source",
  "normalized and debarcoded fcs data for a blank spike-in sample",
  "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
  ),
- f"/cytof/controls/spike_in.fcs": FileDetails(
+ "/cytof/controls/spike_in.fcs": FileDetails(
  "source",
  "normalized and debarcoded fcs data for a blank spike-in sample",
  "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
  ),
- f"/cytof/controls/processed.fcs": FileDetails("source", "", ""),
- f"/cytof/source_.fcs": FileDetails(
+ "/cytof/controls/processed.fcs": FileDetails("source", "", ""),
+ "/cytof/source_.fcs": FileDetails(
  "source",
  "raw fcs data as generated by the machine, without normalization, debarcoding, or cleaning",
  "The raw FCS file as generated by the machine, without any normalization, debarcoding, cleaning, etc.",
  ),
- f"/cytof/debarcoding_key.csv": FileDetails(
+ "/cytof/debarcoding_key.csv": FileDetails(
  "source",
  "",
  "",
  ),
- f"/cytof/processed.fcs": FileDetails(
+ "/cytof/processed.fcs": FileDetails(
  "source",
  "fully processed fcs data: normalized, debarcoded, no Veri-Cells, cleaned",
  "The analysis-ready FCS file after normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
@@ -559,23 +559,23 @@ facets_dict: Dict[str, Facets] = {
  "Analysis Ready": analysis_ready_facets,
  }

-
  FACET_NAME_DELIM = "|"


  def _build_facet_groups_to_names():
  """Map facet_groups to human-readable data categories."""
- path_to_name = lambda path: FACET_NAME_DELIM.join(path)

  facet_names = {}
  for facet_name, subfacet in facets_dict["Assay Type"].items():
  for subfacet_name, subsubfacet in subfacet.items():
  for facet_group in subsubfacet.facet_groups:
- facet_names[facet_group] = path_to_name([facet_name, subfacet_name])
+ facet_names[facet_group] = FACET_NAME_DELIM.join(
+ [facet_name, subfacet_name]
+ )

  for facet_name, subfacet in facets_dict["Clinical Type"].items():
  for facet_group in subfacet.facet_groups:
- facet_names[facet_group] = path_to_name([facet_name])
+ facet_names[facet_group] = FACET_NAME_DELIM.join([facet_name])

  # Note on why we don't use "Analysis Ready": any facet group included in the
  # "Analysis Ready" facet type will also have an entry in "Assay Type".
@@ -601,7 +601,7 @@ def build_data_category_facets(facet_group_file_counts: Dict[str, int]):
  ```
  """

- def extract_facet_info(facet_config_entries, prefix):
+ def extract_facet_info(facet_config_entries, _prefix):
  results = []
  for label, config in facet_config_entries.items():
  count = sum(
@@ -653,7 +653,7 @@ def get_facet_groups_for_paths(paths: List[List[str]]) -> List[str]:
  facet_config = facet_config[key]
  assert isinstance(facet_config, FacetConfig)
  except Exception as e:
- raise BadRequest(f"no facet for path {path}")
+ raise BadRequest(f"no facet for path {path}") from e
  facet_groups.extend(facet_config.facet_groups)

  return facet_groups
@@ -1,4 +1,5 @@
- import os, traceback
+ import os
+ import traceback
  from contextlib import contextmanager
  from functools import partial
  from typing import Callable, List, NamedTuple
@@ -83,8 +84,8 @@ def migration_session():
  print("Running GCS rollback...")
  task_queue.rollback()
  print("GCS rollback succeeded.")
- except Exception as e:
- print(f"GCS rollback failed: {e.__class__}\n{e}")
+ except Exception as e_inner:
+ print(f"GCS rollback failed: {e_inner.__class__}\n{e_inner}")
  raise
  finally:
  session.close()
@@ -196,7 +197,7 @@ def _run_metadata_migration(
  )[1]

  # If the GCS URI has changed, rename the blob
- # makes call to bucket.rename_blob
+ # makes call to bucket.rename_blob
  new_gcs_uri = artifact["object_url"]
  if old_gcs_uri != new_gcs_uri:
  print(
@@ -204,10 +205,16 @@ def _run_metadata_migration(
  )
  renamer = PieceOfWork(
  partial(
- rename_gcs_blob, GOOGLE_ACL_DATA_BUCKET, old_gcs_uri, new_gcs_uri
+ rename_gcs_blob,
+ GOOGLE_ACL_DATA_BUCKET,
+ old_gcs_uri,
+ new_gcs_uri,
  ),
  partial(
- rename_gcs_blob, GOOGLE_ACL_DATA_BUCKET, new_gcs_uri, old_gcs_uri
+ rename_gcs_blob,
+ GOOGLE_ACL_DATA_BUCKET,
+ new_gcs_uri,
+ old_gcs_uri,
  ),
  )
  gcs_tasks.schedule(renamer)
@@ -280,9 +287,9 @@ def _run_metadata_migration(
  flag_modified(upload, "metadata_patch")

  # Attempt to make GCS updates
- print(f"Running all GCS tasks...")
+ print("Running all GCS tasks...")
  gcs_tasks.run_all()
- print(f"GCS tasks succeeded.")
+ print("GCS tasks succeeded.")


  dont_run = os.environ.get("TESTING") or os.environ.get("ENV") == "dev"
@@ -294,7 +301,7 @@ def rename_gcs_blob(bucket, old_name, new_name):
  message = f"GCS: moving {full_old_uri} to {full_new_uri}"
  if dont_run:
  print(f"SKIPPING: {message}")
- return
+ return None

  print(message)