PyPI - nci-cidc-api-modules - Versions diffs - 1.0.0rc0__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

cidc_api/config/db.py +1 -1
cidc_api/config/secrets.py +2 -2
cidc_api/config/settings.py +1 -2
cidc_api/csms/auth.py +14 -7
cidc_api/models/csms_api.py +101 -83
cidc_api/models/files/details.py +28 -38
cidc_api/models/files/facets.py +41 -24
cidc_api/models/migrations.py +16 -9
cidc_api/models/models.py +763 -195
cidc_api/shared/auth.py +18 -13
cidc_api/shared/gcloud_client.py +106 -61
cidc_api/shared/rest_utils.py +6 -5
{nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/METADATA +38 -10
nci_cidc_api_modules-1.0.2.dist-info/RECORD +25 -0
{nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/WHEEL +1 -1
nci_cidc_api_modules-1.0.0rc0.dist-info/RECORD +0 -25
{nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/LICENSE +0 -0
{nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/top_level.txt +0 -0

cidc_api/models/files/details.py CHANGED Viewed

@@ -273,16 +273,6 @@ details_dict = {
         "alignment: index file for deduplicated bam",
         "Bam index file for deduplicated bam file generated by the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
     ),
-    "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
-        "analysis",
-        "germline: vcf of haplotype variants in targeted regions",
-        "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
-    "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
-        "analysis",
-        "germline: germline variants",
-        "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
     "/wes/analysis/normal/haplotyper_targets.vcf.gz": FileDetails(
         "analysis",
         "germline: vcf of haplotype variants in targeted regions",
@@ -447,16 +437,6 @@ details_dict = {
         "germline: germline variants",
         "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
     ),
-    "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
-        "analysis",
-        "germline: vcf of haplotype variants in targeted regions",
-        "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
-    "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
-        "analysis",
-        "germline: germline variants",
-        "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
     "/wes_tumor_only/analysis/tumor/coverage_metrics.txt": FileDetails(
         "analysis",
         "plain-text genome-wide coverage file from tumor sample, from Sentieon's CoverageMetrics",
@@ -794,6 +774,16 @@ details_dict = {
         "stained image file that is the result of an H&E experiment",
         "An image file stained with hematoxylin and eosin, generated from an H&E experiment.",
     ),
+    "/hande/annotated_image.svs": FileDetails(
+        "source",
+        "stained and annotated image file that is the result of an H&E experiment",
+        "An SVS image file stained and annotated with hematoxylin and eosin, generated from an H&E experiment.",
+    ),
+    "/hande/annotated_image.": FileDetails(
+        "source",
+        "stained and annotated image file that is the result of an H&E experiment",
+        "An annotated image file stained with hematoxylin and eosin, generated from an H&E experiment.",
+    ),
     # ELISA
     "/elisa/assay.xlsx": FileDetails(
         "source",
@@ -801,17 +791,17 @@ details_dict = {
         "An XML-based Excel file that contains the results of a single run in arbitrary units. Each row is a sample, though not all have CIMAC IDs, and each column is an antigen.",
     ),
     # CyTOF analysis
-    f"/cytof_analysis/cell_counts_assignment.csv": FileDetails(
+    "/cytof_analysis/cell_counts_assignment.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each assigned cell type",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
     ),
-    f"/cytof_analysis/cell_counts_compartment.csv": FileDetails(
+    "/cytof_analysis/cell_counts_compartment.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each broad compartment assigned",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
     ),
-    f"/cytof_analysis/cell_counts_profiling.csv": FileDetails(
+    "/cytof_analysis/cell_counts_profiling.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each profiled subset of assigned cell types",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
@@ -821,62 +811,62 @@ details_dict = {
         "comma-separated two-column table with cell counts for each assigned cell type",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
     ),
-    f"csv|cell counts compartment": FileDetails(
+    "csv|cell counts compartment": FileDetails(
         "analysis",
         "comma-separated two-column table with cell counts for each broad compartment assigned",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
     ),
-    f"csv|cell counts profiling": FileDetails(
+    "csv|cell counts profiling": FileDetails(
         "analysis",
         "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
     ),
-    f"/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
-    f"/cytof_analysis/assignment.csv": FileDetails(
+    "/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
+    "/cytof_analysis/assignment.csv": FileDetails(
         "miscellaneous",
         "comma-separated table of marker expression for each assigned cell type",
         "A plain-text, comma-separated table with a column for each assigned cell type, where rows are the signal on each channel for every cell type assigned.",
     ),
-    f"/cytof_analysis/compartment.csv": FileDetails(
+    "/cytof_analysis/compartment.csv": FileDetails(
         "miscellaneous",
         "comma-separated table of marker expression for each broad compartment assigned",
         "A plain-text, comma-separated table with a column for each broad compartment of the called cell types, where rows are the signal on each channel for every compartment.",
     ),
-    f"/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
-    f"/cytof_analysis/profiling.csv": FileDetails(
+    "/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
+    "/cytof_analysis/profiling.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
     ),
-    f"/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
-    f"/cytof_analysis/source.fcs": FileDetails(
+    "/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
+    "/cytof_analysis/source.fcs": FileDetails(
         "source",
         "fcs data used as the input for this analysis",
         "The analysis-ready FCS file used as the input for this analysis. After normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
     ),
     # CyTOF assay
-    f"/cytof/spike_in.fcs": FileDetails(
+    "/cytof/spike_in.fcs": FileDetails(
         "source",
         "normalized and debarcoded fcs data for a blank spike-in sample",
         "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
     ),
-    f"/cytof/controls/spike_in.fcs": FileDetails(
+    "/cytof/controls/spike_in.fcs": FileDetails(
         "source",
         "normalized and debarcoded fcs data for a blank spike-in sample",
         "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
     ),
-    f"/cytof/controls/processed.fcs": FileDetails("source", "", ""),
-    f"/cytof/source_.fcs": FileDetails(
+    "/cytof/controls/processed.fcs": FileDetails("source", "", ""),
+    "/cytof/source_.fcs": FileDetails(
         "source",
         "raw fcs data as generated by the machine, without normalization, debarcoding, or cleaning",
         "The raw FCS file as generated by the machine, without any normalization, debarcoding, cleaning, etc.",
     ),
-    f"/cytof/debarcoding_key.csv": FileDetails(
+    "/cytof/debarcoding_key.csv": FileDetails(
         "source",
         "",
         "",
     ),
-    f"/cytof/processed.fcs": FileDetails(
+    "/cytof/processed.fcs": FileDetails(
         "source",
         "fully processed fcs data: normalized, debarcoded, no Veri-Cells, cleaned",
         "The analysis-ready FCS file after normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",

cidc_api/models/files/facets.py CHANGED Viewed

@@ -416,7 +416,13 @@ assay_facets: Facets = {
     },
     "H&E": {
         "Images": FacetConfig(
-            ["/hande/image_file.svs", "/hande/image_file."], "Stained image file."
+            [
+                "/hande/image_file.svs",
+                "/hande/image_file.",
+                "/hande/annotated_image.svs",
+                "/hande/annotated_image.",
+            ],
+            "Stained image file.",
         )
     },
     "TCR": {
@@ -553,23 +559,23 @@ facets_dict: Dict[str, Facets] = {
     "Analysis Ready": analysis_ready_facets,
 }
 FACET_NAME_DELIM = "|"
 def _build_facet_groups_to_names():
     """Map facet_groups to human-readable data categories."""
-    path_to_name = lambda path: FACET_NAME_DELIM.join(path)
     facet_names = {}
     for facet_name, subfacet in facets_dict["Assay Type"].items():
         for subfacet_name, subsubfacet in subfacet.items():
             for facet_group in subsubfacet.facet_groups:
-                facet_names[facet_group] = path_to_name([facet_name, subfacet_name])
+                facet_names[facet_group] = FACET_NAME_DELIM.join(
+                    [facet_name, subfacet_name]
+                )
     for facet_name, subfacet in facets_dict["Clinical Type"].items():
         for facet_group in subfacet.facet_groups:
-            facet_names[facet_group] = path_to_name([facet_name])
+            facet_names[facet_group] = FACET_NAME_DELIM.join([facet_name])
     # Note on why we don't use "Analysis Ready": any facet group included in the
     # "Analysis Ready" facet type will also have an entry in "Assay Type".
@@ -594,26 +600,37 @@ def build_data_category_facets(facet_group_file_counts: Dict[str, int]):
     }
     ```
     """
-    extract_facet_info = lambda facet_config_entries, prefix: [
-        {
-            "label": label,
-            "description": config.description,
-            "count": sum(
+    def extract_facet_info(facet_config_entries, _prefix):
+        results = []
+        for label, config in facet_config_entries.items():
+            count = sum(
                 facet_group_file_counts.get(facet_group, 0)
                 for facet_group in config.facet_groups
-            ),
-        }
-        for label, config in facet_config_entries.items()
-    ]
-    return {
-        "Assay Type": {
-            assay_name: extract_facet_info(subfacets, assay_name)
-            for assay_name, subfacets in assay_facets.items()
-        },
-        "Clinical Type": extract_facet_info(clinical_facets, None),
-        "Analysis Ready": extract_facet_info(analysis_ready_facets, None),
-    }
+            )
+            if count:
+                results.append(
+                    {"label": label, "description": config.description, "count": count}
+                )
+        return results
+    assay_types = {}
+    for assay_name, subfacets in assay_facets.items():
+        assay_names = extract_facet_info(subfacets, assay_name)
+        if assay_names:
+            assay_types[assay_name] = assay_names
+    results = {}
+    if assay_types:
+        results["Assay Type"] = assay_types
+    clinical_types = extract_facet_info(clinical_facets, None)
+    if clinical_types:
+        results["Clinical Type"] = clinical_types
+    analysis_ready = extract_facet_info(analysis_ready_facets, None)
+    if analysis_ready:
+        results["Analysis Ready"] = analysis_ready
+    return results
 def build_trial_facets(trial_file_counts: Dict[str, int]):
@@ -636,7 +653,7 @@ def get_facet_groups_for_paths(paths: List[List[str]]) -> List[str]:
                 facet_config = facet_config[key]
             assert isinstance(facet_config, FacetConfig)
         except Exception as e:
-            raise BadRequest(f"no facet for path {path}")
+            raise BadRequest(f"no facet for path {path}") from e
         facet_groups.extend(facet_config.facet_groups)
     return facet_groups

cidc_api/models/migrations.py CHANGED Viewed

@@ -1,4 +1,5 @@
-import os, traceback
+import os
+import traceback
 from contextlib import contextmanager
 from functools import partial
 from typing import Callable, List, NamedTuple
@@ -83,8 +84,8 @@ def migration_session():
                 print("Running GCS rollback...")
                 task_queue.rollback()
                 print("GCS rollback succeeded.")
-            except Exception as e:
-                print(f"GCS rollback failed: {e.__class__}\n{e}")
+            except Exception as e_inner:
+                print(f"GCS rollback failed: {e_inner.__class__}\n{e_inner}")
         raise
     finally:
         session.close()
@@ -196,7 +197,7 @@ def _run_metadata_migration(
             )[1]
             # If the GCS URI has changed, rename the blob
-            # makes call to bucket.rename_blob
+            # makes call to bucket.rename_blob
             new_gcs_uri = artifact["object_url"]
             if old_gcs_uri != new_gcs_uri:
                 print(
@@ -204,10 +205,16 @@ def _run_metadata_migration(
                 )
                 renamer = PieceOfWork(
                     partial(
-                        rename_gcs_blob, GOOGLE_ACL_DATA_BUCKET, old_gcs_uri, new_gcs_uri
+                        rename_gcs_blob,
+                        GOOGLE_ACL_DATA_BUCKET,
+                        old_gcs_uri,
+                        new_gcs_uri,
                     ),
                     partial(
-                        rename_gcs_blob, GOOGLE_ACL_DATA_BUCKET, new_gcs_uri, old_gcs_uri
+                        rename_gcs_blob,
+                        GOOGLE_ACL_DATA_BUCKET,
+                        new_gcs_uri,
+                        old_gcs_uri,
                     ),
                 )
                 gcs_tasks.schedule(renamer)
@@ -280,9 +287,9 @@ def _run_metadata_migration(
         flag_modified(upload, "metadata_patch")
     # Attempt to make GCS updates
-    print(f"Running all GCS tasks...")
+    print("Running all GCS tasks...")
     gcs_tasks.run_all()
-    print(f"GCS tasks succeeded.")
+    print("GCS tasks succeeded.")
 dont_run = os.environ.get("TESTING") or os.environ.get("ENV") == "dev"
@@ -294,7 +301,7 @@ def rename_gcs_blob(bucket, old_name, new_name):
     message = f"GCS: moving {full_old_uri} to {full_new_uri}"
     if dont_run:
         print(f"SKIPPING: {message}")
-        return
+        return None
     print(message)

nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.2__py3-none-any.whl

nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.2__py3-none-any.whl