nci-cidc-api-modules 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- cidc_api/config/db.py +1 -1
- cidc_api/config/secrets.py +2 -2
- cidc_api/config/settings.py +1 -2
- cidc_api/csms/auth.py +14 -7
- cidc_api/models/csms_api.py +101 -83
- cidc_api/models/files/details.py +18 -38
- cidc_api/models/files/facets.py +6 -6
- cidc_api/models/migrations.py +16 -9
- cidc_api/models/models.py +186 -158
- cidc_api/shared/auth.py +18 -13
- cidc_api/shared/gcloud_client.py +75 -73
- cidc_api/shared/rest_utils.py +6 -5
- {nci_cidc_api_modules-1.0.0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/METADATA +1 -1
- nci_cidc_api_modules-1.0.1.dist-info/RECORD +25 -0
- {nci_cidc_api_modules-1.0.0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/WHEEL +1 -1
- nci_cidc_api_modules-1.0.0.dist-info/RECORD +0 -25
- {nci_cidc_api_modules-1.0.0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/LICENSE +0 -0
- {nci_cidc_api_modules-1.0.0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/top_level.txt +0 -0
cidc_api/models/files/details.py
CHANGED
@@ -273,16 +273,6 @@ details_dict = {
|
|
273
273
|
"alignment: index file for deduplicated bam",
|
274
274
|
"Bam index file for deduplicated bam file generated by the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
|
275
275
|
),
|
276
|
-
"/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
|
277
|
-
"analysis",
|
278
|
-
"germline: vcf of haplotype variants in targeted regions",
|
279
|
-
"Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
|
280
|
-
),
|
281
|
-
"/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
|
282
|
-
"analysis",
|
283
|
-
"germline: germline variants",
|
284
|
-
"Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
|
285
|
-
),
|
286
276
|
"/wes/analysis/normal/haplotyper_targets.vcf.gz": FileDetails(
|
287
277
|
"analysis",
|
288
278
|
"germline: vcf of haplotype variants in targeted regions",
|
@@ -447,16 +437,6 @@ details_dict = {
|
|
447
437
|
"germline: germline variants",
|
448
438
|
"Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
|
449
439
|
),
|
450
|
-
"/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
|
451
|
-
"analysis",
|
452
|
-
"germline: vcf of haplotype variants in targeted regions",
|
453
|
-
"Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
|
454
|
-
),
|
455
|
-
"/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
|
456
|
-
"analysis",
|
457
|
-
"germline: germline variants",
|
458
|
-
"Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
|
459
|
-
),
|
460
440
|
"/wes_tumor_only/analysis/tumor/coverage_metrics.txt": FileDetails(
|
461
441
|
"analysis",
|
462
442
|
"plain-text genome-wide coverage file from tumor sample, from Sentieon's CoverageMetrics",
|
@@ -811,17 +791,17 @@ details_dict = {
|
|
811
791
|
"An XML-based Excel file that contains the results of a single run in arbitrary units. Each row is a sample, though not all have CIMAC IDs, and each column is an antigen.",
|
812
792
|
),
|
813
793
|
# CyTOF analysis
|
814
|
-
|
794
|
+
"/cytof_analysis/cell_counts_assignment.csv": FileDetails(
|
815
795
|
"miscellaneous",
|
816
796
|
"comma-separated two-column table with cell counts for each assigned cell type",
|
817
797
|
"A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
|
818
798
|
),
|
819
|
-
|
799
|
+
"/cytof_analysis/cell_counts_compartment.csv": FileDetails(
|
820
800
|
"miscellaneous",
|
821
801
|
"comma-separated two-column table with cell counts for each broad compartment assigned",
|
822
802
|
"A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
|
823
803
|
),
|
824
|
-
|
804
|
+
"/cytof_analysis/cell_counts_profiling.csv": FileDetails(
|
825
805
|
"miscellaneous",
|
826
806
|
"comma-separated two-column table with cell counts for each profiled subset of assigned cell types",
|
827
807
|
"A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
|
@@ -831,62 +811,62 @@ details_dict = {
|
|
831
811
|
"comma-separated two-column table with cell counts for each assigned cell type",
|
832
812
|
"A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
|
833
813
|
),
|
834
|
-
|
814
|
+
"csv|cell counts compartment": FileDetails(
|
835
815
|
"analysis",
|
836
816
|
"comma-separated two-column table with cell counts for each broad compartment assigned",
|
837
817
|
"A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
|
838
818
|
),
|
839
|
-
|
819
|
+
"csv|cell counts profiling": FileDetails(
|
840
820
|
"analysis",
|
841
821
|
"comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
|
842
822
|
"A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
|
843
823
|
),
|
844
|
-
|
845
|
-
|
824
|
+
"/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
|
825
|
+
"/cytof_analysis/assignment.csv": FileDetails(
|
846
826
|
"miscellaneous",
|
847
827
|
"comma-separated table of marker expression for each assigned cell type",
|
848
828
|
"A plain-text, comma-separated table with a column for each assigned cell type, where rows are the signal on each channel for every cell type assigned.",
|
849
829
|
),
|
850
|
-
|
830
|
+
"/cytof_analysis/compartment.csv": FileDetails(
|
851
831
|
"miscellaneous",
|
852
832
|
"comma-separated table of marker expression for each broad compartment assigned",
|
853
833
|
"A plain-text, comma-separated table with a column for each broad compartment of the called cell types, where rows are the signal on each channel for every compartment.",
|
854
834
|
),
|
855
|
-
|
856
|
-
|
835
|
+
"/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
|
836
|
+
"/cytof_analysis/profiling.csv": FileDetails(
|
857
837
|
"miscellaneous",
|
858
838
|
"comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
|
859
839
|
"A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
|
860
840
|
),
|
861
|
-
|
862
|
-
|
841
|
+
"/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
|
842
|
+
"/cytof_analysis/source.fcs": FileDetails(
|
863
843
|
"source",
|
864
844
|
"fcs data used as the input for this analysis",
|
865
845
|
"The analysis-ready FCS file used as the input for this analysis. After normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
|
866
846
|
),
|
867
847
|
# CyTOF assay
|
868
|
-
|
848
|
+
"/cytof/spike_in.fcs": FileDetails(
|
869
849
|
"source",
|
870
850
|
"normalized and debarcoded fcs data for a blank spike-in sample",
|
871
851
|
"The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
|
872
852
|
),
|
873
|
-
|
853
|
+
"/cytof/controls/spike_in.fcs": FileDetails(
|
874
854
|
"source",
|
875
855
|
"normalized and debarcoded fcs data for a blank spike-in sample",
|
876
856
|
"The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
|
877
857
|
),
|
878
|
-
|
879
|
-
|
858
|
+
"/cytof/controls/processed.fcs": FileDetails("source", "", ""),
|
859
|
+
"/cytof/source_.fcs": FileDetails(
|
880
860
|
"source",
|
881
861
|
"raw fcs data as generated by the machine, without normalization, debarcoding, or cleaning",
|
882
862
|
"The raw FCS file as generated by the machine, without any normalization, debarcoding, cleaning, etc.",
|
883
863
|
),
|
884
|
-
|
864
|
+
"/cytof/debarcoding_key.csv": FileDetails(
|
885
865
|
"source",
|
886
866
|
"",
|
887
867
|
"",
|
888
868
|
),
|
889
|
-
|
869
|
+
"/cytof/processed.fcs": FileDetails(
|
890
870
|
"source",
|
891
871
|
"fully processed fcs data: normalized, debarcoded, no Veri-Cells, cleaned",
|
892
872
|
"The analysis-ready FCS file after normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
|
cidc_api/models/files/facets.py
CHANGED
@@ -559,23 +559,23 @@ facets_dict: Dict[str, Facets] = {
|
|
559
559
|
"Analysis Ready": analysis_ready_facets,
|
560
560
|
}
|
561
561
|
|
562
|
-
|
563
562
|
FACET_NAME_DELIM = "|"
|
564
563
|
|
565
564
|
|
566
565
|
def _build_facet_groups_to_names():
|
567
566
|
"""Map facet_groups to human-readable data categories."""
|
568
|
-
path_to_name = lambda path: FACET_NAME_DELIM.join(path)
|
569
567
|
|
570
568
|
facet_names = {}
|
571
569
|
for facet_name, subfacet in facets_dict["Assay Type"].items():
|
572
570
|
for subfacet_name, subsubfacet in subfacet.items():
|
573
571
|
for facet_group in subsubfacet.facet_groups:
|
574
|
-
facet_names[facet_group] =
|
572
|
+
facet_names[facet_group] = FACET_NAME_DELIM.join(
|
573
|
+
[facet_name, subfacet_name]
|
574
|
+
)
|
575
575
|
|
576
576
|
for facet_name, subfacet in facets_dict["Clinical Type"].items():
|
577
577
|
for facet_group in subfacet.facet_groups:
|
578
|
-
facet_names[facet_group] =
|
578
|
+
facet_names[facet_group] = FACET_NAME_DELIM.join([facet_name])
|
579
579
|
|
580
580
|
# Note on why we don't use "Analysis Ready": any facet group included in the
|
581
581
|
# "Analysis Ready" facet type will also have an entry in "Assay Type".
|
@@ -601,7 +601,7 @@ def build_data_category_facets(facet_group_file_counts: Dict[str, int]):
|
|
601
601
|
```
|
602
602
|
"""
|
603
603
|
|
604
|
-
def extract_facet_info(facet_config_entries,
|
604
|
+
def extract_facet_info(facet_config_entries, _prefix):
|
605
605
|
results = []
|
606
606
|
for label, config in facet_config_entries.items():
|
607
607
|
count = sum(
|
@@ -653,7 +653,7 @@ def get_facet_groups_for_paths(paths: List[List[str]]) -> List[str]:
|
|
653
653
|
facet_config = facet_config[key]
|
654
654
|
assert isinstance(facet_config, FacetConfig)
|
655
655
|
except Exception as e:
|
656
|
-
raise BadRequest(f"no facet for path {path}")
|
656
|
+
raise BadRequest(f"no facet for path {path}") from e
|
657
657
|
facet_groups.extend(facet_config.facet_groups)
|
658
658
|
|
659
659
|
return facet_groups
|
cidc_api/models/migrations.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
import os
|
1
|
+
import os
|
2
|
+
import traceback
|
2
3
|
from contextlib import contextmanager
|
3
4
|
from functools import partial
|
4
5
|
from typing import Callable, List, NamedTuple
|
@@ -83,8 +84,8 @@ def migration_session():
|
|
83
84
|
print("Running GCS rollback...")
|
84
85
|
task_queue.rollback()
|
85
86
|
print("GCS rollback succeeded.")
|
86
|
-
except Exception as
|
87
|
-
print(f"GCS rollback failed: {
|
87
|
+
except Exception as e_inner:
|
88
|
+
print(f"GCS rollback failed: {e_inner.__class__}\n{e_inner}")
|
88
89
|
raise
|
89
90
|
finally:
|
90
91
|
session.close()
|
@@ -196,7 +197,7 @@ def _run_metadata_migration(
|
|
196
197
|
)[1]
|
197
198
|
|
198
199
|
# If the GCS URI has changed, rename the blob
|
199
|
-
# makes call to bucket.rename_blob
|
200
|
+
# makes call to bucket.rename_blob
|
200
201
|
new_gcs_uri = artifact["object_url"]
|
201
202
|
if old_gcs_uri != new_gcs_uri:
|
202
203
|
print(
|
@@ -204,10 +205,16 @@ def _run_metadata_migration(
|
|
204
205
|
)
|
205
206
|
renamer = PieceOfWork(
|
206
207
|
partial(
|
207
|
-
rename_gcs_blob,
|
208
|
+
rename_gcs_blob,
|
209
|
+
GOOGLE_ACL_DATA_BUCKET,
|
210
|
+
old_gcs_uri,
|
211
|
+
new_gcs_uri,
|
208
212
|
),
|
209
213
|
partial(
|
210
|
-
rename_gcs_blob,
|
214
|
+
rename_gcs_blob,
|
215
|
+
GOOGLE_ACL_DATA_BUCKET,
|
216
|
+
new_gcs_uri,
|
217
|
+
old_gcs_uri,
|
211
218
|
),
|
212
219
|
)
|
213
220
|
gcs_tasks.schedule(renamer)
|
@@ -280,9 +287,9 @@ def _run_metadata_migration(
|
|
280
287
|
flag_modified(upload, "metadata_patch")
|
281
288
|
|
282
289
|
# Attempt to make GCS updates
|
283
|
-
print(
|
290
|
+
print("Running all GCS tasks...")
|
284
291
|
gcs_tasks.run_all()
|
285
|
-
print(
|
292
|
+
print("GCS tasks succeeded.")
|
286
293
|
|
287
294
|
|
288
295
|
dont_run = os.environ.get("TESTING") or os.environ.get("ENV") == "dev"
|
@@ -294,7 +301,7 @@ def rename_gcs_blob(bucket, old_name, new_name):
|
|
294
301
|
message = f"GCS: moving {full_old_uri} to {full_new_uri}"
|
295
302
|
if dont_run:
|
296
303
|
print(f"SKIPPING: {message}")
|
297
|
-
return
|
304
|
+
return None
|
298
305
|
|
299
306
|
print(message)
|
300
307
|
|