nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- cidc_api/config/db.py +1 -1
- cidc_api/config/secrets.py +2 -2
- cidc_api/config/settings.py +1 -2
- cidc_api/csms/auth.py +14 -7
- cidc_api/models/csms_api.py +101 -83
- cidc_api/models/files/details.py +28 -38
- cidc_api/models/files/facets.py +41 -24
- cidc_api/models/migrations.py +16 -9
- cidc_api/models/models.py +763 -195
- cidc_api/shared/auth.py +18 -13
- cidc_api/shared/gcloud_client.py +106 -61
- cidc_api/shared/rest_utils.py +6 -5
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/METADATA +33 -5
- nci_cidc_api_modules-1.0.1.dist-info/RECORD +25 -0
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/WHEEL +1 -1
- nci_cidc_api_modules-1.0.0rc0.dist-info/RECORD +0 -25
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/LICENSE +0 -0
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/top_level.txt +0 -0
cidc_api/models/files/details.py
CHANGED
@@ -273,16 +273,6 @@ details_dict = {
         "alignment: index file for deduplicated bam",
         "Bam index file for deduplicated bam file generated by the Sentieon Dedup tool (https://support.sentieon.com/manual/usages/general/#dedup-algorithm)",
     ),
-    "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
-        "analysis",
-        "germline: vcf of haplotype variants in targeted regions",
-        "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
-    "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
-        "analysis",
-        "germline: germline variants",
-        "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
     "/wes/analysis/normal/haplotyper_targets.vcf.gz": FileDetails(
         "analysis",
         "germline: vcf of haplotype variants in targeted regions",
@@ -447,16 +437,6 @@ details_dict = {
         "germline: germline variants",
         "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
     ),
-    "/wes/analysis/tumor/haplotyper_targets.vcf.gz": FileDetails(
-        "analysis",
-        "germline: vcf of haplotype variants in targeted regions",
-        "Haplotype variants within targeted capture regions using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
-    "/wes/analysis/tumor/haplotyper_output.vcf": FileDetails(
-        "analysis",
-        "germline: germline variants",
-        "Haplotype variants using Sentieon Haplotyper algorithm (https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm)",
-    ),
     "/wes_tumor_only/analysis/tumor/coverage_metrics.txt": FileDetails(
         "analysis",
         "plain-text genome-wide coverage file from tumor sample, from Sentieon's CoverageMetrics",
@@ -794,6 +774,16 @@ details_dict = {
         "stained image file that is the result of an H&E experiment",
         "An image file stained with hematoxylin and eosin, generated from an H&E experiment.",
     ),
+    "/hande/annotated_image.svs": FileDetails(
+        "source",
+        "stained and annotated image file that is the result of an H&E experiment",
+        "An SVS image file stained and annotated with hematoxylin and eosin, generated from an H&E experiment.",
+    ),
+    "/hande/annotated_image.": FileDetails(
+        "source",
+        "stained and annotated image file that is the result of an H&E experiment",
+        "An annotated image file stained with hematoxylin and eosin, generated from an H&E experiment.",
+    ),
     # ELISA
     "/elisa/assay.xlsx": FileDetails(
         "source",
@@ -801,17 +791,17 @@ details_dict = {
         "An XML-based Excel file that contains the results of a single run in arbitrary units. Each row is a sample, though not all have CIMAC IDs, and each column is an antigen.",
     ),
     # CyTOF analysis
-
+    "/cytof_analysis/cell_counts_assignment.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each assigned cell type",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
     ),
-
+    "/cytof_analysis/cell_counts_compartment.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each broad compartment assigned",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
     ),
-
+    "/cytof_analysis/cell_counts_profiling.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each profiled subset of assigned cell types",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
@@ -821,62 +811,62 @@ details_dict = {
         "comma-separated two-column table with cell counts for each assigned cell type",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the called cell type, and 'N', the number of cells of that type seen in the sample.",
     ),
-
+    "csv|cell counts compartment": FileDetails(
         "analysis",
         "comma-separated two-column table with cell counts for each broad compartment assigned",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the broad compartment of the called cell types, and 'N', the number of cells within that compartment seen in the sample.",
     ),
-
+    "csv|cell counts profiling": FileDetails(
         "analysis",
         "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
     ),
-
-
+    "/cytof_analysis/analysis.zip": FileDetails("analysis", "", ""),
+    "/cytof_analysis/assignment.csv": FileDetails(
         "miscellaneous",
         "comma-separated table of marker expression for each assigned cell type",
         "A plain-text, comma-separated table with a column for each assigned cell type, where rows are the signal on each channel for every cell type assigned.",
     ),
-
+    "/cytof_analysis/compartment.csv": FileDetails(
         "miscellaneous",
         "comma-separated table of marker expression for each broad compartment assigned",
         "A plain-text, comma-separated table with a column for each broad compartment of the called cell types, where rows are the signal on each channel for every compartment.",
     ),
-
-
+    "/cytof_analysis/control_files_analysis.zip": FileDetails("analysis", "", ""),
+    "/cytof_analysis/profiling.csv": FileDetails(
         "miscellaneous",
         "comma-separated two-column table with cell counts for each profiled subset of all assigned cell types",
         "A plain-text, comma-separated table with a numbered index column, the 'CellSubset' as the profiled subset of the assigned cell types, and 'N', the number of cells within that profiled subset seen in the sample.",
     ),
-
-
+    "/cytof_analysis/reports.zip": FileDetails("analysis", "", ""),
+    "/cytof_analysis/source.fcs": FileDetails(
         "source",
         "fcs data used as the input for this analysis",
         "The analysis-ready FCS file used as the input for this analysis. After normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
     ),
     # CyTOF assay
-
+    "/cytof/spike_in.fcs": FileDetails(
         "source",
         "normalized and debarcoded fcs data for a blank spike-in sample",
         "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
     ),
-
+    "/cytof/controls/spike_in.fcs": FileDetails(
         "source",
         "normalized and debarcoded fcs data for a blank spike-in sample",
         "The FCS file that captures pure spike-in for use as a control, after normalization and debarcoding.",
     ),
-
-
+    "/cytof/controls/processed.fcs": FileDetails("source", "", ""),
+    "/cytof/source_.fcs": FileDetails(
         "source",
         "raw fcs data as generated by the machine, without normalization, debarcoding, or cleaning",
         "The raw FCS file as generated by the machine, without any normalization, debarcoding, cleaning, etc.",
     ),
-
+    "/cytof/debarcoding_key.csv": FileDetails(
         "source",
         "",
         "",
     ),
-
+    "/cytof/processed.fcs": FileDetails(
         "source",
         "fully processed fcs data: normalized, debarcoded, no Veri-Cells, cleaned",
         "The analysis-ready FCS file after normalization, debarcoding, and removal of Veri-Cells and other non-specimen cells.",
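Each entry above maps a facet-group key (for example "/hande/annotated_image.svs") to a `FileDetails` value with three positional fields: a purpose ("source", "analysis", or "miscellaneous"), a short description, and a long description. A minimal sketch of that shape, with assumed field names (the real class is defined in cidc_api/models/files/details.py and may name its fields differently):

```python
# Minimal sketch of the shape implied by the entries above; the field names
# here are assumptions, not the real FileDetails definition.
from typing import NamedTuple


class FileDetails(NamedTuple):
    file_purpose: str  # "source", "analysis", or "miscellaneous" in the diff above
    short_description: str
    long_description: str


details_dict = {
    "/hande/annotated_image.svs": FileDetails(
        "source",
        "stained and annotated image file that is the result of an H&E experiment",
        "An SVS image file stained and annotated with hematoxylin and eosin, generated from an H&E experiment.",
    ),
}

# Example lookup by facet-group key:
print(details_dict["/hande/annotated_image.svs"].file_purpose)  # -> "source"
```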
cidc_api/models/files/facets.py
CHANGED
@@ -416,7 +416,13 @@ assay_facets: Facets = {
     },
     "H&E": {
         "Images": FacetConfig(
-            [
+            [
+                "/hande/image_file.svs",
+                "/hande/image_file.",
+                "/hande/annotated_image.svs",
+                "/hande/annotated_image.",
+            ],
+            "Stained image file.",
         )
     },
     "TCR": {
@@ -553,23 +559,23 @@ facets_dict: Dict[str, Facets] = {
     "Analysis Ready": analysis_ready_facets,
 }
 
-
 FACET_NAME_DELIM = "|"
 
 
 def _build_facet_groups_to_names():
     """Map facet_groups to human-readable data categories."""
-    path_to_name = lambda path: FACET_NAME_DELIM.join(path)
 
     facet_names = {}
     for facet_name, subfacet in facets_dict["Assay Type"].items():
         for subfacet_name, subsubfacet in subfacet.items():
             for facet_group in subsubfacet.facet_groups:
-                facet_names[facet_group] =
+                facet_names[facet_group] = FACET_NAME_DELIM.join(
+                    [facet_name, subfacet_name]
+                )
 
     for facet_name, subfacet in facets_dict["Clinical Type"].items():
         for facet_group in subfacet.facet_groups:
-            facet_names[facet_group] =
+            facet_names[facet_group] = FACET_NAME_DELIM.join([facet_name])
 
     # Note on why we don't use "Analysis Ready": any facet group included in the
     # "Analysis Ready" facet type will also have an entry in "Assay Type".
@@ -594,26 +600,37 @@ def build_data_category_facets(facet_group_file_counts: Dict[str, int]):
     }
     ```
     """
-
-
-
-
-
+
+    def extract_facet_info(facet_config_entries, _prefix):
+        results = []
+        for label, config in facet_config_entries.items():
+            count = sum(
                 facet_group_file_counts.get(facet_group, 0)
                 for facet_group in config.facet_groups
-            )
-
-
-
-
-
-
-
-
-
-
-
-
+            )
+            if count:
+                results.append(
+                    {"label": label, "description": config.description, "count": count}
+                )
+        return results
+
+    assay_types = {}
+    for assay_name, subfacets in assay_facets.items():
+        assay_names = extract_facet_info(subfacets, assay_name)
+        if assay_names:
+            assay_types[assay_name] = assay_names
+
+    results = {}
+    if assay_types:
+        results["Assay Type"] = assay_types
+    clinical_types = extract_facet_info(clinical_facets, None)
+    if clinical_types:
+        results["Clinical Type"] = clinical_types
+    analysis_ready = extract_facet_info(analysis_ready_facets, None)
+    if analysis_ready:
+        results["Analysis Ready"] = analysis_ready
+
+    return results
 
 
 def build_trial_facets(trial_file_counts: Dict[str, int]):
@@ -636,7 +653,7 @@ def get_facet_groups_for_paths(paths: List[List[str]]) -> List[str]:
                 facet_config = facet_config[key]
             assert isinstance(facet_config, FacetConfig)
         except Exception as e:
-            raise BadRequest(f"no facet for path {path}")
+            raise BadRequest(f"no facet for path {path}") from e
         facet_groups.extend(facet_config.facet_groups)
 
     return facet_groups
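The reworked `build_data_category_facets` aggregates per-facet-group file counts through the nested `extract_facet_info` helper shown in the diff, while `_build_facet_groups_to_names` now joins facet path components with `FACET_NAME_DELIM`. A self-contained sketch of the counting pattern, using a stand-in `FacetConfig` and invented sample data (the real definitions live in facets.py):

```python
# Stand-in sketch of the aggregation done by the new extract_facet_info helper;
# FacetConfig and the sample data here are illustrative, not the real definitions.
from typing import Dict, List, NamedTuple


class FacetConfig(NamedTuple):
    facet_groups: List[str]
    description: str = ""


subfacets: Dict[str, FacetConfig] = {
    "Images": FacetConfig(
        ["/hande/image_file.svs", "/hande/annotated_image.svs"],
        "Stained image file.",
    ),
}

# Per-facet-group file counts, e.g. as computed from the downloadable files table.
facet_group_file_counts = {"/hande/image_file.svs": 3, "/hande/annotated_image.svs": 2}


def extract_facet_info(facet_config_entries, _prefix):
    results = []
    for label, config in facet_config_entries.items():
        # Sum the file counts across every facet group this facet covers.
        count = sum(
            facet_group_file_counts.get(facet_group, 0)
            for facet_group in config.facet_groups
        )
        if count:
            results.append(
                {"label": label, "description": config.description, "count": count}
            )
    return results


print(extract_facet_info(subfacets, None))
# [{'label': 'Images', 'description': 'Stained image file.', 'count': 5}]
```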
cidc_api/models/migrations.py
CHANGED
@@ -1,4 +1,5 @@
-import os
+import os
+import traceback
 from contextlib import contextmanager
 from functools import partial
 from typing import Callable, List, NamedTuple
@@ -83,8 +84,8 @@ def migration_session():
             print("Running GCS rollback...")
             task_queue.rollback()
             print("GCS rollback succeeded.")
-        except Exception as
-            print(f"GCS rollback failed: {
+        except Exception as e_inner:
+            print(f"GCS rollback failed: {e_inner.__class__}\n{e_inner}")
         raise
     finally:
         session.close()
@@ -196,7 +197,7 @@ def _run_metadata_migration(
         )[1]
 
         # If the GCS URI has changed, rename the blob
-        # makes call to bucket.rename_blob
+        # makes call to bucket.rename_blob
         new_gcs_uri = artifact["object_url"]
         if old_gcs_uri != new_gcs_uri:
             print(
@@ -204,10 +205,16 @@ def _run_metadata_migration(
             )
             renamer = PieceOfWork(
                 partial(
-                    rename_gcs_blob,
+                    rename_gcs_blob,
+                    GOOGLE_ACL_DATA_BUCKET,
+                    old_gcs_uri,
+                    new_gcs_uri,
                 ),
                 partial(
-                    rename_gcs_blob,
+                    rename_gcs_blob,
+                    GOOGLE_ACL_DATA_BUCKET,
+                    new_gcs_uri,
+                    old_gcs_uri,
                 ),
             )
             gcs_tasks.schedule(renamer)
@@ -280,9 +287,9 @@ def _run_metadata_migration(
         flag_modified(upload, "metadata_patch")
 
     # Attempt to make GCS updates
-    print(
+    print("Running all GCS tasks...")
     gcs_tasks.run_all()
-    print(
+    print("GCS tasks succeeded.")
 
 
 dont_run = os.environ.get("TESTING") or os.environ.get("ENV") == "dev"
@@ -294,7 +301,7 @@ def rename_gcs_blob(bucket, old_name, new_name):
     message = f"GCS: moving {full_old_uri} to {full_new_uri}"
     if dont_run:
         print(f"SKIPPING: {message}")
-        return
+        return None
 
     print(message)
 
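The renamer change passes the bucket and both URIs explicitly to `rename_gcs_blob` and pairs each forward rename with its inverse, so that `migration_session` can roll the GCS renames back if the database migration fails. A simplified sketch of that do/undo queue pattern, with placeholder `PieceOfWork`/queue classes and a placeholder bucket name standing in for `GOOGLE_ACL_DATA_BUCKET` (the real classes in migrations.py may differ in detail):

```python
# Simplified sketch of the schedule/run_all/rollback pattern used for GCS blob
# renames; the PieceOfWork and queue classes here are stand-ins, not the real ones.
from functools import partial
from typing import Callable, List, NamedTuple


class PieceOfWork(NamedTuple):
    do: Callable[[], None]
    undo: Callable[[], None]


class PieceOfWorkQueue:
    def __init__(self):
        self._scheduled: List[PieceOfWork] = []
        self._completed: List[PieceOfWork] = []

    def schedule(self, task: PieceOfWork):
        self._scheduled.append(task)

    def run_all(self):
        for task in self._scheduled:
            task.do()
            self._completed.append(task)

    def rollback(self):
        # Undo completed work in reverse order.
        for task in reversed(self._completed):
            task.undo()


def rename_gcs_blob(bucket: str, old_name: str, new_name: str):
    # Placeholder: the real function moves a blob within the given bucket.
    print(f"GCS: moving gs://{bucket}/{old_name} to gs://{bucket}/{new_name}")


gcs_tasks = PieceOfWorkQueue()
renamer = PieceOfWork(
    partial(rename_gcs_blob, "placeholder-bucket", "old/object.vcf.gz", "new/object.vcf.gz"),
    partial(rename_gcs_blob, "placeholder-bucket", "new/object.vcf.gz", "old/object.vcf.gz"),
)
gcs_tasks.schedule(renamer)
gcs_tasks.run_all()   # forward pass during the migration
gcs_tasks.rollback()  # inverse pass, e.g. if the database migration raises
```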