nci-cidc-api-modules 1.2.34__py3-none-any.whl → 1.2.45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/__init__.py +1 -0
- cidc_api/config/db.py +21 -1
- cidc_api/config/settings.py +1 -0
- cidc_api/models/__init__.py +0 -2
- cidc_api/models/data.py +15 -6
- cidc_api/models/db/stage1/__init__.py +56 -0
- cidc_api/models/db/stage1/additional_treatment_orm.py +22 -0
- cidc_api/models/db/stage1/adverse_event_orm.py +46 -0
- cidc_api/models/db/stage1/base_orm.py +7 -0
- cidc_api/models/db/stage1/baseline_clinical_assessment_orm.py +22 -0
- cidc_api/models/db/stage1/comorbidity_orm.py +23 -0
- cidc_api/models/db/stage1/consent_group_orm.py +32 -0
- cidc_api/models/db/stage1/demographic_orm.py +47 -0
- cidc_api/models/db/stage1/disease_orm.py +52 -0
- cidc_api/models/db/stage1/exposure_orm.py +22 -0
- cidc_api/models/db/stage1/gvhd_diagnosis_acute_orm.py +34 -0
- cidc_api/models/db/stage1/gvhd_diagnosis_chronic_orm.py +36 -0
- cidc_api/models/db/stage1/gvhd_organ_acute_orm.py +21 -0
- cidc_api/models/db/stage1/gvhd_organ_chronic_orm.py +21 -0
- cidc_api/models/db/stage1/medical_history_orm.py +30 -0
- cidc_api/models/db/stage1/other_malignancy_orm.py +29 -0
- cidc_api/models/db/stage1/participant_orm.py +77 -0
- cidc_api/models/db/stage1/prior_treatment_orm.py +29 -0
- cidc_api/models/db/stage1/radiotherapy_dose_orm.py +39 -0
- cidc_api/models/db/stage1/response_by_system_orm.py +30 -0
- cidc_api/models/db/stage1/response_orm.py +28 -0
- cidc_api/models/db/stage1/specimen_orm.py +46 -0
- cidc_api/models/db/stage1/stem_cell_transplant_orm.py +25 -0
- cidc_api/models/db/stage1/surgery_orm.py +27 -0
- cidc_api/models/db/stage1/therapy_agent_dose_orm.py +31 -0
- cidc_api/models/db/stage1/treatment_orm.py +38 -0
- cidc_api/models/db/stage1/trial_orm.py +35 -0
- cidc_api/models/db/stage2/additional_treatment_orm.py +6 -7
- cidc_api/models/db/stage2/administrative_person_orm.py +4 -4
- cidc_api/models/db/stage2/administrative_role_assignment_orm.py +4 -4
- cidc_api/models/db/stage2/adverse_event_orm.py +11 -13
- cidc_api/models/db/stage2/arm_orm.py +3 -3
- cidc_api/models/db/stage2/base_orm.py +7 -0
- cidc_api/models/db/stage2/baseline_clinical_assessment_orm.py +5 -7
- cidc_api/models/db/stage2/cohort_orm.py +3 -3
- cidc_api/models/db/stage2/comorbidity_orm.py +6 -8
- cidc_api/models/db/stage2/consent_group_orm.py +4 -4
- cidc_api/models/db/stage2/contact_orm.py +16 -20
- cidc_api/models/db/stage2/demographic_orm.py +3 -3
- cidc_api/models/db/stage2/disease_orm.py +4 -4
- cidc_api/models/db/stage2/exposure_orm.py +3 -3
- cidc_api/models/db/stage2/file_orm.py +6 -9
- cidc_api/models/db/stage2/gvhd_diagnosis_acute_orm.py +4 -4
- cidc_api/models/db/stage2/gvhd_diagnosis_chronic_orm.py +4 -6
- cidc_api/models/db/stage2/gvhd_organ_acute_orm.py +3 -3
- cidc_api/models/db/stage2/gvhd_organ_chronic_orm.py +3 -3
- cidc_api/models/db/stage2/institution_orm.py +7 -7
- cidc_api/models/db/stage2/medical_history_orm.py +9 -9
- cidc_api/models/db/stage2/other_clinical_endpoint_orm.py +8 -12
- cidc_api/models/db/stage2/other_malignancy_orm.py +8 -10
- cidc_api/models/db/stage2/participant_orm.py +23 -24
- cidc_api/models/db/stage2/prior_treatment_orm.py +12 -13
- cidc_api/models/db/stage2/publication_orm.py +9 -11
- cidc_api/models/db/stage2/radiotherapy_dose_orm.py +8 -9
- cidc_api/models/db/stage2/response_by_system_orm.py +3 -3
- cidc_api/models/db/stage2/response_orm.py +3 -3
- cidc_api/models/db/stage2/shipment_orm.py +17 -17
- cidc_api/models/db/stage2/shipment_specimen_orm.py +4 -4
- cidc_api/models/db/stage2/specimen_orm.py +7 -6
- cidc_api/models/db/stage2/stem_cell_transplant_orm.py +6 -7
- cidc_api/models/db/stage2/surgery_orm.py +6 -7
- cidc_api/models/db/stage2/therapy_agent_dose_orm.py +7 -8
- cidc_api/models/db/stage2/treatment_orm.py +15 -15
- cidc_api/models/db/stage2/trial_orm.py +15 -17
- cidc_api/models/files/facets.py +4 -0
- cidc_api/models/models.py +153 -9
- cidc_api/models/pydantic/{stage2/base.py → base.py} +1 -1
- cidc_api/models/pydantic/stage1/__init__.py +56 -0
- cidc_api/models/pydantic/stage1/additional_treatment.py +23 -0
- cidc_api/models/pydantic/stage1/adverse_event.py +100 -0
- cidc_api/models/pydantic/stage1/baseline_clinical_assessment.py +23 -0
- cidc_api/models/pydantic/stage1/comorbidity.py +36 -0
- cidc_api/models/pydantic/stage1/consent_group.py +30 -0
- cidc_api/models/pydantic/stage1/demographic.py +123 -0
- cidc_api/models/pydantic/stage1/disease.py +158 -0
- cidc_api/models/pydantic/stage1/exposure.py +32 -0
- cidc_api/models/pydantic/stage1/gvhd_diagnosis_acute.py +33 -0
- cidc_api/models/pydantic/stage1/gvhd_diagnosis_chronic.py +32 -0
- cidc_api/models/pydantic/stage1/gvhd_organ_acute.py +22 -0
- cidc_api/models/pydantic/stage1/gvhd_organ_chronic.py +23 -0
- cidc_api/models/pydantic/stage1/medical_history.py +36 -0
- cidc_api/models/pydantic/stage1/other_malignancy.py +49 -0
- cidc_api/models/pydantic/stage1/participant.py +51 -0
- cidc_api/models/pydantic/stage1/prior_treatment.py +45 -0
- cidc_api/models/pydantic/stage1/radiotherapy_dose.py +79 -0
- cidc_api/models/pydantic/stage1/response.py +65 -0
- cidc_api/models/pydantic/stage1/response_by_system.py +112 -0
- cidc_api/models/pydantic/stage1/specimen.py +31 -0
- cidc_api/models/pydantic/stage1/stem_cell_transplant.py +35 -0
- cidc_api/models/pydantic/stage1/surgery.py +49 -0
- cidc_api/models/pydantic/stage1/therapy_agent_dose.py +67 -0
- cidc_api/models/pydantic/stage1/treatment.py +50 -0
- cidc_api/models/pydantic/stage1/trial.py +45 -0
- cidc_api/models/pydantic/stage2/additional_treatment.py +2 -4
- cidc_api/models/pydantic/stage2/administrative_person.py +1 -1
- cidc_api/models/pydantic/stage2/administrative_role_assignment.py +2 -2
- cidc_api/models/pydantic/stage2/adverse_event.py +1 -1
- cidc_api/models/pydantic/stage2/arm.py +2 -2
- cidc_api/models/pydantic/stage2/baseline_clinical_assessment.py +1 -1
- cidc_api/models/pydantic/stage2/cohort.py +1 -1
- cidc_api/models/pydantic/stage2/comorbidity.py +1 -1
- cidc_api/models/pydantic/stage2/consent_group.py +2 -2
- cidc_api/models/pydantic/stage2/contact.py +1 -1
- cidc_api/models/pydantic/stage2/demographic.py +1 -1
- cidc_api/models/pydantic/stage2/disease.py +1 -1
- cidc_api/models/pydantic/stage2/exposure.py +1 -1
- cidc_api/models/pydantic/stage2/file.py +2 -2
- cidc_api/models/pydantic/stage2/gvhd_diagnosis_acute.py +1 -1
- cidc_api/models/pydantic/stage2/gvhd_diagnosis_chronic.py +1 -1
- cidc_api/models/pydantic/stage2/gvhd_organ_acute.py +1 -1
- cidc_api/models/pydantic/stage2/gvhd_organ_chronic.py +1 -1
- cidc_api/models/pydantic/stage2/institution.py +1 -1
- cidc_api/models/pydantic/stage2/medical_history.py +1 -1
- cidc_api/models/pydantic/stage2/other_clinical_endpoint.py +1 -1
- cidc_api/models/pydantic/stage2/other_malignancy.py +1 -1
- cidc_api/models/pydantic/stage2/participant.py +6 -3
- cidc_api/models/pydantic/stage2/prior_treatment.py +6 -15
- cidc_api/models/pydantic/stage2/publication.py +2 -2
- cidc_api/models/pydantic/stage2/radiotherapy_dose.py +1 -1
- cidc_api/models/pydantic/stage2/response.py +2 -2
- cidc_api/models/pydantic/stage2/response_by_system.py +1 -1
- cidc_api/models/pydantic/stage2/shipment.py +2 -2
- cidc_api/models/pydantic/stage2/shipment_specimen.py +1 -1
- cidc_api/models/pydantic/stage2/specimen.py +6 -3
- cidc_api/models/pydantic/stage2/stem_cell_transplant.py +2 -2
- cidc_api/models/pydantic/stage2/surgery.py +1 -1
- cidc_api/models/pydantic/stage2/therapy_agent_dose.py +1 -1
- cidc_api/models/pydantic/stage2/treatment.py +1 -1
- cidc_api/models/pydantic/stage2/trial.py +8 -10
- cidc_api/models/types.py +30 -16
- cidc_api/shared/assay_handling.py +68 -0
- cidc_api/shared/auth.py +5 -5
- cidc_api/shared/file_handling.py +16 -4
- cidc_api/shared/gcloud_client.py +78 -16
- cidc_api/shared/utils.py +18 -9
- cidc_api/telemetry.py +101 -0
- {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/METADATA +21 -12
- nci_cidc_api_modules-1.2.45.dist-info/RECORD +165 -0
- cidc_api/models/db/base_orm.py +0 -25
- nci_cidc_api_modules-1.2.34.dist-info/RECORD +0 -109
- {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/WHEEL +0 -0
- {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/licenses/LICENSE +0 -0
- {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/top_level.txt +0 -0
|
@@ -2,8 +2,8 @@ from typing import Self, Annotated, List
|
|
|
2
2
|
|
|
3
3
|
from pydantic import NonPositiveInt, NegativeInt, model_validator, BeforeValidator
|
|
4
4
|
|
|
5
|
-
from .base import Base
|
|
6
|
-
from cidc_api.models.types import
|
|
5
|
+
from cidc_api.models.pydantic.base import Base
|
|
6
|
+
from cidc_api.models.types import ConditioningRegimenType, StemCellDonorType
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class PriorTreatment(Base):
|
|
@@ -24,12 +24,9 @@ class PriorTreatment(Base):
|
|
|
24
24
|
# the treatment modality.
|
|
25
25
|
prior_treatment_days_to_end: NonPositiveInt | None = None
|
|
26
26
|
|
|
27
|
-
# Specifies the category or kind of prior treatment modality a participant received.
|
|
28
|
-
prior_treatment_type: Annotated[List[PriorTreatmentType], BeforeValidator(Base.split_list)]
|
|
29
|
-
|
|
30
27
|
# Description of the prior treatment such as its full generic name if it is a type of therapy agent,
|
|
31
28
|
# radiotherapy procedure name and location, or surgical procedure name and location.
|
|
32
|
-
prior_treatment_description: str
|
|
29
|
+
prior_treatment_description: str
|
|
33
30
|
|
|
34
31
|
# Best response from any response assessment system to the prior treatment if available or applicable.
|
|
35
32
|
prior_treatment_best_response: str | None = None
|
|
@@ -41,12 +38,6 @@ class PriorTreatment(Base):
|
|
|
41
38
|
# If prior treatment is "Stem cell transplant", indicates what stem cell donor type used.
|
|
42
39
|
prior_treatment_stem_cell_donor_type: StemCellDonorType | None = None
|
|
43
40
|
|
|
44
|
-
# If prior treatment is "Stem cell transplant", indicates the number of days from
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
@model_validator(mode="after")
|
|
49
|
-
def validate_description_cr(self) -> Self:
|
|
50
|
-
if "Other therapy" in self.prior_treatment_type and not self.prior_treatment_description:
|
|
51
|
-
raise ValueError('If type is "Other therapy", please provide description.')
|
|
52
|
-
return self
|
|
41
|
+
# If prior treatment is "Stem cell transplant", indicates the number of days from enrollment
|
|
42
|
+
# to the prior transplant. This must be a negative number.
|
|
43
|
+
prior_treatment_days_from_transplant_to_treatment_initiation: NegativeInt | None = None
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .base import Base
|
|
1
|
+
from cidc_api.models.pydantic.base import Base
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class Publication(Base):
|
|
@@ -6,7 +6,7 @@ class Publication(Base):
|
|
|
6
6
|
publication_id: int | None = None
|
|
7
7
|
|
|
8
8
|
# The unique internal identifier for the associated Trial record
|
|
9
|
-
trial_id:
|
|
9
|
+
trial_id: str | None = None
|
|
10
10
|
|
|
11
11
|
# The version number of the trial dataset
|
|
12
12
|
version: str | None = None
|
|
@@ -2,7 +2,7 @@ from typing import Self
|
|
|
2
2
|
|
|
3
3
|
from pydantic import NonNegativeInt, model_validator
|
|
4
4
|
|
|
5
|
-
from .base import Base
|
|
5
|
+
from cidc_api.models.pydantic.base import Base
|
|
6
6
|
from cidc_api.models.types import SurvivalStatus, YNUNA, CauseOfDeath
|
|
7
7
|
|
|
8
8
|
|
|
@@ -44,7 +44,7 @@ class Response(Base):
|
|
|
44
44
|
evaluable_for_efficacy: bool
|
|
45
45
|
|
|
46
46
|
# Days from enrollment date to the last time the patient's vital status was verified.
|
|
47
|
-
days_to_last_vital_status: NonNegativeInt | None = None
|
|
47
|
+
days_to_last_vital_status: NonNegativeInt | None = None # TODO: Needs CR check
|
|
48
48
|
|
|
49
49
|
@model_validator(mode="after")
|
|
50
50
|
def validate_cause_of_death_cr(self) -> Self:
|
|
@@ -2,7 +2,7 @@ from typing import Self
|
|
|
2
2
|
|
|
3
3
|
from pydantic import PositiveInt, model_validator, NonNegativeInt
|
|
4
4
|
|
|
5
|
-
from .base import Base
|
|
5
|
+
from cidc_api.models.pydantic.base import Base
|
|
6
6
|
from cidc_api.models.types import ResponseSystem, ResponseSystemVersion, BestOverallResponse, YNUNA
|
|
7
7
|
|
|
8
8
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
|
|
3
|
-
from .base import Base
|
|
3
|
+
from cidc_api.models.pydantic.base import Base
|
|
4
4
|
from cidc_api.models.types import AssayPriority, AssayType, Courier, ShipmentCondition, ShipmentQuality
|
|
5
5
|
|
|
6
6
|
|
|
@@ -12,7 +12,7 @@ class Shipment(Base):
|
|
|
12
12
|
institution_id: int | None = None
|
|
13
13
|
|
|
14
14
|
# The unique internal identifier for the associated trial.
|
|
15
|
-
trial_id:
|
|
15
|
+
trial_id: str | None = None
|
|
16
16
|
|
|
17
17
|
# The version number of the trial dataset
|
|
18
18
|
version: str | None = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
|
|
3
|
-
from .base import Base
|
|
3
|
+
from cidc_api.models.pydantic.base import Base
|
|
4
4
|
from cidc_api.models.types import (
|
|
5
5
|
UberonAnatomicalTerm,
|
|
6
6
|
ICDO3MorphologicalCode,
|
|
@@ -66,7 +66,7 @@ class Specimen(Base):
|
|
|
66
66
|
# Categorical description of timepoint at which the sample was taken.
|
|
67
67
|
# CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=5899851%20and%20ver_nr=1
|
|
68
68
|
# Note: CIDC doesn't conform to this CDE's PVs
|
|
69
|
-
collection_event_name: str
|
|
69
|
+
collection_event_name: str
|
|
70
70
|
|
|
71
71
|
# The type of the specimen
|
|
72
72
|
specimen_type: SpecimenType | None = None
|
|
@@ -208,4 +208,7 @@ class Specimen(Base):
|
|
|
208
208
|
date_ingested: datetime | None = None
|
|
209
209
|
|
|
210
210
|
# Days from enrollment date to date specimen was collected.
|
|
211
|
-
days_to_specimen_collection: int
|
|
211
|
+
days_to_specimen_collection: int
|
|
212
|
+
|
|
213
|
+
# The location within the body from which a specimen was originally obtained as captured in the Uberon anatomical term.
|
|
214
|
+
organ_site_of_collection: UberonAnatomicalTerm
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from pydantic import NonNegativeInt
|
|
2
|
-
from .base import Base
|
|
2
|
+
from cidc_api.models.pydantic.base import Base
|
|
3
3
|
from cidc_api.models.types import (
|
|
4
4
|
StemCellDonorType,
|
|
5
5
|
AllogeneicDonorType,
|
|
@@ -26,7 +26,7 @@ class StemCellTransplant(Base):
|
|
|
26
26
|
allogeneic_donor_type: AllogeneicDonorType | None = None
|
|
27
27
|
|
|
28
28
|
# Source of the stem cells used for transplant.
|
|
29
|
-
|
|
29
|
+
stem_cell_source: StemCellSource
|
|
30
30
|
|
|
31
31
|
# Days from the enrollment date to the date of the stem cell transplant.
|
|
32
32
|
days_to_transplant: NonNegativeInt
|
|
@@ -2,8 +2,8 @@ from datetime import datetime
|
|
|
2
2
|
from pydantic import BeforeValidator
|
|
3
3
|
from typing import List, Annotated
|
|
4
4
|
|
|
5
|
-
from .base import Base
|
|
6
|
-
from cidc_api.models.types import TrialOrganization, TrialFundingAgency, AssayType, AgeGroup
|
|
5
|
+
from cidc_api.models.pydantic.base import Base
|
|
6
|
+
from cidc_api.models.types import TrialOrganization, TrialFundingAgency, AssayType, AgeGroup, PrimaryPurposeType
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class Trial(Base):
|
|
@@ -12,7 +12,7 @@ class Trial(Base):
|
|
|
12
12
|
|
|
13
13
|
# The unique identifier for the clinical trial. e.g. "GU16-287","BACCI"
|
|
14
14
|
# CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=5054234%20and%20ver_nr=1
|
|
15
|
-
trial_id: str | None = None
|
|
15
|
+
trial_id: str | None = None
|
|
16
16
|
|
|
17
17
|
# The version number of the trial dataset. e.g. "1.0"
|
|
18
18
|
version: str | None = None
|
|
@@ -69,6 +69,10 @@ class Trial(Base):
|
|
|
69
69
|
# CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=16333703%20and%20ver_nr=1
|
|
70
70
|
dates_of_conduct_end: datetime | None = None
|
|
71
71
|
|
|
72
|
+
# A classification of the study based upon the primary intent of the study's activities.
|
|
73
|
+
# CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=11160683%20and%20ver_nr=1
|
|
74
|
+
primary_purpose_type: PrimaryPurposeType
|
|
75
|
+
|
|
72
76
|
# The image of the trial data schema
|
|
73
77
|
schema_file_id: int | None = None
|
|
74
78
|
|
|
@@ -81,11 +85,5 @@ class Trial(Base):
|
|
|
81
85
|
# The list of assays that CIDC expects to receive for this trial.
|
|
82
86
|
expected_assays: List[AssayType] = []
|
|
83
87
|
|
|
84
|
-
# Is the cancer studying a liquid tumor type?
|
|
85
|
-
is_liquid_tumor_trial: bool = False
|
|
86
|
-
|
|
87
88
|
# The dbgap study accession number associated with the trial.
|
|
88
|
-
dbgap_study_accession: str
|
|
89
|
-
|
|
90
|
-
# The internal version identifier for this specific trial dataset.
|
|
91
|
-
version: str
|
|
89
|
+
dbgap_study_accession: str
|
cidc_api/models/types.py
CHANGED
|
@@ -26,6 +26,7 @@ AgeGroup = Literal[
|
|
|
26
26
|
"Pediatric",
|
|
27
27
|
]
|
|
28
28
|
|
|
29
|
+
|
|
29
30
|
TrialOrganization = Literal[
|
|
30
31
|
"ECOG-ACRIN",
|
|
31
32
|
"SWOG",
|
|
@@ -59,6 +60,34 @@ TrialFundingAgency = Literal[
|
|
|
59
60
|
]
|
|
60
61
|
|
|
61
62
|
|
|
63
|
+
# Classification of a study by the primary intent of its activities.
# Values must be written without leading/trailing whitespace: a Literal is matched
# exactly, so a stray trailing space makes the correctly-spelled value fail validation.
PrimaryPurposeType = Literal[
    "Adverse Effect Mitigation Study",
    "Ancillary Study",
    "Basic Science Research",
    "Correlative Study",
    "Cure Study",
    "Device Feasibility Study",
    "Diagnosis Study",
    "Disease Modifying Treatment Study",
    "Early Detection Study",
    "Education Training Clinical Study",
    "Epidemiology Research",
    "Genomics Research",
    "Health Services Research",
    "Imaging Research",
    "Interventional Study",
    "Observational Study",
    "Outcomes Research",
    "Prevention Study",
    "Proteomic Research",
    "Rehabilitation Clinical Study",
    "Screening Study",
    "Supportive Care Study",
    "Transcriptomics Research",
    "Treatment Study",
]
|
|
89
|
+
|
|
90
|
+
|
|
62
91
|
AssayType = Literal[
|
|
63
92
|
"Olink",
|
|
64
93
|
"WES",
|
|
@@ -84,6 +113,7 @@ AssayType = Literal[
|
|
|
84
113
|
"snRNA-Seq",
|
|
85
114
|
"Visium",
|
|
86
115
|
"Olink HT",
|
|
116
|
+
"TCRseq RNA",
|
|
87
117
|
]
|
|
88
118
|
|
|
89
119
|
|
|
@@ -285,7 +315,6 @@ CancerStageAJCC = Literal[
|
|
|
285
315
|
|
|
286
316
|
|
|
287
317
|
CancerStageFIGO = Literal[
|
|
288
|
-
"value",
|
|
289
318
|
"Stage I",
|
|
290
319
|
"Stage IA",
|
|
291
320
|
"Stage IA1",
|
|
@@ -1016,21 +1045,6 @@ GVHDDiagnosisChronicGlobalSeverity = Literal["Mild", "Moderate", "Severe"]
|
|
|
1016
1045
|
GVHDOrganChronicScore = Literal["0", "1", "2", "3"]
|
|
1017
1046
|
|
|
1018
1047
|
|
|
1019
|
-
PriorTreatmentType = Literal[
|
|
1020
|
-
"Surgery",
|
|
1021
|
-
"Radiotherapy",
|
|
1022
|
-
"Immunotherapy",
|
|
1023
|
-
"Chemotherapy",
|
|
1024
|
-
"Targeted therapy",
|
|
1025
|
-
"Other therapy",
|
|
1026
|
-
"Radiopharmaceutical",
|
|
1027
|
-
"Stem cell transplant",
|
|
1028
|
-
"Immunosuppressive therapy/GVHD prophylaxis for transplant",
|
|
1029
|
-
"Conditioning therapy",
|
|
1030
|
-
"Post-transplant salvage therapy",
|
|
1031
|
-
]
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
1048
|
ConditioningRegimenType = Literal["Myeloablative", "Reduced-intensity", "Non-myeloablative", "Other"]
|
|
1035
1049
|
|
|
1036
1050
|
StemCellDonorType = Literal["Autologous", "Allogeneic"]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from urllib.parse import quote
|
|
3
|
+
|
|
4
|
+
from werkzeug.exceptions import BadRequest
|
|
5
|
+
|
|
6
|
+
from cidc_api.models import IngestionJobs
|
|
7
|
+
from . import gcloud_client
|
|
8
|
+
from ..shared.auth import get_current_user
|
|
9
|
+
|
|
10
|
+
JOB_TYPE_ASSAY = "assay"
|
|
11
|
+
JOB_TYPE_CLINICAL = "clinical"
|
|
12
|
+
ALLOWED_JOB_TYPES = {JOB_TYPE_CLINICAL, JOB_TYPE_ASSAY}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def resolve_job_type_and_assay_fields(data: dict) -> tuple[str, str | None, str | None]:
    """Decide job_type and gather assay_type/batch_id from request JSON.

    An explicit, non-empty ``job_type`` in ``data`` takes precedence. When it is
    missing or empty, the job is treated as an assay job if ``assay_type`` is
    present and as a clinical job otherwise.

    Args:
        data: the parsed request JSON.

    Returns:
        A ``(job_type, assay_type, batch_id)`` tuple. ``assay_type`` is the
        whitespace-stripped string, or ``None`` if it is missing, blank, or not a
        string. ``batch_id`` is the whitespace-stripped string, or ``None`` if it
        is not a string.

    Raises:
        BadRequest: if ``job_type`` is not one of the allowed values, or if the
            job is an assay job and no usable ``assay_type`` was supplied.
    """
    raw_assay_type = data.get("assay_type")
    # Explicit job_type wins; otherwise infer it from the presence of assay_type.
    job_type = data.get("job_type") or (JOB_TYPE_ASSAY if raw_assay_type else JOB_TYPE_CLINICAL)

    # The isinstance guard keeps an unhashable value (e.g. a JSON list) from raising
    # TypeError in the set membership test instead of a client-facing BadRequest.
    if not isinstance(job_type, str) or job_type not in ALLOWED_JOB_TYPES:
        raise BadRequest("Invalid job_type. Allowed values are 'clinical' or 'assay'.")

    # Normalize before validating so the check sees the value that is returned:
    # non-strings are dropped rather than crashing on .strip(), and a
    # whitespace-only string counts as missing.
    assay_type = raw_assay_type.strip() if isinstance(raw_assay_type, str) else None
    assay_type = assay_type or None

    if job_type == JOB_TYPE_ASSAY and assay_type is None:
        raise BadRequest("assay_type must be provided for job_type='assay'.")

    raw_batch_id = data.get("batch_id")
    batch_id = raw_batch_id.strip() if isinstance(raw_batch_id, str) else None

    return job_type, assay_type, batch_id
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def prepare_assay_job(trial_id: str, assay_type: str, batch_id: str) -> tuple[str, str, str, datetime, int, str]:
    """
    Check that an assay job may be created and compute its initial field values.

    Returns a tuple of (submission_id, job_status, error_status, start_date, version, gcs_path),
    where gcs_path is "<intake bucket name>/<assay_type>/<submission_id>".

    Raises BadRequest when assay_type is empty, or when batch_id is given and a job already
    exists for the same trial_id/assay_type/batch_id combination.
    """
    if not assay_type:
        raise BadRequest("assay_type must be provided for job_type='assay'.")

    # The (trial_id, assay_type, batch_id) uniqueness rule only applies when a batch_id was supplied.
    duplicate = IngestionJobs.get_unique_assay_job(trial_id, assay_type, batch_id) if batch_id else None
    if duplicate:
        raise BadRequest(
            f"Assay job {duplicate.id} already exists for this exact trial_id/assay_type/batch_id combination."
        )

    submission_id = IngestionJobs.next_assay_submission_id(trial_id, assay_type)
    start_date = datetime.now()

    # Create or retrieve the intake bucket belonging to this trial.
    uploader_email = get_current_user().email
    intake_bucket = gcloud_client.create_intake_bucket(uploader_email, trial_id=trial_id)
    gcs_path = f"{intake_bucket.name}/{assay_type}/{submission_id}"

    return (
        submission_id,
        "INITIAL SUBMISSION",
        "Upload Incomplete",  # job starts with 'Incomplete' notifier
        start_date,
        1,
        gcs_path,
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_google_links(intake_path: str) -> tuple[str, str]:
    """Build the GCS URI and GCS Console URL corresponding to the intake path.

    Args:
        intake_path: "<bucket>/<object prefix>" path. A leading "gs://" is tolerated
            and ignored, so an already-qualified URI does not become "gs://gs://...".

    Returns:
        A ``(gcs_uri, console_url)`` tuple. The URI keeps the path verbatim; the
        console URL carries the percent-encoded path ("/" is left unescaped).
    """
    bucket_path = intake_path.removeprefix("gs://")
    gcs_uri = f"gs://{bucket_path}"
    # Encode path to ensure link opens correctly (e.g. spaces in assay names).
    encoded_path = quote(bucket_path)
    console_url = f"https://console.cloud.google.com/storage/browser/{encoded_path}"

    return gcs_uri, console_url
|
cidc_api/shared/auth.py
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
from functools import wraps
|
|
2
2
|
from typing import List
|
|
3
3
|
|
|
4
|
-
from packaging import version
|
|
5
|
-
|
|
6
4
|
from flask import g, request, current_app as app, Flask
|
|
5
|
+
from packaging import version
|
|
7
6
|
from werkzeug.exceptions import Unauthorized, BadRequest, PreconditionFailed
|
|
8
7
|
|
|
9
|
-
from ..models import Users, UserSchema
|
|
10
|
-
|
|
11
8
|
from ..config.logging import get_logger
|
|
12
|
-
|
|
9
|
+
from ..models import Users, UserSchema
|
|
13
10
|
from ..shared.jose import decode_id_token
|
|
11
|
+
from ..telemetry import trace_
|
|
14
12
|
|
|
15
13
|
logger = get_logger(__name__)
|
|
16
14
|
|
|
@@ -144,6 +142,7 @@ def get_current_user() -> Users:
|
|
|
144
142
|
_user_schema = UserSchema()
|
|
145
143
|
|
|
146
144
|
|
|
145
|
+
@trace_()
|
|
147
146
|
def authenticate() -> Users:
|
|
148
147
|
id_token = _extract_token()
|
|
149
148
|
token_payload = decode_id_token(id_token)
|
|
@@ -172,6 +171,7 @@ def _extract_token() -> str:
|
|
|
172
171
|
|
|
173
172
|
|
|
174
173
|
### Authorization logic ###
|
|
174
|
+
@trace_()
|
|
175
175
|
def authorize(user: Users, allowed_roles: List[str], resource: str, method: str) -> bool:
|
|
176
176
|
"""Check if the current user is authorized to act on the current request's resource.
|
|
177
177
|
Raises Unauthorized
|
cidc_api/shared/file_handling.py
CHANGED
|
@@ -10,22 +10,33 @@ from ..config.settings import GOOGLE_CLINICAL_DATA_BUCKET
|
|
|
10
10
|
from ..models import PreprocessedFiles, TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER
|
|
11
11
|
from ..shared.auth import get_current_user
|
|
12
12
|
from ..shared.gcloud_client import upload_file_to_gcs, move_gcs_file
|
|
13
|
+
from ..telemetry import trace_
|
|
13
14
|
|
|
14
15
|
logger = get_logger(__name__)
|
|
15
16
|
|
|
16
17
|
|
|
18
|
+
@trace_()
|
|
17
19
|
def set_current_file(
|
|
18
|
-
file: FileStorage,
|
|
20
|
+
file: FileStorage,
|
|
21
|
+
file_category: str,
|
|
22
|
+
gcs_folder: str,
|
|
23
|
+
session: Session,
|
|
24
|
+
uploader_email: str,
|
|
25
|
+
job_id: int = None,
|
|
26
|
+
append_timestamp: bool = None,
|
|
19
27
|
) -> PreprocessedFiles:
|
|
20
28
|
"""
|
|
21
29
|
Archives any existing 'current' files for the given category and job,
|
|
22
30
|
then uploads the new file as the latest 'current' version.
|
|
23
31
|
"""
|
|
24
32
|
latest_version = PreprocessedFiles.archive_current_files(file_category, job_id=job_id, session=session)
|
|
25
|
-
latest_file = create_file(
|
|
33
|
+
latest_file = create_file(
|
|
34
|
+
file, gcs_folder, file_category, session, uploader_email, job_id, latest_version + 1, append_timestamp
|
|
35
|
+
)
|
|
26
36
|
return latest_file
|
|
27
37
|
|
|
28
38
|
|
|
39
|
+
@trace_()
|
|
29
40
|
def create_file(
|
|
30
41
|
file: FileStorage,
|
|
31
42
|
gcs_folder: str,
|
|
@@ -34,11 +45,12 @@ def create_file(
|
|
|
34
45
|
uploader_email: str,
|
|
35
46
|
job_id: int = None,
|
|
36
47
|
version: int = None,
|
|
48
|
+
append_timestamp: bool = None,
|
|
37
49
|
) -> PreprocessedFiles:
|
|
38
50
|
"""Upload file to GCS and create corresponding metadata record in the database."""
|
|
39
51
|
status = "pending" if gcs_folder.endswith("pending/") else "current"
|
|
40
|
-
# only need timestamp for current/versioned files
|
|
41
|
-
append_timestamp = status == "current"
|
|
52
|
+
# only need timestamp for current/versioned files, if not specified otherwise
|
|
53
|
+
append_timestamp = append_timestamp if append_timestamp is not None else (status == "current")
|
|
42
54
|
# create file in GCS
|
|
43
55
|
gcs_file_path = upload_file_to_gcs(file, GOOGLE_CLINICAL_DATA_BUCKET, gcs_folder, append_timestamp=append_timestamp)
|
|
44
56
|
# create corresponding record in db
|
cidc_api/shared/gcloud_client.py
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
"""Utilities for interacting with the Google Cloud Platform APIs."""
|
|
2
2
|
|
|
3
|
-
# pylint: disable=logging-fstring-interpolation,too-many-lines
|
|
3
|
+
# pylint: disable=logging-fstring-interpolation,too-many-lines, broad-exception-raised
|
|
4
4
|
|
|
5
|
+
import asyncio
|
|
5
6
|
import base64
|
|
6
7
|
import datetime
|
|
7
8
|
import hashlib
|
|
8
9
|
import io
|
|
9
10
|
import json
|
|
10
11
|
import os
|
|
12
|
+
import re
|
|
11
13
|
import warnings
|
|
12
14
|
from collections import namedtuple
|
|
13
15
|
from concurrent.futures import Future
|
|
@@ -25,6 +27,8 @@ from typing import (
|
|
|
25
27
|
)
|
|
26
28
|
|
|
27
29
|
import googleapiclient.discovery
|
|
30
|
+
from gcloud.aio.storage import Storage
|
|
31
|
+
from pandas.core.frame import DataFrame
|
|
28
32
|
import pandas as pd
|
|
29
33
|
import requests
|
|
30
34
|
from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
|
|
@@ -56,6 +60,7 @@ from ..config.settings import (
|
|
|
56
60
|
GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC,
|
|
57
61
|
GOOGLE_HL_CLINICAL_VALIDATION_TOPIC,
|
|
58
62
|
GOOGLE_DL_CLINICAL_VALIDATION_TOPIC,
|
|
63
|
+
GOOGLE_ASSAY_METADATA_VALIDATION_TOPIC,
|
|
59
64
|
TESTING,
|
|
60
65
|
ENV,
|
|
61
66
|
IS_EMAIL_ON,
|
|
@@ -361,15 +366,34 @@ def get_intake_bucket_name(user_email: str) -> str:
|
|
|
361
366
|
return bucket_name
|
|
362
367
|
|
|
363
368
|
|
|
364
|
-
def
|
|
369
|
+
def get_trial_intake_bucket_name(trial_id: str) -> str:
    """
    Return a sanitized GCS bucket name for a given trial_id.

    Produces: <GOOGLE_INTAKE_BUCKET>-<sanitized_trial_id>
    where the trial_id segment is lowercased and restricted to [a-z0-9-].

    Raises:
        ValueError: if trial_id contains no letters or digits, because the
            resulting name would end in "-" and bucket names must end with a
            letter or number.
    """
    # Replace non-allowed bucket chars with "-"
    sanitized_id = re.sub(r"[^a-z0-9-]", "-", trial_id.lower())
    # Collapse repeated "-" and trim from both ends
    sanitized_id = re.sub(r"-+", "-", sanitized_id).strip("-")

    if not sanitized_id:
        # Fail here with a clear message instead of handing an invalid name to GCS.
        raise ValueError(f"trial_id {trial_id!r} does not contain any characters usable in a bucket name")

    return f"{GOOGLE_INTAKE_BUCKET}-{sanitized_id}"
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def create_intake_bucket(user_email: str, trial_id: str = None) -> storage.Bucket:
|
|
385
|
+
"""
|
|
386
|
+
Create (or retrieve) the appropriate data intake bucket.
|
|
387
|
+
If a trial_id is provided, a trial-specific bucket is used;
|
|
388
|
+
otherwise a user-specific intake bucket is used.
|
|
389
|
+
|
|
367
390
|
Grant the user GCS object admin permissions on the bucket, or refresh those
|
|
368
391
|
permissions if they've already been granted.
|
|
369
392
|
Created with uniform bucket-level IAM access, so expiring permission.
|
|
370
393
|
"""
|
|
371
394
|
storage_client = _get_storage_client()
|
|
372
|
-
|
|
395
|
+
# Get trial-specific bucket name if trial_id is given, otherwise a user-specific bucket name.
|
|
396
|
+
bucket_name = get_trial_intake_bucket_name(trial_id) if trial_id else get_intake_bucket_name(user_email)
|
|
373
397
|
bucket = storage_client.bucket(bucket_name)
|
|
374
398
|
|
|
375
399
|
if not bucket.exists():
|
|
@@ -423,25 +447,50 @@ def upload_xlsx_to_intake_bucket(user_email: str, trial_id: str, upload_type: st
|
|
|
423
447
|
return f"https://console.cloud.google.com/storage/browser/_details/{bucket_name}/{blob_name}"
|
|
424
448
|
|
|
425
449
|
|
|
426
|
-
def
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
if blob_name[-3:] == "csv":
|
|
432
|
-
return strip_whitespaces(pd.read_csv(temp_file))
|
|
433
|
-
elif blob_name[-4:] == "xlsx":
|
|
434
|
-
return strip_whitespaces(pd.read_excel(temp_file))
|
|
450
|
+
def prepare_dataframe(extension, bytes) -> DataFrame:
    """Parse CSV or XLSX content into a DataFrame and pass it through strip_whitespaces.

    Args:
        extension: file extension without the dot; "csv" or "xlsx", matched
            case-insensitively so names such as "FILE.CSV" are accepted.
        bytes: the file content, in any form accepted by pandas.read_csv /
            pandas.read_excel (callers in this module pass an io.BytesIO).

    Returns:
        The parsed DataFrame. Columns are read with dtype=str and
        keep_default_na=False, so empty cells are not converted to NaN.

    Raises:
        Exception: if the extension is neither csv nor xlsx.
    """
    normalized_extension = extension.lower()
    if normalized_extension == "csv":
        return strip_whitespaces(pd.read_csv(bytes, dtype=str, keep_default_na=False))
    if normalized_extension == "xlsx":
        return strip_whitespaces(pd.read_excel(bytes, dtype=str, keep_default_na=False))
    raise Exception("Can only read csv or xlsx files")
|
|
437
457
|
|
|
438
458
|
|
|
459
|
+
def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str) -> DataFrame:
    """Download one XLSX or CSV blob from Google Cloud Storage and parse it into a Pandas DataFrame."""
    # The text after the last "." selects the parser; a name without "." is passed through whole.
    file_extension = blob_name.rpartition(".")[2]
    return prepare_dataframe(file_extension, get_file_bytes_from_gcs(bucket_name, blob_name))
|
|
464
|
+
|
|
465
|
+
|
|
439
466
|
def get_file_bytes_from_gcs(bucket_name: str, blob_name: str) -> io.BytesIO:
    """Fetch a blob from Google Cloud Storage and wrap its bytes in a BytesIO.

    Args:
        bucket_name: Name of the bucket holding the blob.
        blob_name: Name of the blob to download.

    Returns:
        An in-memory ``io.BytesIO`` containing the downloaded bytes.
    """
    # A new storage client is constructed on every call.
    blob = storage.Client().bucket(bucket_name).blob(blob_name)
    return io.BytesIO(blob.download_as_bytes())
|
|
443
470
|
|
|
444
471
|
|
|
472
|
+
async def async_gcs_files_to_pandas_dataframes(bucket_name: str, blob_names: List[str]) -> List[DataFrame]:
    """Asynchronously read XLSX or CSV blobs from Google Cloud Storage into DataFrames.

    Args:
        bucket_name: Name of the bucket holding all of the blobs.
        blob_names: Names of the blobs to read; the text after the last "."
            of each name selects the parser used by ``prepare_dataframe``.

    Returns:
        One DataFrame per entry of ``blob_names``, in the same order.
    """
    # Start every download before awaiting so they run concurrently; gather
    # returns the results in the order the awaitables were passed.
    pending = [async_get_file_bytes_from_gcs(bucket_name, name) for name in blob_names]
    buffers = await asyncio.gather(*pending)

    return [prepare_dataframe(name.split(".")[-1], buffer) for name, buffer in zip(blob_names, buffers)]
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
async def async_get_file_bytes_from_gcs(bucket_name: str, blob_name: str) -> io.BytesIO:
    """Asynchronously fetch a blob from Google Cloud Storage as a BytesIO.

    Args:
        bucket_name: Name of the bucket holding the blob.
        blob_name: Name of the blob to download.

    Returns:
        An in-memory ``io.BytesIO`` containing the downloaded bytes.
    """
    # The async client lives only for this download; the context manager
    # closes it before the buffer is built.
    async with Storage() as gcs:
        payload = await gcs.download(bucket_name, blob_name)
    return io.BytesIO(payload)
|
|
492
|
+
|
|
493
|
+
|
|
445
494
|
def _execute_multiblob_acl_change(
|
|
446
495
|
user_email_list: List[str],
|
|
447
496
|
blob_list: List[storage.Blob],
|
|
@@ -614,6 +663,7 @@ def _build_trial_upload_prefixes(
|
|
|
614
663
|
trial_set: Set[str] = set()
|
|
615
664
|
upload_set: Set[str] = set()
|
|
616
665
|
if not trial_id:
|
|
666
|
+
# import is here because of circular import
|
|
617
667
|
from ..models.models import TrialMetadata
|
|
618
668
|
|
|
619
669
|
trial_set = {str(t.trial_id) for t in session.query(TrialMetadata).add_columns(TrialMetadata.trial_id)}
|
|
@@ -886,6 +936,7 @@ def get_signed_url(
|
|
|
886
936
|
bucket_name: str = GOOGLE_ACL_DATA_BUCKET,
|
|
887
937
|
method: str = "GET",
|
|
888
938
|
expiry_mins: int = 30,
|
|
939
|
+
use_short_filename: bool = False,
|
|
889
940
|
) -> str:
|
|
890
941
|
"""
|
|
891
942
|
Generate a signed URL for `object_name` to give a client temporary access.
|
|
@@ -900,7 +951,11 @@ def get_signed_url(
|
|
|
900
951
|
|
|
901
952
|
# Generate the signed URL, allowing a client to use `method` for `expiry_mins` minutes
|
|
902
953
|
expiration = datetime.timedelta(minutes=expiry_mins)
|
|
903
|
-
|
|
954
|
+
if use_short_filename:
|
|
955
|
+
filename = os.path.basename(object_name)
|
|
956
|
+
else:
|
|
957
|
+
# full filename with path included
|
|
958
|
+
filename = object_name.replace("/", "_").replace('"', "_").replace(" ", "_")
|
|
904
959
|
other_kwargs = {}
|
|
905
960
|
if os.environ.get("DEV_GOOGLE_STORAGE", None):
|
|
906
961
|
other_kwargs["api_access_endpoint"] = (os.environ.get("DEV_GOOGLE_STORAGE") or "") + (
|
|
@@ -910,7 +965,7 @@ def get_signed_url(
|
|
|
910
965
|
version="v2",
|
|
911
966
|
expiration=expiration,
|
|
912
967
|
method=method,
|
|
913
|
-
response_disposition=f'attachment; filename="{
|
|
968
|
+
response_disposition=f'attachment; filename="{filename}"',
|
|
914
969
|
**other_kwargs,
|
|
915
970
|
)
|
|
916
971
|
logger.info(f"generated signed URL for {object_name}: {url}")
|
|
@@ -920,7 +975,8 @@ def get_signed_url(
|
|
|
920
975
|
|
|
921
976
|
def _encode_and_publish(content: str, topic: str) -> Future:
|
|
922
977
|
"""Convert `content` to bytes and publish it to `topic`."""
|
|
923
|
-
|
|
978
|
+
publisher_options = pubsub.types.PublisherOptions(enable_open_telemetry_tracing=ENV == "dev-int")
|
|
979
|
+
pubsub_publisher = pubsub.PublisherClient(publisher_options=publisher_options)
|
|
924
980
|
topic = pubsub_publisher.topic_path(GOOGLE_CLOUD_PROJECT, topic)
|
|
925
981
|
data = bytes(content, "utf-8")
|
|
926
982
|
|
|
@@ -994,6 +1050,12 @@ def publish_detailed_validation(job_id: int) -> None:
|
|
|
994
1050
|
_report = _encode_and_publish(str(job_id), GOOGLE_DL_CLINICAL_VALIDATION_TOPIC)
|
|
995
1051
|
|
|
996
1052
|
|
|
1053
|
+
def publish_assay_metadata_validation(job_id: int) -> None:
    """Announce on the assay metadata validation topic that a job is ready.

    The job id is published as a string to
    ``GOOGLE_ASSAY_METADATA_VALIDATION_TOPIC``.

    Args:
        job_id: Identifier of the job whose assay metadata file should be
            validated.
    """
    # Fire-and-forget: the value returned by the publish call is not used,
    # so this function does not wait on it.
    message = str(job_id)
    _encode_and_publish(message, GOOGLE_ASSAY_METADATA_VALIDATION_TOPIC)
|
|
1057
|
+
|
|
1058
|
+
|
|
997
1059
|
def send_email(to_emails: List[str], subject: str, html_content: str, **kw) -> None:
|
|
998
1060
|
"""
|
|
999
1061
|
Publish an email-to-send to the emails topic.
|