nci-cidc-api-modules 1.2.34__py3-none-any.whl → 1.2.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. cidc_api/__init__.py +1 -0
  2. cidc_api/config/db.py +21 -1
  3. cidc_api/config/settings.py +1 -0
  4. cidc_api/models/__init__.py +0 -2
  5. cidc_api/models/data.py +15 -6
  6. cidc_api/models/db/stage1/__init__.py +56 -0
  7. cidc_api/models/db/stage1/additional_treatment_orm.py +22 -0
  8. cidc_api/models/db/stage1/adverse_event_orm.py +46 -0
  9. cidc_api/models/db/stage1/base_orm.py +7 -0
  10. cidc_api/models/db/stage1/baseline_clinical_assessment_orm.py +22 -0
  11. cidc_api/models/db/stage1/comorbidity_orm.py +23 -0
  12. cidc_api/models/db/stage1/consent_group_orm.py +32 -0
  13. cidc_api/models/db/stage1/demographic_orm.py +47 -0
  14. cidc_api/models/db/stage1/disease_orm.py +52 -0
  15. cidc_api/models/db/stage1/exposure_orm.py +22 -0
  16. cidc_api/models/db/stage1/gvhd_diagnosis_acute_orm.py +34 -0
  17. cidc_api/models/db/stage1/gvhd_diagnosis_chronic_orm.py +36 -0
  18. cidc_api/models/db/stage1/gvhd_organ_acute_orm.py +21 -0
  19. cidc_api/models/db/stage1/gvhd_organ_chronic_orm.py +21 -0
  20. cidc_api/models/db/stage1/medical_history_orm.py +30 -0
  21. cidc_api/models/db/stage1/other_malignancy_orm.py +29 -0
  22. cidc_api/models/db/stage1/participant_orm.py +77 -0
  23. cidc_api/models/db/stage1/prior_treatment_orm.py +29 -0
  24. cidc_api/models/db/stage1/radiotherapy_dose_orm.py +39 -0
  25. cidc_api/models/db/stage1/response_by_system_orm.py +30 -0
  26. cidc_api/models/db/stage1/response_orm.py +28 -0
  27. cidc_api/models/db/stage1/specimen_orm.py +46 -0
  28. cidc_api/models/db/stage1/stem_cell_transplant_orm.py +25 -0
  29. cidc_api/models/db/stage1/surgery_orm.py +27 -0
  30. cidc_api/models/db/stage1/therapy_agent_dose_orm.py +31 -0
  31. cidc_api/models/db/stage1/treatment_orm.py +38 -0
  32. cidc_api/models/db/stage1/trial_orm.py +35 -0
  33. cidc_api/models/db/stage2/additional_treatment_orm.py +6 -7
  34. cidc_api/models/db/stage2/administrative_person_orm.py +4 -4
  35. cidc_api/models/db/stage2/administrative_role_assignment_orm.py +4 -4
  36. cidc_api/models/db/stage2/adverse_event_orm.py +11 -13
  37. cidc_api/models/db/stage2/arm_orm.py +3 -3
  38. cidc_api/models/db/stage2/base_orm.py +7 -0
  39. cidc_api/models/db/stage2/baseline_clinical_assessment_orm.py +5 -7
  40. cidc_api/models/db/stage2/cohort_orm.py +3 -3
  41. cidc_api/models/db/stage2/comorbidity_orm.py +6 -8
  42. cidc_api/models/db/stage2/consent_group_orm.py +4 -4
  43. cidc_api/models/db/stage2/contact_orm.py +16 -20
  44. cidc_api/models/db/stage2/demographic_orm.py +3 -3
  45. cidc_api/models/db/stage2/disease_orm.py +4 -4
  46. cidc_api/models/db/stage2/exposure_orm.py +3 -3
  47. cidc_api/models/db/stage2/file_orm.py +6 -9
  48. cidc_api/models/db/stage2/gvhd_diagnosis_acute_orm.py +4 -4
  49. cidc_api/models/db/stage2/gvhd_diagnosis_chronic_orm.py +4 -6
  50. cidc_api/models/db/stage2/gvhd_organ_acute_orm.py +3 -3
  51. cidc_api/models/db/stage2/gvhd_organ_chronic_orm.py +3 -3
  52. cidc_api/models/db/stage2/institution_orm.py +7 -7
  53. cidc_api/models/db/stage2/medical_history_orm.py +9 -9
  54. cidc_api/models/db/stage2/other_clinical_endpoint_orm.py +8 -12
  55. cidc_api/models/db/stage2/other_malignancy_orm.py +8 -10
  56. cidc_api/models/db/stage2/participant_orm.py +23 -24
  57. cidc_api/models/db/stage2/prior_treatment_orm.py +12 -13
  58. cidc_api/models/db/stage2/publication_orm.py +9 -11
  59. cidc_api/models/db/stage2/radiotherapy_dose_orm.py +8 -9
  60. cidc_api/models/db/stage2/response_by_system_orm.py +3 -3
  61. cidc_api/models/db/stage2/response_orm.py +3 -3
  62. cidc_api/models/db/stage2/shipment_orm.py +17 -17
  63. cidc_api/models/db/stage2/shipment_specimen_orm.py +4 -4
  64. cidc_api/models/db/stage2/specimen_orm.py +7 -6
  65. cidc_api/models/db/stage2/stem_cell_transplant_orm.py +6 -7
  66. cidc_api/models/db/stage2/surgery_orm.py +6 -7
  67. cidc_api/models/db/stage2/therapy_agent_dose_orm.py +7 -8
  68. cidc_api/models/db/stage2/treatment_orm.py +15 -15
  69. cidc_api/models/db/stage2/trial_orm.py +15 -17
  70. cidc_api/models/files/facets.py +4 -0
  71. cidc_api/models/models.py +153 -9
  72. cidc_api/models/pydantic/{stage2/base.py → base.py} +1 -1
  73. cidc_api/models/pydantic/stage1/__init__.py +56 -0
  74. cidc_api/models/pydantic/stage1/additional_treatment.py +23 -0
  75. cidc_api/models/pydantic/stage1/adverse_event.py +100 -0
  76. cidc_api/models/pydantic/stage1/baseline_clinical_assessment.py +23 -0
  77. cidc_api/models/pydantic/stage1/comorbidity.py +36 -0
  78. cidc_api/models/pydantic/stage1/consent_group.py +30 -0
  79. cidc_api/models/pydantic/stage1/demographic.py +123 -0
  80. cidc_api/models/pydantic/stage1/disease.py +158 -0
  81. cidc_api/models/pydantic/stage1/exposure.py +32 -0
  82. cidc_api/models/pydantic/stage1/gvhd_diagnosis_acute.py +33 -0
  83. cidc_api/models/pydantic/stage1/gvhd_diagnosis_chronic.py +32 -0
  84. cidc_api/models/pydantic/stage1/gvhd_organ_acute.py +22 -0
  85. cidc_api/models/pydantic/stage1/gvhd_organ_chronic.py +23 -0
  86. cidc_api/models/pydantic/stage1/medical_history.py +36 -0
  87. cidc_api/models/pydantic/stage1/other_malignancy.py +49 -0
  88. cidc_api/models/pydantic/stage1/participant.py +51 -0
  89. cidc_api/models/pydantic/stage1/prior_treatment.py +45 -0
  90. cidc_api/models/pydantic/stage1/radiotherapy_dose.py +79 -0
  91. cidc_api/models/pydantic/stage1/response.py +65 -0
  92. cidc_api/models/pydantic/stage1/response_by_system.py +112 -0
  93. cidc_api/models/pydantic/stage1/specimen.py +31 -0
  94. cidc_api/models/pydantic/stage1/stem_cell_transplant.py +35 -0
  95. cidc_api/models/pydantic/stage1/surgery.py +49 -0
  96. cidc_api/models/pydantic/stage1/therapy_agent_dose.py +67 -0
  97. cidc_api/models/pydantic/stage1/treatment.py +50 -0
  98. cidc_api/models/pydantic/stage1/trial.py +45 -0
  99. cidc_api/models/pydantic/stage2/additional_treatment.py +2 -4
  100. cidc_api/models/pydantic/stage2/administrative_person.py +1 -1
  101. cidc_api/models/pydantic/stage2/administrative_role_assignment.py +2 -2
  102. cidc_api/models/pydantic/stage2/adverse_event.py +1 -1
  103. cidc_api/models/pydantic/stage2/arm.py +2 -2
  104. cidc_api/models/pydantic/stage2/baseline_clinical_assessment.py +1 -1
  105. cidc_api/models/pydantic/stage2/cohort.py +1 -1
  106. cidc_api/models/pydantic/stage2/comorbidity.py +1 -1
  107. cidc_api/models/pydantic/stage2/consent_group.py +2 -2
  108. cidc_api/models/pydantic/stage2/contact.py +1 -1
  109. cidc_api/models/pydantic/stage2/demographic.py +1 -1
  110. cidc_api/models/pydantic/stage2/disease.py +1 -1
  111. cidc_api/models/pydantic/stage2/exposure.py +1 -1
  112. cidc_api/models/pydantic/stage2/file.py +2 -2
  113. cidc_api/models/pydantic/stage2/gvhd_diagnosis_acute.py +1 -1
  114. cidc_api/models/pydantic/stage2/gvhd_diagnosis_chronic.py +1 -1
  115. cidc_api/models/pydantic/stage2/gvhd_organ_acute.py +1 -1
  116. cidc_api/models/pydantic/stage2/gvhd_organ_chronic.py +1 -1
  117. cidc_api/models/pydantic/stage2/institution.py +1 -1
  118. cidc_api/models/pydantic/stage2/medical_history.py +1 -1
  119. cidc_api/models/pydantic/stage2/other_clinical_endpoint.py +1 -1
  120. cidc_api/models/pydantic/stage2/other_malignancy.py +1 -1
  121. cidc_api/models/pydantic/stage2/participant.py +6 -3
  122. cidc_api/models/pydantic/stage2/prior_treatment.py +6 -15
  123. cidc_api/models/pydantic/stage2/publication.py +2 -2
  124. cidc_api/models/pydantic/stage2/radiotherapy_dose.py +1 -1
  125. cidc_api/models/pydantic/stage2/response.py +2 -2
  126. cidc_api/models/pydantic/stage2/response_by_system.py +1 -1
  127. cidc_api/models/pydantic/stage2/shipment.py +2 -2
  128. cidc_api/models/pydantic/stage2/shipment_specimen.py +1 -1
  129. cidc_api/models/pydantic/stage2/specimen.py +6 -3
  130. cidc_api/models/pydantic/stage2/stem_cell_transplant.py +2 -2
  131. cidc_api/models/pydantic/stage2/surgery.py +1 -1
  132. cidc_api/models/pydantic/stage2/therapy_agent_dose.py +1 -1
  133. cidc_api/models/pydantic/stage2/treatment.py +1 -1
  134. cidc_api/models/pydantic/stage2/trial.py +8 -10
  135. cidc_api/models/types.py +30 -16
  136. cidc_api/shared/assay_handling.py +68 -0
  137. cidc_api/shared/auth.py +5 -5
  138. cidc_api/shared/file_handling.py +16 -4
  139. cidc_api/shared/gcloud_client.py +78 -16
  140. cidc_api/shared/utils.py +18 -9
  141. cidc_api/telemetry.py +101 -0
  142. {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/METADATA +21 -12
  143. nci_cidc_api_modules-1.2.45.dist-info/RECORD +165 -0
  144. cidc_api/models/db/base_orm.py +0 -25
  145. nci_cidc_api_modules-1.2.34.dist-info/RECORD +0 -109
  146. {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/WHEEL +0 -0
  147. {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/licenses/LICENSE +0 -0
  148. {nci_cidc_api_modules-1.2.34.dist-info → nci_cidc_api_modules-1.2.45.dist-info}/top_level.txt +0 -0
@@ -2,8 +2,8 @@ from typing import Self, Annotated, List
2
2
 
3
3
  from pydantic import NonPositiveInt, NegativeInt, model_validator, BeforeValidator
4
4
 
5
- from .base import Base
6
- from cidc_api.models.types import PriorTreatmentType, ConditioningRegimenType, StemCellDonorType
5
+ from cidc_api.models.pydantic.base import Base
6
+ from cidc_api.models.types import ConditioningRegimenType, StemCellDonorType
7
7
 
8
8
 
9
9
  class PriorTreatment(Base):
@@ -24,12 +24,9 @@ class PriorTreatment(Base):
24
24
  # the treatment modality.
25
25
  prior_treatment_days_to_end: NonPositiveInt | None = None
26
26
 
27
- # Specifies the category or kind of prior treatment modality a participant received.
28
- prior_treatment_type: Annotated[List[PriorTreatmentType], BeforeValidator(Base.split_list)]
29
-
30
27
  # Description of the prior treatment such as its full generic name if it is a type of therapy agent,
31
28
  # radiotherapy procedure name and location, or surgical procedure name and location.
32
- prior_treatment_description: str | None = None
29
+ prior_treatment_description: str
33
30
 
34
31
  # Best response from any response assessment system to the prior treatment if available or applicable.
35
32
  prior_treatment_best_response: str | None = None
@@ -41,12 +38,6 @@ class PriorTreatment(Base):
41
38
  # If prior treatment is "Stem cell transplant", indicates what stem cell donor type used.
42
39
  prior_treatment_stem_cell_donor_type: StemCellDonorType | None = None
43
40
 
44
- # If prior treatment is "Stem cell transplant", indicates the number of days from the transplant
45
- # date to the start of the current treatment.
46
- prior_treatment_days_to_prior_transplant: NegativeInt | None = None
47
-
48
- @model_validator(mode="after")
49
- def validate_description_cr(self) -> Self:
50
- if "Other therapy" in self.prior_treatment_type and not self.prior_treatment_description:
51
- raise ValueError('If type is "Other therapy", please provide description.')
52
- return self
41
+ # If prior treatment is "Stem cell transplant", indicates the number of days from enrollment
42
+ # to the prior transplant. This must be a negative number.
43
+ prior_treatment_days_from_transplant_to_treatment_initiation: NegativeInt | None = None
@@ -1,4 +1,4 @@
1
- from .base import Base
1
+ from cidc_api.models.pydantic.base import Base
2
2
 
3
3
 
4
4
  class Publication(Base):
@@ -6,7 +6,7 @@ class Publication(Base):
6
6
  publication_id: int | None = None
7
7
 
8
8
  # The unique internal identifier for the associated Trial record
9
- trial_id: int | None = None
9
+ trial_id: str | None = None
10
10
 
11
11
  # The version number of the trial dataset
12
12
  version: str | None = None
@@ -2,7 +2,7 @@ from typing import Self
2
2
 
3
3
  from pydantic import NonNegativeInt, NonNegativeFloat, model_validator
4
4
 
5
- from .base import Base
5
+ from cidc_api.models.pydantic.base import Base
6
6
  from cidc_api.models.types import (
7
7
  YNU,
8
8
  RadiotherapyProcedure,
@@ -2,7 +2,7 @@ from typing import Self
2
2
 
3
3
  from pydantic import NonNegativeInt, model_validator
4
4
 
5
- from .base import Base
5
+ from cidc_api.models.pydantic.base import Base
6
6
  from cidc_api.models.types import SurvivalStatus, YNUNA, CauseOfDeath
7
7
 
8
8
 
@@ -44,7 +44,7 @@ class Response(Base):
44
44
  evaluable_for_efficacy: bool
45
45
 
46
46
  # Days from enrollment date to the last time the patient's vital status was verified.
47
- days_to_last_vital_status: NonNegativeInt | None = None
47
+ days_to_last_vital_status: NonNegativeInt | None = None # TODO: Needs CR check
48
48
 
49
49
  @model_validator(mode="after")
50
50
  def validate_cause_of_death_cr(self) -> Self:
@@ -2,7 +2,7 @@ from typing import Self
2
2
 
3
3
  from pydantic import PositiveInt, model_validator, NonNegativeInt
4
4
 
5
- from .base import Base
5
+ from cidc_api.models.pydantic.base import Base
6
6
  from cidc_api.models.types import ResponseSystem, ResponseSystemVersion, BestOverallResponse, YNUNA
7
7
 
8
8
 
@@ -1,6 +1,6 @@
1
1
  from datetime import datetime
2
2
 
3
- from .base import Base
3
+ from cidc_api.models.pydantic.base import Base
4
4
  from cidc_api.models.types import AssayPriority, AssayType, Courier, ShipmentCondition, ShipmentQuality
5
5
 
6
6
 
@@ -12,7 +12,7 @@ class Shipment(Base):
12
12
  institution_id: int | None = None
13
13
 
14
14
  # The unique internal identifier for the associated trial.
15
- trial_id: int | None = None
15
+ trial_id: str | None = None
16
16
 
17
17
  # The version number of the trial dataset
18
18
  version: str | None = None
@@ -1,4 +1,4 @@
1
- from .base import Base
1
+ from cidc_api.models.pydantic.base import Base
2
2
 
3
3
 
4
4
  class ShipmentSpecimen(Base):
@@ -1,6 +1,6 @@
1
1
  from datetime import datetime
2
2
 
3
- from .base import Base
3
+ from cidc_api.models.pydantic.base import Base
4
4
  from cidc_api.models.types import (
5
5
  UberonAnatomicalTerm,
6
6
  ICDO3MorphologicalCode,
@@ -66,7 +66,7 @@ class Specimen(Base):
66
66
  # Categorical description of timepoint at which the sample was taken.
67
67
  # CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=5899851%20and%20ver_nr=1
68
68
  # Note: CIDC doesn't conform to this CDE's PVs
69
- collection_event_name: str | None = None
69
+ collection_event_name: str
70
70
 
71
71
  # The type of the specimen
72
72
  specimen_type: SpecimenType | None = None
@@ -208,4 +208,7 @@ class Specimen(Base):
208
208
  date_ingested: datetime | None = None
209
209
 
210
210
  # Days from enrollment date to date specimen was collected.
211
- days_to_specimen_collection: int | None = None
211
+ days_to_specimen_collection: int
212
+
213
+ # The location within the body from which a specimen was originally obtained as captured in the Uberon anatomical term.
214
+ organ_site_of_collection: UberonAnatomicalTerm
@@ -1,5 +1,5 @@
1
1
  from pydantic import NonNegativeInt
2
- from .base import Base
2
+ from cidc_api.models.pydantic.base import Base
3
3
  from cidc_api.models.types import (
4
4
  StemCellDonorType,
5
5
  AllogeneicDonorType,
@@ -26,7 +26,7 @@ class StemCellTransplant(Base):
26
26
  allogeneic_donor_type: AllogeneicDonorType | None = None
27
27
 
28
28
  # Source of the stem cells used for transplant.
29
- stem_cells_source: StemCellSource
29
+ stem_cell_source: StemCellSource
30
30
 
31
31
  # Days from the enrollment date to the date of the stem cell transplant.
32
32
  days_to_transplant: NonNegativeInt
@@ -2,7 +2,7 @@ from typing import Self
2
2
 
3
3
  from pydantic import NonNegativeInt, model_validator
4
4
 
5
- from .base import Base
5
+ from cidc_api.models.pydantic.base import Base
6
6
  from cidc_api.models.types import SurgicalProcedure, UberonAnatomicalTerm, YNU
7
7
 
8
8
 
@@ -2,7 +2,7 @@ from typing import Self
2
2
 
3
3
  from pydantic import NonNegativeInt, NonNegativeFloat, PositiveFloat, model_validator
4
4
 
5
- from .base import Base
5
+ from cidc_api.models.pydantic.base import Base
6
6
  from cidc_api.models.types import YNU, TherapyAgentDoseUnits
7
7
 
8
8
 
@@ -2,7 +2,7 @@ from typing import Self
2
2
 
3
3
  from pydantic import model_validator
4
4
 
5
- from .base import Base
5
+ from cidc_api.models.pydantic.base import Base
6
6
  from cidc_api.models.types import YNU, OffTreatmentReason
7
7
 
8
8
 
@@ -2,8 +2,8 @@ from datetime import datetime
2
2
  from pydantic import BeforeValidator
3
3
  from typing import List, Annotated
4
4
 
5
- from .base import Base
6
- from cidc_api.models.types import TrialOrganization, TrialFundingAgency, AssayType, AgeGroup
5
+ from cidc_api.models.pydantic.base import Base
6
+ from cidc_api.models.types import TrialOrganization, TrialFundingAgency, AssayType, AgeGroup, PrimaryPurposeType
7
7
 
8
8
 
9
9
  class Trial(Base):
@@ -12,7 +12,7 @@ class Trial(Base):
12
12
 
13
13
  # The unique identifier for the clinical trial. e.g. "GU16-287","BACCI"
14
14
  # CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=5054234%20and%20ver_nr=1
15
- trial_id: str | None = None # TODO: Fix stage2 trial_id to not be nullable, once stage 1 models are complete
15
+ trial_id: str | None = None
16
16
 
17
17
  # The version number of the trial dataset. e.g. "1.0"
18
18
  version: str | None = None
@@ -69,6 +69,10 @@ class Trial(Base):
69
69
  # CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=16333703%20and%20ver_nr=1
70
70
  dates_of_conduct_end: datetime | None = None
71
71
 
72
+ # A classification of the study based upon the primary intent of the study's activities.
73
+ # CDE: https://cadsr.cancer.gov/onedata/dmdirect/NIH/NCI/CO/CDEDD?filter=CDEDD.ITEM_ID=11160683%20and%20ver_nr=1
74
+ primary_purpose_type: PrimaryPurposeType
75
+
72
76
  # The image of the trial data schema
73
77
  schema_file_id: int | None = None
74
78
 
@@ -81,11 +85,5 @@ class Trial(Base):
81
85
  # The list of assays that CIDC expects to receive for this trial.
82
86
  expected_assays: List[AssayType] = []
83
87
 
84
- # Is the cancer studying a liquid tumor type?
85
- is_liquid_tumor_trial: bool = False
86
-
87
88
  # The dbgap study accession number associated with the trial.
88
- dbgap_study_accession: str | None = None
89
-
90
- # The internal version identifier for this specific trial dataset.
91
- version: str
89
+ dbgap_study_accession: str
cidc_api/models/types.py CHANGED
@@ -26,6 +26,7 @@ AgeGroup = Literal[
26
26
  "Pediatric",
27
27
  ]
28
28
 
29
+
29
30
  TrialOrganization = Literal[
30
31
  "ECOG-ACRIN",
31
32
  "SWOG",
@@ -59,6 +60,34 @@ TrialFundingAgency = Literal[
59
60
  ]
60
61
 
61
62
 
63
# Permissible values for a trial's primary purpose, i.e. the classification of the study
# based upon the primary intent of its activities.
# NOTE(review): "Basic Science Research", "Epidemiology Research" and
# "Rehabilitation Clinical Study" previously ended with a trailing space. A Literal member
# is compared exactly, so any whitespace-trimmed input could never match those three
# entries. The trailing spaces are removed here - TODO confirm against the caDSR
# permissible-value list for this CDE.
PrimaryPurposeType = Literal[
    "Adverse Effect Mitigation Study",
    "Ancillary Study",
    "Basic Science Research",
    "Correlative Study",
    "Cure Study",
    "Device Feasibility Study",
    "Diagnosis Study",
    "Disease Modifying Treatment Study",
    "Early Detection Study",
    "Education Training Clinical Study",
    "Epidemiology Research",
    "Genomics Research",
    "Health Services Research",
    "Imaging Research",
    "Interventional Study",
    "Observational Study",
    "Outcomes Research",
    "Prevention Study",
    "Proteomic Research",
    "Rehabilitation Clinical Study",
    "Screening Study",
    "Supportive Care Study",
    "Transcriptomics Research",
    "Treatment Study",
]
89
+
90
+
62
91
  AssayType = Literal[
63
92
  "Olink",
64
93
  "WES",
@@ -84,6 +113,7 @@ AssayType = Literal[
84
113
  "snRNA-Seq",
85
114
  "Visium",
86
115
  "Olink HT",
116
+ "TCRseq RNA",
87
117
  ]
88
118
 
89
119
 
@@ -285,7 +315,6 @@ CancerStageAJCC = Literal[
285
315
 
286
316
 
287
317
  CancerStageFIGO = Literal[
288
- "value",
289
318
  "Stage I",
290
319
  "Stage IA",
291
320
  "Stage IA1",
@@ -1016,21 +1045,6 @@ GVHDDiagnosisChronicGlobalSeverity = Literal["Mild", "Moderate", "Severe"]
1016
1045
  GVHDOrganChronicScore = Literal["0", "1", "2", "3"]
1017
1046
 
1018
1047
 
1019
- PriorTreatmentType = Literal[
1020
- "Surgery",
1021
- "Radiotherapy",
1022
- "Immunotherapy",
1023
- "Chemotherapy",
1024
- "Targeted therapy",
1025
- "Other therapy",
1026
- "Radiopharmaceutical",
1027
- "Stem cell transplant",
1028
- "Immunosuppressive therapy/GVHD prophylaxis for transplant",
1029
- "Conditioning therapy",
1030
- "Post-transplant salvage therapy",
1031
- ]
1032
-
1033
-
1034
1048
  ConditioningRegimenType = Literal["Myeloablative", "Reduced-intensity", "Non-myeloablative", "Other"]
1035
1049
 
1036
1050
  StemCellDonorType = Literal["Autologous", "Allogeneic"]
@@ -0,0 +1,68 @@
1
+ from datetime import datetime
2
+ from urllib.parse import quote
3
+
4
+ from werkzeug.exceptions import BadRequest
5
+
6
+ from cidc_api.models import IngestionJobs
7
+ from . import gcloud_client
8
+ from ..shared.auth import get_current_user
9
+
10
+ JOB_TYPE_ASSAY = "assay"
11
+ JOB_TYPE_CLINICAL = "clinical"
12
+ ALLOWED_JOB_TYPES = {JOB_TYPE_CLINICAL, JOB_TYPE_ASSAY}
13
+
14
+
15
+ def resolve_job_type_and_assay_fields(data: dict) -> tuple[str, str | None, str | None]:
16
+ """Decide job_type and gather assay_type/batch_id from request JSON."""
17
+ assay_type = data.get("assay_type")
18
+ # If job_type is assay or assay_type is present, treat this as an assay job.
19
+ job_type = data.get("job_type") or (JOB_TYPE_ASSAY if assay_type else JOB_TYPE_CLINICAL)
20
+
21
+ if job_type not in ALLOWED_JOB_TYPES:
22
+ raise BadRequest("Invalid job_type. Allowed values are 'clinical' or 'assay'.")
23
+
24
+ if job_type == JOB_TYPE_ASSAY and (not assay_type or not isinstance(assay_type, str)):
25
+ raise BadRequest("assay_type must be provided for job_type='assay'.")
26
+
27
+ assay_type = assay_type.strip() if assay_type else None
28
+ batch_id = data.get("batch_id").strip() if isinstance(data.get("batch_id"), str) else None
29
+
30
+ return job_type, assay_type, batch_id
31
+
32
+
33
def prepare_assay_job(trial_id: str, assay_type: str, batch_id: str) -> tuple[str, str, str, datetime, int, str]:
    """
    Check that a new assay job may be created and assemble its initial values.

    Returns (submission_id, job_status, error_status, start_date, version, gcs_path), where
    gcs_path is "<intake bucket name>/<assay_type>/<submission_id>".

    Raises BadRequest when assay_type is empty, or when batch_id is given and
    IngestionJobs.get_unique_assay_job finds a job for the same
    trial_id/assay_type/batch_id combination.
    """
    if not assay_type:
        raise BadRequest("assay_type must be provided for job_type='assay'.")

    # The (trial_id, assay_type, batch_id) uniqueness rule only applies when a batch_id is present.
    duplicate = IngestionJobs.get_unique_assay_job(trial_id, assay_type, batch_id) if batch_id else None
    if duplicate:
        raise BadRequest(
            f"Assay job {duplicate.id} already exists for this exact trial_id/assay_type/batch_id combination."
        )

    submission_id = IngestionJobs.next_assay_submission_id(trial_id, assay_type)
    start_date = datetime.now()

    # Create or retrieve the intake bucket that belongs to this trial.
    uploader_email = get_current_user().email
    bucket = gcloud_client.create_intake_bucket(uploader_email, trial_id=trial_id)

    return (
        submission_id,
        "INITIAL SUBMISSION",
        "Upload Incomplete",  # job starts with 'Incomplete' notifier
        start_date,
        1,
        f"{bucket.name}/{assay_type}/{submission_id}",
    )
59
+
60
+
61
def get_google_links(intake_path: str) -> tuple[str, str]:
    """Return the gs:// URI and the Cloud Console browser URL for an intake path."""
    gcs_uri = f"gs://{intake_path}"
    # Percent-encode the path so the console link opens correctly.
    return gcs_uri, f"https://console.cloud.google.com/storage/browser/{quote(intake_path)}"
cidc_api/shared/auth.py CHANGED
@@ -1,16 +1,14 @@
1
1
  from functools import wraps
2
2
  from typing import List
3
3
 
4
- from packaging import version
5
-
6
4
  from flask import g, request, current_app as app, Flask
5
+ from packaging import version
7
6
  from werkzeug.exceptions import Unauthorized, BadRequest, PreconditionFailed
8
7
 
9
- from ..models import Users, UserSchema
10
-
11
8
  from ..config.logging import get_logger
12
-
9
+ from ..models import Users, UserSchema
13
10
  from ..shared.jose import decode_id_token
11
+ from ..telemetry import trace_
14
12
 
15
13
  logger = get_logger(__name__)
16
14
 
@@ -144,6 +142,7 @@ def get_current_user() -> Users:
144
142
  _user_schema = UserSchema()
145
143
 
146
144
 
145
+ @trace_()
147
146
  def authenticate() -> Users:
148
147
  id_token = _extract_token()
149
148
  token_payload = decode_id_token(id_token)
@@ -172,6 +171,7 @@ def _extract_token() -> str:
172
171
 
173
172
 
174
173
  ### Authorization logic ###
174
+ @trace_()
175
175
  def authorize(user: Users, allowed_roles: List[str], resource: str, method: str) -> bool:
176
176
  """Check if the current user is authorized to act on the current request's resource.
177
177
  Raises Unauthorized
@@ -10,22 +10,33 @@ from ..config.settings import GOOGLE_CLINICAL_DATA_BUCKET
10
10
  from ..models import PreprocessedFiles, TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER
11
11
  from ..shared.auth import get_current_user
12
12
  from ..shared.gcloud_client import upload_file_to_gcs, move_gcs_file
13
+ from ..telemetry import trace_
13
14
 
14
15
  logger = get_logger(__name__)
15
16
 
16
17
 
18
@trace_()
def set_current_file(
    file: FileStorage,
    file_category: str,
    gcs_folder: str,
    session: Session,
    uploader_email: str,
    job_id: int = None,
    append_timestamp: bool = None,
) -> PreprocessedFiles:
    """
    Make the given upload the 'current' file for its category and job.

    Existing 'current' files for the category/job are archived first; the new file is then
    created through create_file with a version one above the value returned by the archive
    step. append_timestamp is forwarded to create_file unchanged.
    """
    archived_version = PreprocessedFiles.archive_current_files(file_category, job_id=job_id, session=session)
    next_version = archived_version + 1
    return create_file(
        file, gcs_folder, file_category, session, uploader_email, job_id, next_version, append_timestamp
    )
27
37
 
28
38
 
39
+ @trace_()
29
40
  def create_file(
30
41
  file: FileStorage,
31
42
  gcs_folder: str,
@@ -34,11 +45,12 @@ def create_file(
34
45
  uploader_email: str,
35
46
  job_id: int = None,
36
47
  version: int = None,
48
+ append_timestamp: bool = None,
37
49
  ) -> PreprocessedFiles:
38
50
  """Upload file to GCS and create corresponding metadata record in the database."""
39
51
  status = "pending" if gcs_folder.endswith("pending/") else "current"
40
- # only need timestamp for current/versioned files
41
- append_timestamp = status == "current"
52
+ # only need timestamp for current/versioned files, if not specified otherwise
53
+ append_timestamp = append_timestamp if append_timestamp is not None else (status == "current")
42
54
  # create file in GCS
43
55
  gcs_file_path = upload_file_to_gcs(file, GOOGLE_CLINICAL_DATA_BUCKET, gcs_folder, append_timestamp=append_timestamp)
44
56
  # create corresponding record in db
@@ -1,13 +1,15 @@
1
1
  """Utilities for interacting with the Google Cloud Platform APIs."""
2
2
 
3
- # pylint: disable=logging-fstring-interpolation,too-many-lines
3
+ # pylint: disable=logging-fstring-interpolation,too-many-lines, broad-exception-raised
4
4
 
5
+ import asyncio
5
6
  import base64
6
7
  import datetime
7
8
  import hashlib
8
9
  import io
9
10
  import json
10
11
  import os
12
+ import re
11
13
  import warnings
12
14
  from collections import namedtuple
13
15
  from concurrent.futures import Future
@@ -25,6 +27,8 @@ from typing import (
25
27
  )
26
28
 
27
29
  import googleapiclient.discovery
30
+ from gcloud.aio.storage import Storage
31
+ from pandas.core.frame import DataFrame
28
32
  import pandas as pd
29
33
  import requests
30
34
  from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
@@ -56,6 +60,7 @@ from ..config.settings import (
56
60
  GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC,
57
61
  GOOGLE_HL_CLINICAL_VALIDATION_TOPIC,
58
62
  GOOGLE_DL_CLINICAL_VALIDATION_TOPIC,
63
+ GOOGLE_ASSAY_METADATA_VALIDATION_TOPIC,
59
64
  TESTING,
60
65
  ENV,
61
66
  IS_EMAIL_ON,
@@ -361,15 +366,34 @@ def get_intake_bucket_name(user_email: str) -> str:
361
366
  return bucket_name
362
367
 
363
368
 
364
- def create_intake_bucket(user_email: str) -> storage.Bucket:
369
def get_trial_intake_bucket_name(trial_id: str) -> str:
    """
    Build the intake bucket name for a trial.

    The result has the form <GOOGLE_INTAKE_BUCKET>-<slug>, where the slug is the
    lowercased trial_id limited to the characters [a-z0-9-].
    """
    slug = trial_id.lower()
    # Every character outside the allowed set is turned into a dash ...
    slug = re.sub(r"[^a-z0-9-]", "-", slug)
    # ... then runs of dashes are squeezed to one and dashes at either end are dropped.
    slug = re.sub(r"-+", "-", slug)
    slug = slug.strip("-")

    return f"{GOOGLE_INTAKE_BUCKET}-{slug}"
382
+
383
+
384
+ def create_intake_bucket(user_email: str, trial_id: str = None) -> storage.Bucket:
385
+ """
386
+ Create (or retrieve) the appropriate data intake bucket.
387
+ If a trial_id is provided, a trial-specific bucket is used;
388
+ otherwise a user-specific intake bucket is used.
389
+
367
390
  Grant the user GCS object admin permissions on the bucket, or refresh those
368
391
  permissions if they've already been granted.
369
392
  Created with uniform bucket-level IAM access, so expiring permission.
370
393
  """
371
394
  storage_client = _get_storage_client()
372
- bucket_name = get_intake_bucket_name(user_email)
395
+ # Get trial-specific bucket name if trial_id is given, otherwise a user-specific bucket name.
396
+ bucket_name = get_trial_intake_bucket_name(trial_id) if trial_id else get_intake_bucket_name(user_email)
373
397
  bucket = storage_client.bucket(bucket_name)
374
398
 
375
399
  if not bucket.exists():
@@ -423,25 +447,50 @@ def upload_xlsx_to_intake_bucket(user_email: str, trial_id: str, upload_type: st
423
447
  return f"https://console.cloud.google.com/storage/browser/_details/{bucket_name}/{blob_name}"
424
448
 
425
449
 
426
- def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str):
427
- """Reads an XLSX or CSV file from Google Cloud Storage into a Pandas DataFrame."""
428
- temp_file = get_file_bytes_from_gcs(bucket_name, blob_name)
429
-
430
- # TODO: specify sheet in xlsx file and/or accept tsv and xls files
431
- if blob_name[-3:] == "csv":
432
- return strip_whitespaces(pd.read_csv(temp_file))
433
- elif blob_name[-4:] == "xlsx":
434
- return strip_whitespaces(pd.read_excel(temp_file))
450
+ def prepare_dataframe(extension, bytes) -> DataFrame:
451
+ if extension == "csv":
452
+ return strip_whitespaces(pd.read_csv(bytes, dtype=str, keep_default_na=False))
453
+ elif extension == "xlsx":
454
+ return strip_whitespaces(pd.read_excel(bytes, dtype=str, keep_default_na=False))
435
455
  else:
436
456
  raise Exception("Can only read csv or xlsx files")
437
457
 
438
458
 
459
+ def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str) -> DataFrame:
460
+ """Reads an XLSX or CSV file from Google Cloud Storage into a Pandas DataFrame."""
461
+ contents = get_file_bytes_from_gcs(bucket_name, blob_name)
462
+ extension = blob_name.split(".")[-1]
463
+ return prepare_dataframe(extension, contents)
464
+
465
+
439
466
  def get_file_bytes_from_gcs(bucket_name: str, blob_name: str) -> io.BytesIO:
440
467
  """Reads a file from Google Cloud Storage and returns it as BytesIO."""
441
468
  sheet_data = storage.Client().bucket(bucket_name).blob(blob_name).download_as_bytes()
442
469
  return io.BytesIO(sheet_data)
443
470
 
444
471
 
472
+ async def async_gcs_files_to_pandas_dataframes(bucket_name: str, blob_names: List[str]) -> List[DataFrame]:
473
+ """Asynchronously reads XLSX or CSV files from Google Cloud Storage into a list of Pandas DataFrames."""
474
+
475
+ all_contents = await asyncio.gather(
476
+ *[async_get_file_bytes_from_gcs(bucket_name, blob_name) for blob_name in blob_names]
477
+ )
478
+ dataframes = []
479
+
480
+ for blob_name, contents in zip(blob_names, all_contents):
481
+ extension = blob_name.split(".")[-1]
482
+ dataframes.append(prepare_dataframe(extension, contents))
483
+ return dataframes
484
+
485
+
486
+ async def async_get_file_bytes_from_gcs(bucket_name: str, blob_name: str) -> io.BytesIO:
487
+ """Asynchronously reads a file from Google Cloud Storage and returns it as BytesIO."""
488
+
489
+ async with Storage() as client:
490
+ sheet_data = await client.download(bucket_name, blob_name)
491
+ return io.BytesIO(sheet_data)
492
+
493
+
445
494
  def _execute_multiblob_acl_change(
446
495
  user_email_list: List[str],
447
496
  blob_list: List[storage.Blob],
@@ -614,6 +663,7 @@ def _build_trial_upload_prefixes(
614
663
  trial_set: Set[str] = set()
615
664
  upload_set: Set[str] = set()
616
665
  if not trial_id:
666
+ # import is here because of circular import
617
667
  from ..models.models import TrialMetadata
618
668
 
619
669
  trial_set = {str(t.trial_id) for t in session.query(TrialMetadata).add_columns(TrialMetadata.trial_id)}
@@ -886,6 +936,7 @@ def get_signed_url(
886
936
  bucket_name: str = GOOGLE_ACL_DATA_BUCKET,
887
937
  method: str = "GET",
888
938
  expiry_mins: int = 30,
939
+ use_short_filename: bool = False,
889
940
  ) -> str:
890
941
  """
891
942
  Generate a signed URL for `object_name` to give a client temporary access.
@@ -900,7 +951,11 @@ def get_signed_url(
900
951
 
901
952
  # Generate the signed URL, allowing a client to use `method` for `expiry_mins` minutes
902
953
  expiration = datetime.timedelta(minutes=expiry_mins)
903
- full_filename = object_name.replace("/", "_").replace('"', "_").replace(" ", "_")
954
+ if use_short_filename:
955
+ filename = os.path.basename(object_name)
956
+ else:
957
+ # full filename with path included
958
+ filename = object_name.replace("/", "_").replace('"', "_").replace(" ", "_")
904
959
  other_kwargs = {}
905
960
  if os.environ.get("DEV_GOOGLE_STORAGE", None):
906
961
  other_kwargs["api_access_endpoint"] = (os.environ.get("DEV_GOOGLE_STORAGE") or "") + (
@@ -910,7 +965,7 @@ def get_signed_url(
910
965
  version="v2",
911
966
  expiration=expiration,
912
967
  method=method,
913
- response_disposition=f'attachment; filename="{full_filename}"',
968
+ response_disposition=f'attachment; filename="{filename}"',
914
969
  **other_kwargs,
915
970
  )
916
971
  logger.info(f"generated signed URL for {object_name}: {url}")
@@ -920,7 +975,8 @@ def get_signed_url(
920
975
 
921
976
  def _encode_and_publish(content: str, topic: str) -> Future:
922
977
  """Convert `content` to bytes and publish it to `topic`."""
923
- pubsub_publisher = pubsub.PublisherClient()
978
+ publisher_options = pubsub.types.PublisherOptions(enable_open_telemetry_tracing=ENV == "dev-int")
979
+ pubsub_publisher = pubsub.PublisherClient(publisher_options=publisher_options)
924
980
  topic = pubsub_publisher.topic_path(GOOGLE_CLOUD_PROJECT, topic)
925
981
  data = bytes(content, "utf-8")
926
982
 
@@ -994,6 +1050,12 @@ def publish_detailed_validation(job_id: int) -> None:
994
1050
  _report = _encode_and_publish(str(job_id), GOOGLE_DL_CLINICAL_VALIDATION_TOPIC)
995
1051
 
996
1052
 
1053
+ def publish_assay_metadata_validation(job_id: int) -> None:
1054
+ """Publish to the assay_metadata_validation topic that a job's assay metadata file is ready to be validated."""
1055
+ # Start validation asynchronously
1056
+ _report = _encode_and_publish(str(job_id), GOOGLE_ASSAY_METADATA_VALIDATION_TOPIC)
1057
+
1058
+
997
1059
  def send_email(to_emails: List[str], subject: str, html_content: str, **kw) -> None:
998
1060
  """
999
1061
  Publish an email-to-send to the emails topic.