nci-cidc-api-modules 1.2.17-py3-none-any.whl → 1.2.19-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/models/__init__.py +2 -0
- cidc_api/models/data.py +15 -0
- cidc_api/models/migrations.py +12 -39
- cidc_api/models/models.py +23 -0
- cidc_api/models/types.py +1438 -0
- {nci_cidc_api_modules-1.2.17.dist-info → nci_cidc_api_modules-1.2.19.dist-info}/METADATA +5 -3
- {nci_cidc_api_modules-1.2.17.dist-info → nci_cidc_api_modules-1.2.19.dist-info}/RECORD +10 -8
- {nci_cidc_api_modules-1.2.17.dist-info → nci_cidc_api_modules-1.2.19.dist-info}/WHEEL +0 -0
- {nci_cidc_api_modules-1.2.17.dist-info → nci_cidc_api_modules-1.2.19.dist-info}/licenses/LICENSE +0 -0
- {nci_cidc_api_modules-1.2.17.dist-info → nci_cidc_api_modules-1.2.19.dist-info}/top_level.txt +0 -0
cidc_api/models/__init__.py
CHANGED
cidc_api/models/data.py
ADDED
@@ -0,0 +1,15 @@
+from cidc_api.models.pydantic.stage2 import all_models
+
+standard_data_categories = [model.__data_category__ for model in all_models if hasattr(model, "__data_category__")]
+
+
+# A class to hold the representation of a trial's dataset all at once
+class Dataset(dict):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for data_category in standard_data_categories:
+            self[data_category] = []
+
+
+# Maps data categories like "treatment" to their associated pydantic model
+data_category_to_model = {model.__data_category__: model for model in all_models if hasattr(model, "__data_category__")}
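
A minimal sketch of how this new module might be used, assuming the stage2 pydantic models can be constructed from keyword arguments (the record below and its contents are hypothetical, for illustration only):

    from cidc_api.models.data import Dataset, data_category_to_model, standard_data_categories

    # A fresh Dataset pre-populates an empty list for every standard data category.
    dataset = Dataset()
    assert all(dataset[category] == [] for category in standard_data_categories)

    # Hypothetical record: look up the pydantic model registered for its
    # category, validate the values, and file the result under that category.
    record = {"data_category": "treatment", "values": {}}
    model = data_category_to_model[record["data_category"]]
    dataset[record["data_category"]].append(model(**record["values"]))
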
cidc_api/models/migrations.py
CHANGED
@@ -91,15 +91,11 @@ def migration_session():
         session.close()
 
 
-def run_metadata_migration(
-    metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool
-):
+def run_metadata_migration(metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool):
     """Migrate trial metadata, upload job patches, and downloadable files according to `metadata_migration`"""
     with migration_session() as (session, task_queue):
         try:
-            _run_metadata_migration(
-                metadata_migration, use_upload_jobs_table, task_queue, session
-            )
+            _run_metadata_migration(metadata_migration, use_upload_jobs_table, task_queue, session)
         except:
             traceback.print_exc()
             raise
@@ -122,9 +118,7 @@ class ManifestUploads(CommonColumns):
     __tablename__ = "manifest_uploads"
 
 
-def _select_successful_assay_uploads(
-    use_upload_jobs_table: bool, session: Session
-) -> List[UploadJobs]:
+def _select_successful_assay_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
     if use_upload_jobs_table:
         return (
             session.query(UploadJobs)
@@ -133,21 +127,12 @@ def _select_successful_assay_uploads(
             .all()
         )
 
-    return (
-        session.query(AssayUploads)
-        .filter_by(status=UploadJobStatus.MERGE_COMPLETED.value)
-        .with_for_update()
-        .all()
-    )
+    return session.query(AssayUploads).filter_by(status=UploadJobStatus.MERGE_COMPLETED.value).with_for_update().all()
 
 
-def _select_manifest_uploads(
-    use_upload_jobs_table: bool, session: Session
-) -> List[UploadJobs]:
+def _select_manifest_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
     if use_upload_jobs_table:
-        return (
-            session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
-        )
+        return session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
 
     return session.query(ManifestUploads).with_for_update().all()
 
@@ -188,21 +173,15 @@ def _run_metadata_migration(
 
             # Regenerate additional metadata from the migrated clinical trial
             # metadata object.
-            print(
-                f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}"
-            )
+            print(f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}")
             artifact_path = uuid_path_map[artifact["upload_placeholder"]]
-            df.additional_metadata = get_source(
-                migration.result, artifact_path, skip_last=True
-            )[1]
+            df.additional_metadata = get_source(migration.result, artifact_path, skip_last=True)[1]
 
             # If the GCS URI has changed, rename the blob
             # makes call to bucket.rename_blob
             new_gcs_uri = artifact["object_url"]
             if old_gcs_uri != new_gcs_uri:
-                print(
-                    f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}"
-                )
+                print(f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}")
                 renamer = PieceOfWork(
                     partial(
                         rename_gcs_blob,
@@ -220,9 +199,7 @@ def _run_metadata_migration(
                 gcs_tasks.schedule(renamer)
 
     # Migrate all assay upload successes
-    successful_assay_uploads = _select_successful_assay_uploads(
-        use_upload_jobs_table, session
-    )
+    successful_assay_uploads = _select_successful_assay_uploads(use_upload_jobs_table, session)
     for upload in successful_assay_uploads:
         print(f"Running metadata migration for assay upload: {upload.id}")
         if use_upload_jobs_table:
@@ -248,9 +225,7 @@ def _run_metadata_migration(
                 if old_target_uri in migration.file_updates:
                     new_target_uri = migration.file_updates[old_target_uri]["object_url"]
                     if old_target_uri != new_target_uri:
-                        print(
-                            f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}"
-                        )
+                        print(f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}")
                         new_upload_uri = "/".join([new_target_uri, upload_timestamp])
                         renamer = PieceOfWork(
                             partial(
@@ -325,7 +300,5 @@ def republish_artifact_uploads():
     with migration_session() as (session, _):
         files = session.query(DownloadableFiles).all()
         for f in files:
-            print(
-                f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}"
-            )
+            print(f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}")
             publish_artifact_upload(f.object_url)
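
Every hunk above is a formatting-only change: multi-line signatures and calls are collapsed onto single lines, with no behavioral difference. One detail worth noting in the selectors is SQLAlchemy's with_for_update(), which emits SELECT ... FOR UPDATE so the upload rows being migrated stay locked until the transaction ends. A minimal sketch of that pattern against a hypothetical table and connection string:

    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.orm import declarative_base, sessionmaker

    Base = declarative_base()

    class Upload(Base):  # hypothetical stand-in for UploadJobs
        __tablename__ = "uploads"
        id = Column(Integer, primary_key=True)
        status = Column(String)

    session = sessionmaker(bind=create_engine("postgresql:///example"))()  # hypothetical DSN

    # SELECT ... FOR UPDATE: matching rows stay locked until commit/rollback,
    # so a concurrent migration blocks instead of reading mid-migration state.
    uploads = session.query(Upload).filter_by(status="merge-completed").with_for_update().all()
    for upload in uploads:
        upload.status = "migrated"
    session.commit()  # releases the row locks
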
cidc_api/models/models.py
CHANGED
@@ -3415,6 +3415,15 @@ class PreprocessedFiles(CommonColumns):
 
         return query.filter(cls.job_id.is_(None))
 
+    @with_default_session
+    def category_description(self, session: Session):
+        category = (
+            session.query(JobFileCategories)
+            .filter(JobFileCategories.job_id == self.job_id, JobFileCategories.category == self.file_category)
+            .first()
+        )
+        return category.description if category else None
+
 
 INGESTION_JOB_STATUSES = [
     "DRAFT",
@@ -3546,6 +3555,7 @@ class JobFileCategories(CommonColumns):
     )
 
     category = Column(String)
+    description = Column(String)
     job_id = Column(Integer, nullable=False)
     type = Column(Enum("required", "optional", name="type"), nullable=False)
     is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
@@ -3556,6 +3566,7 @@ class JobFileCategories(CommonColumns):
         category: str,
         job_id: int,
         type: str,
+        description: str = None,
         is_custom: bool = False,
         session: Session = None,
     ):
@@ -3563,6 +3574,7 @@ class JobFileCategories(CommonColumns):
             category=category,
             job_id=job_id,
             type=type,
+            description=description,
             is_custom=is_custom,
         )
         new_category.insert(session=session)
@@ -3607,6 +3619,17 @@ class CategoryDataElements(CommonColumns):
     def elements_for_category(cls, category_id: int, session: Session = None):
         return session.query(cls).filter_by(category_id=category_id).all()
 
+    @classmethod
+    @with_default_session
+    def elements_by_category_for_job(cls, job_id: int, session: Session = None):
+        """Fetch all CategoryDataElements for a job, along with the category name."""
+        return (
+            session.query(JobFileCategories.category, cls)
+            .join(cls, cls.category_id == JobFileCategories.id)
+            .filter(JobFileCategories.job_id == job_id)
+            .all()
+        )
+
 
 class FileValidationErrors(CommonColumns):
     __tablename__ = "file_validation_errors"