nci-cidc-api-modules 1.2.15__tar.gz → 1.2.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nci_cidc_api_modules-1.2.15/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.2.19}/PKG-INFO +6 -4
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/README.md +3 -2
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/db.py +1 -3
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/__init__.py +2 -0
- nci_cidc_api_modules-1.2.19/cidc_api/models/data.py +15 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/migrations.py +12 -39
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/models.py +89 -35
- nci_cidc_api_modules-1.2.19/cidc_api/models/types.py +1438 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/file_handling.py +37 -2
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/gcloud_client.py +6 -4
- nci_cidc_api_modules-1.2.19/cidc_api/shared/utils.py +8 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19/nci_cidc_api_modules.egg-info}/PKG-INFO +6 -4
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/SOURCES.txt +3 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/requires.txt +2 -1
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/pyproject.toml +7 -2
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/requirements.modules.txt +2 -1
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/LICENSE +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/MANIFEST.in +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/__init__.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/logging.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/secrets.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/settings.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/files/__init__.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/files/details.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/files/facets.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/schemas.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/__init__.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/auth.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/email_layout.html +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/emails.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/jose.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/rest_utils.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/setup.cfg +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/setup.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/tests/test_api.py +0 -0
{nci_cidc_api_modules-1.2.15/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.2.19}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nci_cidc_api_modules
-Version: 1.2.15
+Version: 1.2.19
 Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
 Home-page: https://github.com/NCI-CIDC/cidc-api-gae
 License: MIT license
@@ -12,7 +12,7 @@ Requires-Dist: cloud-sql-python-connector[pg8000]>=1.18.5
 Requires-Dist: flask>=3.1.2
 Requires-Dist: flask-migrate>=4.1.0
 Requires-Dist: flask-sqlalchemy>=3.1.1
-Requires-Dist: google-auth
+Requires-Dist: google-auth==2.41.1
 Requires-Dist: google-api-python-client>=2.185.0
 Requires-Dist: google-cloud-bigquery>=3.38.0
 Requires-Dist: google-cloud-pubsub>=2.32.0
@@ -28,6 +28,7 @@ Requires-Dist: pyarrow>=22.0.0
 Requires-Dist: python-dotenv>=1.2.1
 Requires-Dist: requests>=2.32.5
 Requires-Dist: sqlalchemy>=2.0.44
+Requires-Dist: sqlalchemy-mixins~=2.0.5
 Requires-Dist: werkzeug>=3.1.3
 Requires-Dist: nci-cidc-schemas==0.28.8
 Dynamic: description
@@ -217,13 +218,14 @@ gcloud auth application-default login
 In your .env file, comment out `POSTGRES_URI` and uncommment
 `CLOUD_SQL_INSTANCE_NAME CLOUD_SQL_DB_USER CLOUD_SQL_DB_NAME` Replace `CLOUD_SQL_DB_USER` with your NIH email.
 
-### Running database migrations
+### Creating/Running database migrations
 
 This project uses [`Flask Migrate`](https://flask-migrate.readthedocs.io/en/latest/) for managing database migrations. To create a new migration and upgrade the database specified in your `.env` config:
 
 ```bash
 export FLASK_APP=cidc_api/app.py
-#
+# First, make your changes to the model(s)
+# Then, let flask automatically generate the db change. Double check the migration script!
 flask db migrate -m "<a message describing the changes in this migration>"
 # Apply changes to the database
 flask db upgrade
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/README.md

@@ -176,13 +176,14 @@ gcloud auth application-default login
 In your .env file, comment out `POSTGRES_URI` and uncommment
 `CLOUD_SQL_INSTANCE_NAME CLOUD_SQL_DB_USER CLOUD_SQL_DB_NAME` Replace `CLOUD_SQL_DB_USER` with your NIH email.
 
-### Running database migrations
+### Creating/Running database migrations
 
 This project uses [`Flask Migrate`](https://flask-migrate.readthedocs.io/en/latest/) for managing database migrations. To create a new migration and upgrade the database specified in your `.env` config:
 
 ```bash
 export FLASK_APP=cidc_api/app.py
-#
+# First, make your changes to the model(s)
+# Then, let flask automatically generate the db change. Double check the migration script!
 flask db migrate -m "<a message describing the changes in this migration>"
 # Apply changes to the database
 flask db upgrade
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/db.py

@@ -10,8 +10,7 @@ from google.cloud.sql.connector import Connector, IPTypes
 from .secrets import get_secrets_manager
 
 db = SQLAlchemy()
-BaseModel =
-db.Model = BaseModel
+BaseModel = db.Model
 
 connector = Connector()
 
@@ -31,7 +30,6 @@ def getconn():
 def init_db(app: Flask):
     """Connect `app` to the database and run migrations"""
     db.init_app(app)
-    db.Model = BaseModel
     Migrate(app, db, app.config["MIGRATIONS_PATH"])
     with app.app_context():
         upgrade(app.config["MIGRATIONS_PATH"])
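The change above drops the custom base-model indirection: in Flask-SQLAlchemy 3.x, `db.Model` is already a declarative base, so a plain alias suffices. A minimal sketch (the `Example` model is illustrative, not from this package) of how code downstream of `db.py` consumes the alias:

```python
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()
BaseModel = db.Model  # db.Model is the declarative base in Flask-SQLAlchemy 3.x


class Example(BaseModel):  # hypothetical model, for illustration only
    __tablename__ = "examples"
    id = db.Column(db.Integer, primary_key=True)
```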
nci_cidc_api_modules-1.2.19/cidc_api/models/data.py

@@ -0,0 +1,15 @@
+from cidc_api.models.pydantic.stage2 import all_models
+
+standard_data_categories = [model.__data_category__ for model in all_models if hasattr(model, "__data_category__")]
+
+
+# A class to hold the representation of a trial's dataset all at once
+class Dataset(dict):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for data_category in standard_data_categories:
+            self[data_category] = []
+
+
+# Maps data categories like "treatment" to their associated pydantic model
+data_category_to_model = {model.__data_category__: model for model in all_models if hasattr(model, "__data_category__")}
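The new `Dataset` class pre-seeds one empty list per standard data category, so callers can append records for any category without key checks. A self-contained sketch of that behavior, with the pydantic-derived category list stubbed out:

```python
# Stand-in for the list derived from the pydantic models' __data_category__.
standard_data_categories = ["treatment", "demographics"]


class Dataset(dict):
    """Holds the representation of a trial's dataset, keyed by data category."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        for data_category in standard_data_categories:
            self[data_category] = []


ds = Dataset()
ds["treatment"].append({"subject_id": "S1"})  # hypothetical record
print(sorted(ds))  # ['demographics', 'treatment']: every category is pre-seeded
```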
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/migrations.py

@@ -91,15 +91,11 @@ def migration_session():
     session.close()
 
 
-def run_metadata_migration(
-    metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool
-):
+def run_metadata_migration(metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool):
     """Migrate trial metadata, upload job patches, and downloadable files according to `metadata_migration`"""
     with migration_session() as (session, task_queue):
         try:
-            _run_metadata_migration(
-                metadata_migration, use_upload_jobs_table, task_queue, session
-            )
+            _run_metadata_migration(metadata_migration, use_upload_jobs_table, task_queue, session)
         except:
             traceback.print_exc()
             raise
@@ -122,9 +118,7 @@ class ManifestUploads(CommonColumns):
     __tablename__ = "manifest_uploads"
 
 
-def _select_successful_assay_uploads(
-    use_upload_jobs_table: bool, session: Session
-) -> List[UploadJobs]:
+def _select_successful_assay_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
     if use_upload_jobs_table:
         return (
             session.query(UploadJobs)
@@ -133,21 +127,12 @@ def _select_successful_assay_uploads(
             .all()
         )
 
-    return (
-        session.query(AssayUploads)
-        .filter_by(status=UploadJobStatus.MERGE_COMPLETED.value)
-        .with_for_update()
-        .all()
-    )
+    return session.query(AssayUploads).filter_by(status=UploadJobStatus.MERGE_COMPLETED.value).with_for_update().all()
 
 
-def _select_manifest_uploads(
-    use_upload_jobs_table: bool, session: Session
-) -> List[UploadJobs]:
+def _select_manifest_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
     if use_upload_jobs_table:
-        return (
-            session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
-        )
+        return session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
 
     return session.query(ManifestUploads).with_for_update().all()
 
@@ -188,21 +173,15 @@ def _run_metadata_migration(
 
     # Regenerate additional metadata from the migrated clinical trial
     # metadata object.
-    print(
-        f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}"
-    )
+    print(f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}")
     artifact_path = uuid_path_map[artifact["upload_placeholder"]]
-    df.additional_metadata = get_source(
-        migration.result, artifact_path, skip_last=True
-    )[1]
+    df.additional_metadata = get_source(migration.result, artifact_path, skip_last=True)[1]
 
     # If the GCS URI has changed, rename the blob
     # makes call to bucket.rename_blob
     new_gcs_uri = artifact["object_url"]
     if old_gcs_uri != new_gcs_uri:
-        print(
-            f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}"
-        )
+        print(f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}")
         renamer = PieceOfWork(
             partial(
                 rename_gcs_blob,
@@ -220,9 +199,7 @@ def _run_metadata_migration(
     gcs_tasks.schedule(renamer)
 
     # Migrate all assay upload successes
-    successful_assay_uploads = _select_successful_assay_uploads(
-        use_upload_jobs_table, session
-    )
+    successful_assay_uploads = _select_successful_assay_uploads(use_upload_jobs_table, session)
     for upload in successful_assay_uploads:
         print(f"Running metadata migration for assay upload: {upload.id}")
         if use_upload_jobs_table:
@@ -248,9 +225,7 @@ def _run_metadata_migration(
     if old_target_uri in migration.file_updates:
         new_target_uri = migration.file_updates[old_target_uri]["object_url"]
         if old_target_uri != new_target_uri:
-            print(
-                f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}"
-            )
+            print(f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}")
             new_upload_uri = "/".join([new_target_uri, upload_timestamp])
             renamer = PieceOfWork(
                 partial(
@@ -325,7 +300,5 @@ def republish_artifact_uploads():
     with migration_session() as (session, _):
         files = session.query(DownloadableFiles).all()
         for f in files:
-            print(
-                f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}"
-            )
+            print(f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}")
             publish_artifact_upload(f.object_url)
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/models.py

@@ -26,6 +26,7 @@ __all__ = [
     "FileValidationErrors",
     "IngestionJobs",
     "JobFileCategories",
+    "CategoryDataElements",
    "ValidationConfigs",
     "TRIAL_APPENDIX_A",
     "TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER",
@@ -95,7 +96,7 @@ from sqlalchemy import (
     String,
     Table,
 )
-from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.dialects.postgresql import JSONB, UUID, CITEXT
 from sqlalchemy.engine import ResultProxy
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.hybrid import hybrid_property
@@ -131,7 +132,6 @@ from ..config.settings import (
     MAX_PAGINATION_PAGE_SIZE,
     TESTING,
     INACTIVE_USER_DAYS,
-    GOOGLE_CLINICAL_DATA_BUCKET,
 )
 from ..shared import emails
 from ..shared.gcloud_client import (
@@ -145,7 +145,6 @@ from ..shared.gcloud_client import (
     revoke_intake_access,
     revoke_lister_access,
     revoke_bigquery_access,
-    gcs_xlsx_or_csv_file_to_pandas_dataframe,
 )
 
 os.environ["TZ"] = "UTC"
@@ -382,7 +381,7 @@ class Users(CommonColumns):
     last_n = Column(String)
     organization = Column(Enum(*ORGS, name="orgs"))
     approval_date = Column(DateTime)
-    role = Column(Enum(*ROLES, name="
+    role = Column(Enum(*ROLES, name="roles"))
     disabled = Column(Boolean, default=False, server_default="false")
 
     @validates("approval_date")
@@ -607,6 +606,22 @@ class Permissions(CommonColumns):
             unique=True,
             postgresql_where=file_group_id.isnot(None),
         ),
+        Index(
+            "unique_trial_id_upload_type_is_null_perms",
+            "granted_to_user",
+            "trial_id",
+            literal_column("(upload_type IS NULL)"),
+            unique=True,
+            postgresql_where="(upload_type IS NULL)",
+        ),
+        Index(
+            "unique_upload_type_trial_id_is_null_perms",
+            "granted_to_user",
+            literal_column("(trial_id IS NULL)"),
+            "upload_type",
+            unique=True,
+            postgresql_where="(trial_id IS NULL)",
+        ),
     )
 
     # Shorthand to make code related to trial- and upload-type-level permissions
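The two new indexes appear to enforce one-permission-per-scope semantics: at most one trial-wide grant (upload_type IS NULL) per user and trial, and at most one cross-trial grant (trial_id IS NULL) per user and upload type. A standalone sketch, against an assumed and simplified table shape, of how such a partial unique index compiles for PostgreSQL:

```python
from sqlalchemy import Column, Index, Integer, MetaData, String, Table, text
from sqlalchemy.dialects import postgresql
from sqlalchemy.schema import CreateIndex

metadata = MetaData()
perms = Table(
    "permissions",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("granted_to_user", Integer),
    Column("trial_id", String),
    Column("upload_type", String),
)

# Unique only among rows where upload_type IS NULL, i.e. trial-wide grants.
idx = Index(
    "unique_trial_id_upload_type_is_null_perms",
    perms.c.granted_to_user,
    perms.c.trial_id,
    unique=True,
    postgresql_where=text("upload_type IS NULL"),
)

print(CreateIndex(idx).compile(dialect=postgresql.dialect()))
# CREATE UNIQUE INDEX unique_trial_id_upload_type_is_null_perms
# ON permissions (granted_to_user, trial_id)
# WHERE upload_type IS NULL
```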
@@ -2294,7 +2309,7 @@ class DownloadableFiles(CommonColumns):
     additional_metadata = Column(JSONB, nullable=False)
     # TODO rename upload_type, because we store manifests in there too.
     # NOTE: this column actually has type CITEXT.
-    upload_type = Column(
+    upload_type = Column(CITEXT, nullable=False)
     md5_hash = Column(String, nullable=True)
     crc32c_hash = Column(String, nullable=True)
     trial_id = Column(String, nullable=False)
@@ -2314,7 +2329,7 @@ class DownloadableFiles(CommonColumns):
     # used instead of data_format.
     # The columns are left as optional for short term backwards compatibility.
     file_name = Column(String, nullable=True)
-    data_format = Column(
+    data_format = Column(CITEXT, nullable=True)
 
     file_groups = relationship(
         "FileGroups",
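Declaring these columns as `CITEXT` aligns the SQLAlchemy models with the case-insensitive Postgres type the NOTE comment already described. A minimal sketch (assumed, simplified table) of what that buys queries:

```python
# CITEXT columns compare case-insensitively in PostgreSQL, so a filter on
# upload_type matches 'wes', 'WES', and 'Wes' alike. Table shape is assumed.
from sqlalchemy import Column, Integer, MetaData, Table, select
from sqlalchemy.dialects.postgresql import CITEXT

metadata = MetaData()
files = Table(
    "downloadable_files",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("upload_type", CITEXT, nullable=False),
)

stmt = select(files).where(files.c.upload_type == "wes")
print(stmt)  # SELECT ... WHERE downloadable_files.upload_type = :upload_type_1
```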
@@ -3262,11 +3277,11 @@ class PreprocessedFiles(CommonColumns):
         ),
     )
 
-    file_name = Column(String)
-    object_url = Column(String)
+    file_name = Column(String, nullable=False)
+    object_url = Column(String, nullable=False)
     job_id = Column(Integer)
-    file_category = Column(String)
-    uploader_email = Column(String)
+    file_category = Column(String, nullable=False)
+    uploader_email = Column(String, nullable=False)
     status = Column(String)
     version = Column(Integer)
     released_version = Column(String)
@@ -3400,6 +3415,15 @@ class PreprocessedFiles(CommonColumns):
 
         return query.filter(cls.job_id.is_(None))
 
+    @with_default_session
+    def category_description(self, session: Session):
+        category = (
+            session.query(JobFileCategories)
+            .filter(JobFileCategories.job_id == self.job_id, JobFileCategories.category == self.file_category)
+            .first()
+        )
+        return category.description if category else None
+
 
 INGESTION_JOB_STATUSES = [
     "DRAFT",
@@ -3434,7 +3458,7 @@ class IngestionJobs(CommonColumns):
         ),
     )
 
-    status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="
+    status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="ingestion_job_status"), nullable=False)
     trial_id = Column(String, nullable=False)
     version = Column(Integer, nullable=False)
     pending = Column(Boolean, nullable=False, default=False)
@@ -3450,30 +3474,9 @@ class IngestionJobs(CommonColumns):
 
     @with_default_session
     def transition_status(self, status: str, session: Session):
-        # create required categories after opening job for submission
-        if self.status == "DRAFT" and status == "INITIAL SUBMISSION":
-            for category in self.derive_required_categories_from_appendix_a():
-                JobFileCategories.create(category=category, job_id=self.id, type="required")
         self.status = status
         self.update(session=session)
 
-    def derive_required_categories_from_appendix_a(self) -> List:
-        appendix_a = PreprocessedFiles.get_files_by_category_and_status(TRIAL_APPENDIX_A, "current", job_id=self.id)[0]
-        df = gcs_xlsx_or_csv_file_to_pandas_dataframe(GOOGLE_CLINICAL_DATA_BUCKET, appendix_a.object_url)
-        categories = []
-        headers_ended = False
-        for _index, row in df.iterrows():
-            cell = str(row.iloc[0])
-            if headers_ended:
-                if cell != "nan" and cell not in categories:
-                    categories.append(cell)
-            elif cell.lower() == TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER.lower():
-                headers_ended = True
-        if "data_dictionary" not in categories:
-            # Ensure Data_Dictionary is always a required file category
-            categories.append("data_dictionary")
-        return categories
-
     @classmethod
     @with_default_session
     def atomic_set_job_as_pending(cls, job_id: int, session: Session) -> Boolean:
@@ -3544,15 +3547,18 @@ class JobFileCategories(CommonColumns):
             ["ingestion_jobs.id"],
         ),
         Index(
-            "idx_categories_job_id"
+            "idx_categories_job_id",
+            "job_id",
             "category",
             unique=True,
         ),
     )
 
     category = Column(String)
-
-
+    description = Column(String)
+    job_id = Column(Integer, nullable=False)
+    type = Column(Enum("required", "optional", name="type"), nullable=False)
+    is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
 
     @staticmethod
     @with_default_session
@@ -3560,12 +3566,16 @@ class JobFileCategories(CommonColumns):
         category: str,
         job_id: int,
         type: str,
+        description: str = None,
+        is_custom: bool = False,
         session: Session = None,
     ):
         new_category = JobFileCategories(
             category=category,
             job_id=job_id,
             type=type,
+            description=description,
+            is_custom=is_custom,
         )
         new_category.insert(session=session)
         return new_category
@@ -3576,6 +3586,50 @@ class JobFileCategories(CommonColumns):
         categories = session.query(cls).filter(cls.job_id == job_id, cls.type == type).all()
         return [c.category for c in categories]
 
+    @classmethod
+    @with_default_session
+    def full_categories_for_job(cls, job_id: int, session: Session = None):
+        return session.query(cls).filter_by(job_id=job_id).all()
+
+
+class CategoryDataElements(CommonColumns):
+    __tablename__ = "category_data_elements"
+    __table_args__ = (
+        ForeignKeyConstraint(
+            ["category_id"],
+            ["job_file_categories.id"],
+            ondelete="CASCADE",
+        ),
+        Index(
+            "idx_elements_category_id",
+            "category_id",
+            "name",
+            unique=True,
+        ),
+    )
+
+    category_id = Column(Integer, nullable=False)
+    name = Column(String, nullable=False)
+    is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
+    element_type = Column(String, nullable=False)
+    cardinality = Column(String, nullable=True)
+
+    @classmethod
+    @with_default_session
+    def elements_for_category(cls, category_id: int, session: Session = None):
+        return session.query(cls).filter_by(category_id=category_id).all()
+
+    @classmethod
+    @with_default_session
+    def elements_by_category_for_job(cls, job_id: int, session: Session = None):
+        """Fetch all CategoryDataElements for a job, along with the category name."""
+        return (
+            session.query(JobFileCategories.category, cls)
+            .join(cls, cls.category_id == JobFileCategories.id)
+            .filter(JobFileCategories.job_id == job_id)
+            .all()
+        )
+
 
 class FileValidationErrors(CommonColumns):
     __tablename__ = "file_validation_errors"
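`elements_by_category_for_job` returns `(category_name, CategoryDataElements)` tuples rather than plain model instances, so callers would typically group the rows by category. A hypothetical consumer sketch (the `Element` stand-in mimics only the row attributes used here):

```python
from collections import defaultdict, namedtuple

# Stand-in for a CategoryDataElements row; only `name` is used below.
Element = namedtuple("Element", ["name", "element_type"])


def group_elements_by_category(rows):
    """Group (category_name, element) pairs into {category: [element names]}."""
    grouped = defaultdict(list)
    for category_name, element in rows:
        grouped[category_name].append(element.name)
    return dict(grouped)


# e.g. rows = CategoryDataElements.elements_by_category_for_job(job_id=1, session=session)
rows = [("treatment", Element("dose", "number")), ("treatment", Element("agent", "text"))]
print(group_elements_by_category(rows))  # {'treatment': ['dose', 'agent']}
```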