nci-cidc-api-modules 1.2.16__py3-none-any.whl → 1.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/config/db.py +1 -3
- cidc_api/models/models.py +42 -14
- cidc_api/shared/file_handling.py +37 -2
- {nci_cidc_api_modules-1.2.16.dist-info → nci_cidc_api_modules-1.2.17.dist-info}/METADATA +1 -1
- {nci_cidc_api_modules-1.2.16.dist-info → nci_cidc_api_modules-1.2.17.dist-info}/RECORD +8 -8
- {nci_cidc_api_modules-1.2.16.dist-info → nci_cidc_api_modules-1.2.17.dist-info}/WHEEL +0 -0
- {nci_cidc_api_modules-1.2.16.dist-info → nci_cidc_api_modules-1.2.17.dist-info}/licenses/LICENSE +0 -0
- {nci_cidc_api_modules-1.2.16.dist-info → nci_cidc_api_modules-1.2.17.dist-info}/top_level.txt +0 -0
cidc_api/config/db.py
CHANGED
|
@@ -10,8 +10,7 @@ from google.cloud.sql.connector import Connector, IPTypes
|
|
|
10
10
|
from .secrets import get_secrets_manager
|
|
11
11
|
|
|
12
12
|
db = SQLAlchemy()
|
|
13
|
-
BaseModel =
|
|
14
|
-
db.Model = BaseModel
|
|
13
|
+
BaseModel = db.Model
|
|
15
14
|
|
|
16
15
|
connector = Connector()
|
|
17
16
|
|
|
@@ -31,7 +30,6 @@ def getconn():
|
|
|
31
30
|
def init_db(app: Flask):
|
|
32
31
|
"""Connect `app` to the database and run migrations"""
|
|
33
32
|
db.init_app(app)
|
|
34
|
-
db.Model = BaseModel
|
|
35
33
|
Migrate(app, db, app.config["MIGRATIONS_PATH"])
|
|
36
34
|
with app.app_context():
|
|
37
35
|
upgrade(app.config["MIGRATIONS_PATH"])
|
cidc_api/models/models.py
CHANGED
|
@@ -96,7 +96,7 @@ from sqlalchemy import (
|
|
|
96
96
|
String,
|
|
97
97
|
Table,
|
|
98
98
|
)
|
|
99
|
-
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
|
99
|
+
from sqlalchemy.dialects.postgresql import JSONB, UUID, CITEXT
|
|
100
100
|
from sqlalchemy.engine import ResultProxy
|
|
101
101
|
from sqlalchemy.exc import IntegrityError
|
|
102
102
|
from sqlalchemy.ext.hybrid import hybrid_property
|
|
@@ -381,7 +381,7 @@ class Users(CommonColumns):
|
|
|
381
381
|
last_n = Column(String)
|
|
382
382
|
organization = Column(Enum(*ORGS, name="orgs"))
|
|
383
383
|
approval_date = Column(DateTime)
|
|
384
|
-
role = Column(Enum(*ROLES, name="
|
|
384
|
+
role = Column(Enum(*ROLES, name="roles"))
|
|
385
385
|
disabled = Column(Boolean, default=False, server_default="false")
|
|
386
386
|
|
|
387
387
|
@validates("approval_date")
|
|
@@ -606,6 +606,22 @@ class Permissions(CommonColumns):
|
|
|
606
606
|
unique=True,
|
|
607
607
|
postgresql_where=file_group_id.isnot(None),
|
|
608
608
|
),
|
|
609
|
+
Index(
|
|
610
|
+
"unique_trial_id_upload_type_is_null_perms",
|
|
611
|
+
"granted_to_user",
|
|
612
|
+
"trial_id",
|
|
613
|
+
literal_column("(upload_type IS NULL)"),
|
|
614
|
+
unique=True,
|
|
615
|
+
postgresql_where="(upload_type IS NULL)",
|
|
616
|
+
),
|
|
617
|
+
Index(
|
|
618
|
+
"unique_upload_type_trial_id_is_null_perms",
|
|
619
|
+
"granted_to_user",
|
|
620
|
+
literal_column("(trial_id IS NULL)"),
|
|
621
|
+
"upload_type",
|
|
622
|
+
unique=True,
|
|
623
|
+
postgresql_where="(trial_id IS NULL)",
|
|
624
|
+
),
|
|
609
625
|
)
|
|
610
626
|
|
|
611
627
|
# Shorthand to make code related to trial- and upload-type-level permissions
|
|
@@ -2293,7 +2309,7 @@ class DownloadableFiles(CommonColumns):
|
|
|
2293
2309
|
additional_metadata = Column(JSONB, nullable=False)
|
|
2294
2310
|
# TODO rename upload_type, because we store manifests in there too.
|
|
2295
2311
|
# NOTE: this column actually has type CITEXT.
|
|
2296
|
-
upload_type = Column(
|
|
2312
|
+
upload_type = Column(CITEXT, nullable=False)
|
|
2297
2313
|
md5_hash = Column(String, nullable=True)
|
|
2298
2314
|
crc32c_hash = Column(String, nullable=True)
|
|
2299
2315
|
trial_id = Column(String, nullable=False)
|
|
@@ -2313,7 +2329,7 @@ class DownloadableFiles(CommonColumns):
|
|
|
2313
2329
|
# used instead of data_format.
|
|
2314
2330
|
# The columns are left as optional for short term backwards compatibility.
|
|
2315
2331
|
file_name = Column(String, nullable=True)
|
|
2316
|
-
data_format = Column(
|
|
2332
|
+
data_format = Column(CITEXT, nullable=True)
|
|
2317
2333
|
|
|
2318
2334
|
file_groups = relationship(
|
|
2319
2335
|
"FileGroups",
|
|
@@ -3261,11 +3277,11 @@ class PreprocessedFiles(CommonColumns):
|
|
|
3261
3277
|
),
|
|
3262
3278
|
)
|
|
3263
3279
|
|
|
3264
|
-
file_name = Column(String)
|
|
3265
|
-
object_url = Column(String)
|
|
3280
|
+
file_name = Column(String, nullable=False)
|
|
3281
|
+
object_url = Column(String, nullable=False)
|
|
3266
3282
|
job_id = Column(Integer)
|
|
3267
|
-
file_category = Column(String)
|
|
3268
|
-
uploader_email = Column(String)
|
|
3283
|
+
file_category = Column(String, nullable=False)
|
|
3284
|
+
uploader_email = Column(String, nullable=False)
|
|
3269
3285
|
status = Column(String)
|
|
3270
3286
|
version = Column(Integer)
|
|
3271
3287
|
released_version = Column(String)
|
|
@@ -3433,7 +3449,7 @@ class IngestionJobs(CommonColumns):
|
|
|
3433
3449
|
),
|
|
3434
3450
|
)
|
|
3435
3451
|
|
|
3436
|
-
status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="
|
|
3452
|
+
status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="ingestion_job_status"), nullable=False)
|
|
3437
3453
|
trial_id = Column(String, nullable=False)
|
|
3438
3454
|
version = Column(Integer, nullable=False)
|
|
3439
3455
|
pending = Column(Boolean, nullable=False, default=False)
|
|
@@ -3522,16 +3538,17 @@ class JobFileCategories(CommonColumns):
|
|
|
3522
3538
|
["ingestion_jobs.id"],
|
|
3523
3539
|
),
|
|
3524
3540
|
Index(
|
|
3525
|
-
"idx_categories_job_id"
|
|
3541
|
+
"idx_categories_job_id",
|
|
3542
|
+
"job_id",
|
|
3526
3543
|
"category",
|
|
3527
3544
|
unique=True,
|
|
3528
3545
|
),
|
|
3529
3546
|
)
|
|
3530
3547
|
|
|
3531
3548
|
category = Column(String)
|
|
3532
|
-
job_id = Column(Integer)
|
|
3533
|
-
type = Column(Enum("required", "optional", name="type"))
|
|
3534
|
-
is_custom = Column(Boolean, default=False, server_default="false")
|
|
3549
|
+
job_id = Column(Integer, nullable=False)
|
|
3550
|
+
type = Column(Enum("required", "optional", name="type"), nullable=False)
|
|
3551
|
+
is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
|
|
3535
3552
|
|
|
3536
3553
|
@staticmethod
|
|
3537
3554
|
@with_default_session
|
|
@@ -3557,6 +3574,11 @@ class JobFileCategories(CommonColumns):
|
|
|
3557
3574
|
categories = session.query(cls).filter(cls.job_id == job_id, cls.type == type).all()
|
|
3558
3575
|
return [c.category for c in categories]
|
|
3559
3576
|
|
|
3577
|
+
@classmethod
|
|
3578
|
+
@with_default_session
|
|
3579
|
+
def full_categories_for_job(cls, job_id: int, session: Session = None):
|
|
3580
|
+
return session.query(cls).filter_by(job_id=job_id).all()
|
|
3581
|
+
|
|
3560
3582
|
|
|
3561
3583
|
class CategoryDataElements(CommonColumns):
|
|
3562
3584
|
__tablename__ = "category_data_elements"
|
|
@@ -3567,7 +3589,8 @@ class CategoryDataElements(CommonColumns):
|
|
|
3567
3589
|
ondelete="CASCADE",
|
|
3568
3590
|
),
|
|
3569
3591
|
Index(
|
|
3570
|
-
"idx_elements_category_id"
|
|
3592
|
+
"idx_elements_category_id",
|
|
3593
|
+
"category_id",
|
|
3571
3594
|
"name",
|
|
3572
3595
|
unique=True,
|
|
3573
3596
|
),
|
|
@@ -3579,6 +3602,11 @@ class CategoryDataElements(CommonColumns):
|
|
|
3579
3602
|
element_type = Column(String, nullable=False)
|
|
3580
3603
|
cardinality = Column(String, nullable=True)
|
|
3581
3604
|
|
|
3605
|
+
@classmethod
|
|
3606
|
+
@with_default_session
|
|
3607
|
+
def elements_for_category(cls, category_id: int, session: Session = None):
|
|
3608
|
+
return session.query(cls).filter_by(category_id=category_id).all()
|
|
3609
|
+
|
|
3582
3610
|
|
|
3583
3611
|
class FileValidationErrors(CommonColumns):
|
|
3584
3612
|
__tablename__ = "file_validation_errors"
|
cidc_api/shared/file_handling.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
|
|
3
|
+
from pandas import Series, DataFrame
|
|
4
|
+
from sqlalchemy.orm.session import Session
|
|
3
5
|
from werkzeug.datastructures import FileStorage
|
|
4
6
|
from werkzeug.exceptions import BadRequest, InternalServerError
|
|
5
7
|
|
|
6
8
|
from ..config.logging import get_logger
|
|
7
9
|
from ..config.settings import GOOGLE_CLINICAL_DATA_BUCKET
|
|
8
|
-
from ..models import PreprocessedFiles
|
|
10
|
+
from ..models import PreprocessedFiles, TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER
|
|
9
11
|
from ..shared.auth import get_current_user
|
|
10
12
|
from ..shared.gcloud_client import upload_file_to_gcs, move_gcs_file
|
|
11
|
-
from sqlalchemy.orm.session import Session
|
|
12
13
|
|
|
13
14
|
logger = get_logger(__name__)
|
|
14
15
|
|
|
@@ -104,3 +105,37 @@ def strip_filename_and_pending_folder(path_str):
|
|
|
104
105
|
if path.parent.name != "pending":
|
|
105
106
|
raise ValueError("Expected 'pending' folder above file")
|
|
106
107
|
return str(path.parent.parent)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_row_at_condition(df: DataFrame, condition):
|
|
111
|
+
condition_met_index = df[condition].index[0]
|
|
112
|
+
row_at_condition_series = df.iloc[condition_met_index]
|
|
113
|
+
|
|
114
|
+
return row_at_condition_series
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_column(header_row_series: Series, header_name: str, use_raw_header_val: bool = False):
|
|
118
|
+
for idx, raw_header in enumerate(header_row_series):
|
|
119
|
+
if str(raw_header).lower() == header_name.lower():
|
|
120
|
+
return raw_header if use_raw_header_val else header_row_series.index[idx]
|
|
121
|
+
return None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_column_from_appendix_a(appendix_a_df: DataFrame, header_name: str):
|
|
125
|
+
category_column = appendix_a_df.columns[0]
|
|
126
|
+
aa_header_condition = appendix_a_df[category_column] == TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER
|
|
127
|
+
header_row_series = get_row_at_condition(appendix_a_df, aa_header_condition)
|
|
128
|
+
return get_column(header_row_series, header_name)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def get_column_from_first_row(df: DataFrame, header_name: str):
|
|
132
|
+
use_raw_header_val = False
|
|
133
|
+
if df.columns.inferred_type == "integer":
|
|
134
|
+
# If columns are integers (i.e. file was read without headers), treat the first row as header values.
|
|
135
|
+
header_row_series = df.iloc[0]
|
|
136
|
+
else:
|
|
137
|
+
# Otherwise columns already are headers
|
|
138
|
+
header_row_series = Series(df.columns)
|
|
139
|
+
use_raw_header_val = True
|
|
140
|
+
|
|
141
|
+
return get_column(header_row_series, header_name, use_raw_header_val=use_raw_header_val)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
cidc_api/config/__init__.py,sha256=5mX8GAPxUKV84iS-aGOoE-4m68LsOCGCDptXNdlgvj0,148
|
|
2
|
-
cidc_api/config/db.py,sha256=
|
|
2
|
+
cidc_api/config/db.py,sha256=CRgpyw7uVP9v7CTAa7_1dcXURqrfjRcLNjGgZC7iPQE,1627
|
|
3
3
|
cidc_api/config/logging.py,sha256=abhVYtn8lfhIt0tyV2WHFgSmp_s2eeJh7kodB6LH4J0,1149
|
|
4
4
|
cidc_api/config/secrets.py,sha256=jRFj7W43pWuPf9DZQLCKF7WPXf5cUv-BAaS3ASqhV_Q,1481
|
|
5
5
|
cidc_api/config/settings.py,sha256=ttOGvk_6zVMn4dtxIZ2-0w3wF2fpAUVfGpVZbKJ2b6s,4653
|
|
6
6
|
cidc_api/models/__init__.py,sha256=bl445G8Zic9YbhZ8ZBni07wtBMhLJRMBA-JqjLxx2bw,66
|
|
7
7
|
cidc_api/models/migrations.py,sha256=gp9vtkYbA9FFy2s-7woelAmsvQbJ41LO2_DY-YkFIrQ,11464
|
|
8
|
-
cidc_api/models/models.py,sha256=
|
|
8
|
+
cidc_api/models/models.py,sha256=8WoCXeRebLpnQ70cGGRw_URdoGNS0sAfFeIMppwQp0s,147598
|
|
9
9
|
cidc_api/models/schemas.py,sha256=6IE2dJoEMcMbi0Vr1V3cYKnPKU0hv9vRKBixOZHe88s,2766
|
|
10
10
|
cidc_api/models/files/__init__.py,sha256=8BMTnUSHzUbz0lBeEQY6NvApxDD3GMWMduoVMos2g4Y,213
|
|
11
11
|
cidc_api/models/files/details.py,sha256=sZkGM7iEV4-J6IDQCdiMV6KBDLbPxCOqUMaU3aY9rX8,65153
|
|
@@ -14,13 +14,13 @@ cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
14
14
|
cidc_api/shared/auth.py,sha256=PHqmVGkqDjbmUofytVFwD_9ssgUomESl3fFtFHPwZYQ,9062
|
|
15
15
|
cidc_api/shared/email_layout.html,sha256=pBoTNw3ACHH-ncZFaNvcy5bXMqPwizR78usb0uCYtIc,7670
|
|
16
16
|
cidc_api/shared/emails.py,sha256=8kNFEaSnKpY-GX_iE59QUhSp3c4_uzy3SpHYt2QjuqI,6121
|
|
17
|
-
cidc_api/shared/file_handling.py,sha256=
|
|
17
|
+
cidc_api/shared/file_handling.py,sha256=UP3KZ61Km5RNmNfk126_upKv-jGkDzpc5Pvk1s7cU2Q,5585
|
|
18
18
|
cidc_api/shared/gcloud_client.py,sha256=ovXGS2ynaBgB_23prj23H10GNN4fectiVF7Hj4LJXQk,37302
|
|
19
19
|
cidc_api/shared/jose.py,sha256=-qzGzEDAlokEp9E7WtBtQkXyyfPWTYXlwYpCqVJWmqM,1830
|
|
20
20
|
cidc_api/shared/rest_utils.py,sha256=RwR30WOUAYCxL7V-i2totEyeriG30GbBDvBcpLXhM9w,6594
|
|
21
21
|
cidc_api/shared/utils.py,sha256=oDGC8MHxEf7MDuzWynZuE66OfNUnRZE8z7Yn2Q9kYO8,178
|
|
22
|
-
nci_cidc_api_modules-1.2.
|
|
23
|
-
nci_cidc_api_modules-1.2.
|
|
24
|
-
nci_cidc_api_modules-1.2.
|
|
25
|
-
nci_cidc_api_modules-1.2.
|
|
26
|
-
nci_cidc_api_modules-1.2.
|
|
22
|
+
nci_cidc_api_modules-1.2.17.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
|
|
23
|
+
nci_cidc_api_modules-1.2.17.dist-info/METADATA,sha256=cfEKZgJxtFXHOBDexrc-R04ceMLvGhXmbRjiofj3gBw,39540
|
|
24
|
+
nci_cidc_api_modules-1.2.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
25
|
+
nci_cidc_api_modules-1.2.17.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
|
|
26
|
+
nci_cidc_api_modules-1.2.17.dist-info/RECORD,,
|
|
File without changes
|
{nci_cidc_api_modules-1.2.16.dist-info → nci_cidc_api_modules-1.2.17.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{nci_cidc_api_modules-1.2.16.dist-info → nci_cidc_api_modules-1.2.17.dist-info}/top_level.txt
RENAMED
|
File without changes
|