nci-cidc-api-modules 1.1.11-py3-none-any.whl → 1.1.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/config/db.py +2 -5
- cidc_api/config/logging.py +1 -5
- cidc_api/config/secrets.py +1 -3
- cidc_api/config/settings.py +2 -3
- cidc_api/csms/auth.py +1 -3
- cidc_api/models/csms_api.py +29 -97
- cidc_api/models/files/details.py +5 -15
- cidc_api/models/files/facets.py +9 -29
- cidc_api/models/models.py +220 -378
- cidc_api/shared/auth.py +3 -9
- cidc_api/shared/emails.py +8 -16
- cidc_api/shared/gcloud_client.py +33 -98
- cidc_api/shared/jose.py +1 -3
- cidc_api/shared/rest_utils.py +2 -6
- {nci_cidc_api_modules-1.1.11.dist-info → nci_cidc_api_modules-1.1.13.dist-info}/METADATA +1 -1
- nci_cidc_api_modules-1.1.13.dist-info/RECORD +26 -0
- nci_cidc_api_modules-1.1.11.dist-info/RECORD +0 -26
- {nci_cidc_api_modules-1.1.11.dist-info → nci_cidc_api_modules-1.1.13.dist-info}/WHEEL +0 -0
- {nci_cidc_api_modules-1.1.11.dist-info → nci_cidc_api_modules-1.1.13.dist-info}/licenses/LICENSE +0 -0
- {nci_cidc_api_modules-1.1.11.dist-info → nci_cidc_api_modules-1.1.13.dist-info}/top_level.txt +0 -0
cidc_api/models/models.py
CHANGED
@@ -28,6 +28,7 @@ import hashlib
 import os
 import re
 from collections import defaultdict
+from copy import deepcopy
 from datetime import datetime, timedelta
 from enum import Enum as EnumBaseClass
 from functools import wraps
@@ -77,6 +78,7 @@ from sqlalchemy import (
     or_,
     Table,
     MetaData,
+    true,
 )
 from sqlalchemy.dialects.postgresql import JSONB, UUID
 from sqlalchemy.engine import ResultProxy
@@ -92,10 +94,11 @@ from sqlalchemy.sql import (
     # instead of the sqlalchemy.sql versions we are importing here. The solution is to
     # break up this giant file.
     and_ as sql_and,
-
+    or_ as sql_or,
     # select, # ALREADY IMPORTED
     text,
 )
+from sqlalchemy.sql.elements import BooleanClauseList
 from sqlalchemy.sql.functions import coalesce
 from werkzeug.exceptions import BadRequest
 
@@ -184,11 +187,7 @@ class CommonColumns(BaseModel): # type: ignore
             if hasattr(b, "__table__"):
                 columns_to_check.extend(b.__table__.columns)
 
-        ret = {
-            c.name: getattr(self, c.name)
-            for c in columns_to_check
-            if hasattr(self, c.name)
-        }
+        ret = {c.name: getattr(self, c.name) for c in columns_to_check if hasattr(self, c.name)}
         ret = {k: v for k, v in ret.items() if v is not None}
         return ret
 
@@ -269,16 +268,12 @@ class CommonColumns(BaseModel): # type: ignore
         if sort_field:
             # Get the attribute from the class, in case this is a hybrid attribute
             sort_attribute = getattr(cls, sort_field)
-            field_with_dir = (
-                asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
-            )
+            field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
             query = query.order_by(field_with_dir)
             if sort_field != "id":
                 # When sorting, need to guarantee unique order for offset/limit pagination to produce
                 # consistent results. Adding secondary "id" sort field to ensure unique order.
-                secondary_field_with_dir = (
-                    asc("id") if sort_direction == "asc" else desc("id")
-                )
+                secondary_field_with_dir = asc("id") if sort_direction == "asc" else desc("id")
                 query = query.order_by(secondary_field_with_dir)
 
         # Apply filter function
@@ -299,9 +294,7 @@ class CommonColumns(BaseModel): # type: ignore
 
     @classmethod
     @with_default_session
-    def count_by(
-        cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q
-    ) -> Dict[str, int]:
+    def count_by(cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> Dict[str, int]:
         """
         Return a dictionary mapping results of `expr` to the number of times each result
         occurs in the table related to this model. E.g., for the `UploadJobs` model,
@@ -326,9 +319,7 @@ class CommonColumns(BaseModel): # type: ignore
         filter_: Callable[[Query], Query] = lambda q: q,
     ):
         """Get a list of distinct values for the given column."""
-        assert (
-            column_name in cls.__table__.columns.keys()
-        ), f"{cls.__tablename__} has no column {column_name}"
+        assert column_name in cls.__table__.columns.keys(), f"{cls.__tablename__} has no column {column_name}"
 
         base_query = session.query(getattr(cls, column_name))
         filtered_query = filter_(base_query)
@@ -342,9 +333,7 @@ class CommonColumns(BaseModel): # type: ignore
     @classmethod
     def get_unique_columns(cls):
         """Get a list of all the unique columns in this table."""
-        return [
-            column for column in cls.__table__.c if column.unique or column.primary_key
-        ]
+        return [column for column in cls.__table__.c if column.unique or column.primary_key]
 
 
 class CIDCRole(EnumBaseClass):
@@ -392,6 +381,11 @@ class Users(CommonColumns):
         """Returns true if this user is an NCI Biobank user."""
         return self.role == CIDCRole.NCI_BIOBANK_USER.value
 
+    def is_admin_or_nci_user(self) -> bool:
+        """Returns true if this user is a CIDC admin or NCI Biobank user. These users
+        share full access to much of the system."""
+        return self.is_admin() or self.is_nci_user()
+
     def has_download_permissions(self) -> bool:
         """Returns false if this user is a Network Viewer or PACT User."""
         return self.role not in (
@@ -434,9 +428,7 @@ class Users(CommonColumns):
         user = Users.find_by_email(email)
         if not user:
             logger.info("Creating new user with email %s", email)
-            user = Users(
-                email=email, contact_email=email, first_n=first_n, last_n=last_n
-            )
+            user = Users(email=email, contact_email=email, first_n=first_n, last_n=last_n)
             user.insert(session=session)
         return user
 
@@ -450,9 +442,7 @@ class Users(CommonColumns):
         user_inactivity_cutoff = datetime.today() - timedelta(days=INACTIVE_USER_DAYS)
         update_query = (
             update(Users)
-            .where(
-                and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False)
-            )
+            .where(and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False))
             .values(disabled=True)
             .returning(Users.id)
         )
@@ -460,9 +450,7 @@ class Users(CommonColumns):
         if commit:
             session.commit()
 
-        disabled_users = [
-            Users.find_by_id(uid, session=session) for uid in disabled_user_ids
-        ]
+        disabled_users = [Users.find_by_id(uid, session=session) for uid in disabled_user_ids]
         for u in disabled_users:
             Permissions.revoke_user_permissions(u, session=session)
             revoke_bigquery_access(u.email)
@@ -501,15 +489,13 @@ class Users(CommonColumns):
             .union_all(
                 # Handle admins separately, since they can view all data for all
                 # trials even if they have no permissions assigned to them.
-                session.query(
-
-                )
+                session.query(*user_columns, TrialMetadata.trial_id, literal("*,clinical_data")).filter(
+                    Users.role == CIDCRole.ADMIN.value
+                )
             )
         )
 
-        df = pd.DataFrame(
-            query, columns=["email", "organization", "role", "trial_id", "permissions"]
-        ).fillna("*")
+        df = pd.DataFrame(query, columns=["email", "organization", "role", "trial_id", "permissions"]).fillna("*")
 
         with pd.ExcelWriter(
             io
@@ -630,19 +616,11 @@ class Permissions(CommonColumns):
 
         NOTE: values provided to the `commit` argument will be ignored. This method always commits.
         """
-        if (
-            self.upload_type == self.EVERY
-            and self.trial_id == self.EVERY
-            and not self.file_group_id
-        ):
-            raise ValueError(
-                "A permission must have a trial id, upload type, or file group."
-            )
+        if self.upload_type == self.EVERY and self.trial_id == self.EVERY and not self.file_group_id:
+            raise ValueError("A permission must have a trial id, upload type, or file group.")
 
         if self.file_group_id and self.upload_type != "file_group":
-            raise ValueError(
-                "If a permission has a file group, its upload_type must be set to file_group"
-            )
+            raise ValueError("If a permission has a file group, its upload_type must be set to file_group")
 
         grantee = Users.find_by_id(self.granted_to_user, session=session)
         if grantee is None:
@@ -724,11 +702,7 @@ class Permissions(CommonColumns):
         super().insert(session=session, commit=True)
 
         # Don't make any GCS changes if this user doesn't have download access, is disabled, or isn't approved
-        if (
-            not grantee.has_download_permissions()
-            or grantee.disabled
-            or grantee.approval_date is None
-        ):
+        if not grantee.has_download_permissions() or grantee.disabled or grantee.approval_date is None:
             # TODO: pact users do not have download permissions currently
             return
 
@@ -736,9 +710,7 @@ class Permissions(CommonColumns):
             # Grant ACL download permissions in GCS
 
             if self.upload_type == "file_group":
-                Permissions.grant_download_access_to_file_group(
-                    grantee.email, file_group
-                )
+                Permissions.grant_download_access_to_file_group(grantee.email, file_group)
             else:
 
                 # if they have any download permissions, they need the CIDC Lister role
@@ -746,9 +718,7 @@ class Permissions(CommonColumns):
                 grant_download_access(grantee.email, self.trial_id, self.upload_type)
                 # Remove permissions staged for deletion, if any
                 for perm in perms_to_delete:
-                    revoke_download_access(
-                        grantee.email, perm.trial_id, perm.upload_type
-                    )
+                    revoke_download_access(grantee.email, perm.trial_id, perm.upload_type)
         except Exception as e:
             # Add back deleted permissions, if any
             for perm in perms_to_delete:
@@ -760,9 +730,7 @@ class Permissions(CommonColumns):
             raise IAMException("IAM grant failed.") from e
 
     @with_default_session
-    def delete(
-        self, deleted_by: Union[Users, int], session: Session, commit: bool = True
-    ) -> None:
+    def delete(self, deleted_by: Union[Users, int], session: Session, commit: bool = True) -> None:
         """
         Delete this permission record from the database and revoke the corresponding IAM policy binding
         on the GCS data bucket.
@@ -795,9 +763,7 @@ class Permissions(CommonColumns):
                 revoke_lister_access(grantee.email)
 
         except Exception as e:
-            raise IAMException(
-                "IAM revoke failed, and permission db record not removed."
-            ) from e
+            raise IAMException("IAM revoke failed, and permission db record not removed.") from e
 
         info_message = f"admin-action: {deleted_by_user.email} removed from {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
         logger.info(info_message)
@@ -841,10 +807,7 @@ class Permissions(CommonColumns):
                     # if getting EVERY, return all
                     | (upload_type == Permissions.EVERY)
                     # if permission is EVERY, don't return if looking for clinical_data
-                    | (
-                        (Permissions.upload_type == Permissions.EVERY)
-                        & (upload_type != "clinical_data")
-                    )
+                    | ((Permissions.upload_type == Permissions.EVERY) & (upload_type != "clinical_data"))
                 )
             ),
         )
@@ -864,23 +827,16 @@ class Permissions(CommonColumns):
         permissions_list: List[Permissions] = []
         for upload in upload_type:
             permissions_list.extend(
-                Permissions.get_for_trial_type(
-                    trial_id=trial_id, upload_type=upload, session=session
-                )
+                Permissions.get_for_trial_type(trial_id=trial_id, upload_type=upload, session=session)
             )
 
-        permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(
-            lambda: defaultdict(list)
-        )
+        permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(lambda: defaultdict(list))
         for perm in permissions_list:
             permissions_dict[perm.trial_id][perm.upload_type].append(perm)
 
         user_dict: Dict[str, Dict[str, List[Users]]] = {
             trial: {
-                upload: [
-                    Users.find_by_id(id=perm.granted_to_user, session=session)
-                    for perm in perms
-                ]
+                upload: [Users.find_by_id(id=perm.granted_to_user, session=session) for perm in perms]
                 for upload, perms in upload_dict.items()
             }
             for trial, upload_dict in permissions_dict.items()
@@ -895,11 +851,7 @@ class Permissions(CommonColumns):
             for trial, upload_dict in user_dict.items()
         }
         # remove any trial that doesn't have any uploads in it
-        user_email_dict = {
-            trial: upload_dict
-            for trial, upload_dict in user_email_dict.items()
-            if len(upload_dict)
-        }
+        user_email_dict = {trial: upload_dict for trial, upload_dict in user_email_dict.items() if len(upload_dict)}
         return user_email_dict
 
     @staticmethod
@@ -919,14 +871,8 @@ class Permissions(CommonColumns):
             session.query(Permissions)
             .filter(
                 Permissions.granted_to_user == user_id,
-                (
-                    (Permissions.trial_id == trial_id)
-                    & (Permissions.upload_type == upload_type)
-                )
-                | (
-                    (Permissions.trial_id == Permissions.EVERY)
-                    & (Permissions.upload_type == upload_type)
-                )
+                ((Permissions.trial_id == trial_id) & (Permissions.upload_type == upload_type))
+                | ((Permissions.trial_id == Permissions.EVERY) & (Permissions.upload_type == upload_type))
                 | (
                     (Permissions.trial_id == trial_id)
                     # if permission is EVERY, don't return if looking for clinical_data
@@ -949,11 +895,7 @@ class Permissions(CommonColumns):
         )
         if results:
             file_group_ids = {file_group.id for file_group in file_groups}
-            results = [
-                result
-                for result in results
-                if result.file_group_id in file_group_ids
-            ]
+            results = [result for result in results if result.file_group_id in file_group_ids]
 
         results = results and results or None
         return results
@@ -997,9 +939,7 @@ class Permissions(CommonColumns):
             upload_type=[p.upload_type for p in trial_perms],
         )
         for perm in file_group_perms:
-            file_group: FileGroups = FileGroups.find_by_id(
-                perm.file_group_id, session=session
-            )
+            file_group: FileGroups = FileGroups.find_by_id(perm.file_group_id, session=session)
             Permissions.grant_download_access_to_file_group(user.email, file_group)
 
         # Regrant all of the user's intake bucket upload permissions, if they have any
@@ -1039,9 +979,7 @@ class Permissions(CommonColumns):
 
     @classmethod
    @with_default_session
-    def grant_download_permissions_for_upload_job(
-        cls, upload: "UploadJobs", session: Session
-    ) -> None:
+    def grant_download_permissions_for_upload_job(cls, upload: "UploadJobs", session: Session) -> None:
         """
         For a given UploadJob, issue all relevant Permissions on Google
         Loads all cross-trial permissions for the upload_type
@@ -1059,21 +997,14 @@ class Permissions(CommonColumns):
             filters.append(cls.upload_type == upload.upload_type)
         else:
             # upload.upload_type can't be None
-            filters.append(
-                or_(cls.upload_type == upload.upload_type, cls.upload_type == None)
-            )
+            filters.append(or_(cls.upload_type == upload.upload_type, cls.upload_type == None))
 
         perms = session.query(cls).filter(*filters).all()
         user_email_list: List[str] = []
 
         for perm in perms:
             user = Users.find_by_id(perm.granted_to_user, session=session)
-            if (
-                user.is_admin()
-                or user.is_nci_user()
-                or user.disabled
-                or user.email in user_email_list
-            ):
+            if user.is_admin() or user.is_nci_user() or user.disabled or user.email in user_email_list:
                 continue
 
             user_email_list.append(user.email)
@@ -1102,10 +1033,8 @@ class ValidationMultiError(Exception):
     """Holds multiple jsonschema.ValidationErrors"""
 
 
-trial_metadata_validator: json_validation._Validator = (
-    json_validation.load_and_validate_schema(
-        "clinical_trial.json", return_validator=True
-    )
+trial_metadata_validator: json_validation._Validator = json_validation.load_and_validate_schema(
+    "clinical_trial.json", return_validator=True
 )
 
 FileBundle = Dict[str, Dict[FilePurpose, List[int]]]
@@ -1121,13 +1050,13 @@ class TrialMetadata(CommonColumns):
     _metadata_idx = Index("metadata_idx", metadata_json, postgresql_using="gin")
 
     @staticmethod
-    def validate_metadata_json(metadata_json: dict) -> dict:
-        # Prior to running trial_metadata_validator.iter_error_messages on the metadata_json,
-        # strip out unnecessary manifest data for the validation so that
-        # that no longer conform to the post-CSMS-integration schema can be kept.
+    def validate_metadata_json(metadata_json: dict, strip_metadata=True) -> dict:
+        # Prior to running trial_metadata_validator.iter_error_messages on the metadata_json, if
+        # strip_metadata=True, will strip out unnecessary manifest data for the validation so that
+        # existing manifest data that no longer conform to the post-CSMS-integration schema can be kept.
         # See more details in the strip_metadata_for_validation function docs.
-        metadata_to_validate = json_validation.strip_metadata_for_validation(
-            metadata_json
-        )
+        metadata_to_validate = (
+            json_validation.strip_metadata_for_validation(metadata_json) if strip_metadata else metadata_json
+        )
         errs = trial_metadata_validator.iter_error_messages(metadata_to_validate)
         messages = list(f"'metadata_json': {err}" for err in errs)
@@ -1163,49 +1092,34 @@ class TrialMetadata(CommonColumns):
         Find a trial by its CIMAC id.
         """
         try:
-            trial = (
-                session.query(TrialMetadata)
-                .filter_by(trial_id=trial_id)
-                .with_for_update()
-                .one()
-            )
+            trial = session.query(TrialMetadata).filter_by(trial_id=trial_id).with_for_update().one()
         except NoResultFound as e:
             raise NoResultFound(f"No trial found with id {trial_id}") from e
         return trial
 
     @staticmethod
     @with_default_session
-    def patch_assays(
-        trial_id: str, assay_patch: dict, session: Session, commit: bool = False
-    ):
+    def patch_assays(trial_id: str, assay_patch: dict, session: Session, commit: bool = False):
         """
         Applies assay updates to the metadata object from the trial with id `trial_id`.
 
         TODO: apply this update directly to the not-yet-existent TrialMetadata.manifest field
         """
-        return TrialMetadata._patch_trial_metadata(
-            trial_id, assay_patch, session=session, commit=commit
-        )
+        return TrialMetadata._patch_trial_metadata(trial_id, assay_patch, session=session, commit=commit)
 
     @staticmethod
     @with_default_session
-    def patch_manifest(
-        trial_id: str, manifest_patch: dict, session: Session, commit: bool = False
-    ):
+    def patch_manifest(trial_id: str, manifest_patch: dict, session: Session, commit: bool = False):
         """
         Applies manifest updates to the metadata object from the trial with id `trial_id`.
 
         TODO: apply this update directly to the not-yet-existent TrialMetadata.assays field
         """
-        return TrialMetadata._patch_trial_metadata(
-            trial_id, manifest_patch, session=session, commit=commit
-        )
+        return TrialMetadata._patch_trial_metadata(trial_id, manifest_patch, session=session, commit=commit)
 
     @staticmethod
     @with_default_session
-    def _patch_trial_metadata(
-        trial_id: str, json_patch: dict, session: Session, commit: bool = False
-    ):
+    def _patch_trial_metadata(trial_id: str, json_patch: dict, session: Session, commit: bool = False):
         """
         Applies updates to the metadata object from the trial with id `trial_id`
         and commits current session.
@@ -1217,9 +1131,7 @@ class TrialMetadata(CommonColumns):
         trial = TrialMetadata.select_for_update_by_trial_id(trial_id, session=session)
 
         # Merge assay metadata into the existing clinical trial metadata
-        updated_metadata, errs = prism.merge_clinical_trial_metadata(
-            json_patch, trial.metadata_json
-        )
+        updated_metadata, errs = prism.merge_clinical_trial_metadata(json_patch, trial.metadata_json)
         if errs:
             raise ValidationMultiError(errs)
         # Save updates to trial record
@@ -1234,9 +1146,7 @@ class TrialMetadata(CommonColumns):
 
     @staticmethod
     @with_default_session
-    def create(
-        trial_id: str, metadata_json: dict, session: Session, commit: bool = True
-    ):
+    def create(trial_id: str, metadata_json: dict, session: Session, commit: bool = True):
         """
         Create a new clinical trial metadata record.
         """
@@ -1248,9 +1158,7 @@ class TrialMetadata(CommonColumns):
         return trial
 
     @staticmethod
-    def merge_gcs_artifact(
-        metadata: dict, upload_type: str, uuid: str, gcs_object: Blob
-    ):
+    def merge_gcs_artifact(metadata: dict, upload_type: str, uuid: str, gcs_object: Blob):
         return prism.merge_artifact(
             ct=metadata,
             assay_type=upload_type, # assay_type is the old name for upload_type
@@ -1263,9 +1171,7 @@ class TrialMetadata(CommonColumns):
         )
 
     @staticmethod
-    def merge_gcs_artifacts(
-        metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]
-    ):
+    def merge_gcs_artifacts(metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]):
         return prism.merge_artifacts(
             metadata,
             [
@@ -1354,32 +1260,16 @@ class TrialMetadata(CommonColumns):
         subqueries = []
 
         if include_file_bundles:
-
-            if user and not user.is_admin() and not user.is_nci_user():
-                permissions = Permissions.find_for_user(user.id)
-                # An 'empty' upload_type means full trial-level access
-                allowed_upload_types = [
-                    p.upload_type for p in permissions if p.upload_type
-                ]
-                logger.info(
-                    f"Restricting file bundle for user {user.id} to {allowed_upload_types=}"
-                )
+            file_bundle_query = DownloadableFiles.build_file_bundle_query()
 
-            file_bundle_query = DownloadableFiles.build_file_bundle_query(
-                allowed_upload_types
-            )
             columns.append(file_bundle_query.c.file_bundle)
             subqueries.append(file_bundle_query)
 
         if include_counts:
             trial_summaries: List[dict] = cls.get_summaries()
 
-            participant_counts: Dict[str, int] = {
-                t["trial_id"]: t["total_participants"] for t in trial_summaries
-            }
-            sample_counts: Dict[str, int] = {
-                t["trial_id"]: t["total_samples"] for t in trial_summaries
-            }
+            participant_counts: Dict[str, int] = {t["trial_id"]: t["total_participants"] for t in trial_summaries}
+            sample_counts: Dict[str, int] = {t["trial_id"]: t["total_samples"] for t in trial_summaries}
 
         # Combine all query components
         query = session.query(*columns)
@@ -1387,6 +1277,7 @@ class TrialMetadata(CommonColumns):
             # Each subquery will have a trial_id column and one record per trial id
             query = query.outerjoin(subquery, cls.trial_id == subquery.c.trial_id)
 
+        query = query.order_by(cls.trial_id)
         query = cls._add_pagination_filters(query, **pagination_args)
 
         trials = []
@@ -1402,25 +1293,43 @@ class TrialMetadata(CommonColumns):
                 setattr(trial, column, value)
 
             if include_counts:
-                setattr(
-                    trial, "num_participants", participant_counts.get(trial.trial_id, 0)
-                )
+                setattr(trial, "num_participants", participant_counts.get(trial.trial_id, 0))
                 setattr(trial, "num_samples", sample_counts.get(trial.trial_id, 0))
 
             if include_file_bundles and hasattr(trial, "file_bundle"):
-
+                # File bundle has all existing object ids. Remove ones that aren't allowed by permissions.
+
+                # Gather all object ids in the file bundle
+                all_object_ids = set()
+
+                for assay, purposes in trial.file_bundle.items():
+                    for purpose, object_ids in purposes.items():
+                        all_object_ids = all_object_ids.union(object_ids)
+                # Remove any impermissible object ids
+                filtered_object_ids = DownloadableFiles.filter_object_ids_by_permissions(user, all_object_ids)
+                logger.debug(f"Filtered object ids: {len(all_object_ids)} -> {len(filtered_object_ids)}")
+
+                for assay, purposes in trial.file_bundle.items():
                     size_results = {}
-                    for
-                        ids
-
-
-
-
-
-                        )
-                    )
+                    for purpose, object_ids in purposes.items():
+                        # Only allow object ids that are permitted
+                        permitted_object_ids = list(set(object_ids).intersection(filtered_object_ids))
+                        trial.file_bundle[assay][purpose] = permitted_object_ids
+                        if permitted_object_ids:
+                            # For any files left in the purpose, get their total size
+                            filter_ = lambda q: q.filter(DownloadableFiles.id.in_(permitted_object_ids))
+                            size_results[f"{purpose}_size"] = DownloadableFiles.get_total_bytes(filter_=filter_)
                     trial.file_bundle[assay].update(size_results)
 
+                # Trim the file bundle
+                for assay, purposes in deepcopy(trial.file_bundle).items():
+                    for purpose, object_ids in purposes.items():
+                        if not object_ids:
+                            # No file ids left in the purpose after filtering. Remove this purpose from the bundle.
+                            del trial.file_bundle[assay][purpose]
+                    if not trial.file_bundle[assay]:
+                        del trial.file_bundle[assay]
+
             trials.append(trial)
 
         return trials
@@ -2017,9 +1926,7 @@ class TrialMetadata(CommonColumns):
         summaries_query = "SELECT result FROM trial_summaries_mv"
         # Retrieve trial-level summary results from data cached in trial_summaries_mv materialized view.
         # The source of the SQL query used in trial_summaries_mv is get_summaries_query()
-        summaries = [
-            summary for (summary,) in session.execute(summaries_query) if summary
-        ]
+        summaries = [summary for (summary,) in session.execute(summaries_query) if summary]
 
         # Shortcut to impute 0 values for assays where trials don't yet have data
         summaries = pd.DataFrame(summaries).fillna(0).to_dict("records")
@@ -2037,9 +1944,7 @@ class UploadJobStatus(EnumBaseClass):
     MERGE_FAILED = "merge-failed"
 
     @classmethod
-    def is_valid_transition(
-        cls, current: str, target: str, is_manifest: bool = False
-    ) -> bool:
+    def is_valid_transition(cls, current: str, target: str, is_manifest: bool = False) -> bool:
         """
         Enforce logic about which state transitions are valid. E.g.,
         an upload whose status is "merge-completed" should never be updated
@@ -2084,9 +1989,7 @@ class UploadJobs(CommonColumns):
     )
 
     # The current status of the upload job
-    _status = Column(
-        "status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False
-    )
+    _status = Column("status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False)
     # A long, random identifier for this upload job
     token = Column(UUID, server_default=text("gen_random_uuid()"), nullable=False)
     # Text containing feedback on why the upload status is what it is
@@ -2108,9 +2011,7 @@ class UploadJobs(CommonColumns):
     trial_id = Column(String, nullable=False, index=True)
 
     # Create a GIN index on the GCS object names
-    _gcs_objects_idx = Index(
-        "upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin"
-    )
+    _gcs_objects_idx = Index("upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin")
 
     @hybrid_property
     def status(self):
@@ -2124,9 +2025,7 @@ class UploadJobs(CommonColumns):
         old_status = self.status or UploadJobStatus.STARTED.value
         is_manifest = self.upload_type in prism.SUPPORTED_MANIFESTS
         if not UploadJobStatus.is_valid_transition(old_status, status, is_manifest):
-            raise ValueError(
-                f"Upload job with status {self.status} can't transition to status {status}"
-            )
+            raise ValueError(f"Upload job with status {self.status} can't transition to status {status}")
         self._status = status
 
     def _set_status_no_validation(self, status: str):
@@ -2166,9 +2065,7 @@ class UploadJobs(CommonColumns):
         assert prism.PROTOCOL_ID_FIELD_NAME in metadata, "metadata must have a trial ID"
 
         is_manifest_upload = upload_type in prism.SUPPORTED_MANIFESTS
-        assert (
-            gcs_file_map is not None or is_manifest_upload
-        ), "assay/analysis uploads must have a gcs_file_map"
+        assert gcs_file_map is not None or is_manifest_upload, "assay/analysis uploads must have a gcs_file_map"
 
         trial_id = metadata[prism.PROTOCOL_ID_FIELD_NAME]
 
@@ -2219,9 +2116,7 @@ class UploadJobs(CommonColumns):
             job.metadata_patch,
             updated_artifact,
             _,
-        ) = prism.merge_artifact_extra_metadata(
-            job.metadata_patch, uuid, job.upload_type, file
-        )
+        ) = prism.merge_artifact_extra_metadata(job.metadata_patch, uuid, job.upload_type, file)
         logger.info("Updated md for %s: %s", uuid, updated_artifact.keys())
 
         # A workaround fix for JSON field modifications not being tracked
@@ -2243,25 +2138,14 @@ class UploadJobs(CommonColumns):
     @classmethod
     @with_default_session
     def find_first_manifest_job(cls, trial_id: str, session):
-        return (
-            session.query(UploadJobs)
-            .filter_by(trial_id=trial_id, gcs_xlsx_uri="")
-            .order_by(text("id ASC"))
-            .first()
-        )
+        return session.query(UploadJobs).filter_by(trial_id=trial_id, gcs_xlsx_uri="").order_by(text("id ASC")).first()
 
     @with_default_session
-    def ingestion_success(
-        self, trial, session: Session, commit: bool = False, send_email: bool = False
-    ):
+    def ingestion_success(self, trial, session: Session, commit: bool = False, send_email: bool = False):
         """Set own status to reflect successful merge and trigger email notifying CIDC admins."""
         # Do status update if the transition is valid
-        if not UploadJobStatus.is_valid_transition(
-            self.status, UploadJobStatus.MERGE_COMPLETED.value
-        ):
-            raise Exception(
-                f"Cannot declare ingestion success given current status: {self.status}"
-            )
+        if not UploadJobStatus.is_valid_transition(self.status, UploadJobStatus.MERGE_COMPLETED.value):
+            raise Exception(f"Cannot declare ingestion success given current status: {self.status}")
         self.status = UploadJobStatus.MERGE_COMPLETED.value
 
         if commit:
@@ -2277,11 +2161,7 @@ class FilesToFileGroups(BaseModel):
     """
 
     __tablename__ = "files_to_file_groups"
-    __table_args__ = (
-        PrimaryKeyConstraint(
-            "file_group_id", "file_id", name="pk_files_to_file_groups"
-        ),
-    )
+    __table_args__ = (PrimaryKeyConstraint("file_group_id", "file_id", name="pk_files_to_file_groups"),)
     file_group_id = Column(ForeignKey("file_groups.id"), primary_key=True)
     file_id = Column(ForeignKey("downloadable_files.id"), primary_key=True)
     _created = Column(DateTime, default=func.now(), nullable=False)
@@ -2504,9 +2384,7 @@ class DownloadableFiles(CommonColumns):
         return downloadable_files_for_query, files_to_file_groups_for_query
 
     @classmethod
-    def _convert_list_results(
-        cls, downloadable_files_for_query: Table, query_files: List
-    ):
+    def _convert_list_results(cls, downloadable_files_for_query: Table, query_files: List):
         """Converts the results of a SQLalchemy expression language query into actual DownloadableFiles
         objects. This is necessary since the UI depends on some of the derived properties in
         DownloadableFiles.
@@ -2544,9 +2422,7 @@ class DownloadableFiles(CommonColumns):
             where_clauses.append(downloadable_files_for_query.c.trial_id.in_(trial_ids))
         if facets:
             facet_groups = get_facet_groups_for_paths(facets)
-            where_clauses.append(
-                downloadable_files_for_query.c.facet_group.in_(facet_groups)
-            )
+            where_clauses.append(downloadable_files_for_query.c.facet_group.in_(facet_groups))
 
         if user and not is_admin:
             permissions = Permissions.find_for_user(user.id)
@@ -2562,27 +2438,15 @@ class DownloadableFiles(CommonColumns):
                 elif permission.file_group_id is None:
                     where_clauses.append(
                         sql_and(
-                            (
-                                downloadable_files_for_query.c.trial_id
-                                == permission.trial_id
-                            ),
-                            (
-                                downloadable_files_for_query.c.upload_type
-                                == permission.upload_type
-                            ),
+                            (downloadable_files_for_query.c.trial_id == permission.trial_id),
+                            (downloadable_files_for_query.c.upload_type == permission.upload_type),
                         )
                     )
                 else:
                     where_clauses.append(
                         sql_and(
-                            (
-                                downloadable_files_for_query.c.trial_id
-                                == permission.trial_id
-                            ),
-                            (
-                                files_to_file_groups_for_query.c.file_group_id
-                                == permission.file_group_id
-                            ),
+                            (downloadable_files_for_query.c.trial_id == permission.trial_id),
+                            (files_to_file_groups_for_query.c.file_group_id == permission.file_group_id),
                         )
                     )
 
@@ -2596,9 +2460,7 @@ class DownloadableFiles(CommonColumns):
                     )
                 )
             if full_type_perms:
-                where_clauses.append(
-                    downloadable_files_for_query.c.upload_type.in_(full_type_perms)
-                )
+                where_clauses.append(downloadable_files_for_query.c.upload_type.in_(full_type_perms))
 
             # Need to be careful about return logic. Empty results could be because the user
             # is an admin, whereas None means the user has no permissions to view any files.
@@ -2607,6 +2469,55 @@ class DownloadableFiles(CommonColumns):
 
         return None
 
+    @classmethod
+    def _generate_where_clause_with_permissions(cls, user: Users) -> BooleanClauseList:
+        """
+        Returns a where clause for DownloadableFiles filtered down to only the files the user
+        has access to based on their permissions and role.
+
+        The generated clause will have this form
+        WHERE
+            downloadable_files.trial_id IN ('x', 'y', ...) AND upload_type != 'clinical_data' <- trial-level permissions
+        OR
+            upload_type IN ('u', 'v', ...)) <- upload-type-level permissions
+        OR
+            trial_id = '5' AND upload_type = 'mif' <- regular permissions
+        OR
+            trial_id = '6' and upload_type = 'hande'
+        ...
+        """
+        # From the perspective of viewing files, NCI Biobank users are admins.
+        if user.is_admin_or_nci_user():
+            return true()  # Admin has full permissions to all
+
+        permissions = Permissions.find_for_user(user.id)
+
+        full_access_trial_ids = [p.trial_id for p in permissions if not p.upload_type]
+        full_access_upload_types = [p.upload_type for p in permissions if not p.trial_id]
+        regular_permissions = [p for p in permissions if p.trial_id and p.upload_type]
+
+        full_access_trial_clause = sql_and(
+            DownloadableFiles.trial_id.in_(full_access_trial_ids),
+            DownloadableFiles.upload_type != "clinical_data",
+        )
+
+        full_access_upload_type_clause = sql_or(DownloadableFiles.upload_type.in_(full_access_upload_types))
+
+        regular_permission_clauses = [
+            sql_and(
+                DownloadableFiles.trial_id == p.trial_id,
+                DownloadableFiles.upload_type == p.upload_type,
+            )
+            for p in regular_permissions
+        ]
+        clause = sql_or(
+            full_access_trial_clause,
+            full_access_upload_type_clause,
+            *regular_permission_clauses,
+        )
+
+        return clause
+
     @classmethod
     @with_default_session
     def list_with_permissions(
@@ -2646,26 +2557,18 @@ class DownloadableFiles(CommonColumns):
         if where_clauses:
 
             # No where clause (the user is likely an admin).
-            statement = select([downloadable_files_for_query]).select_from(
-                downloadable_files_for_query
-            )
+            statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)
 
         else:
             statement = (
                 select([downloadable_files_for_query])
                 .where(sql_and(*where_clauses))
-                .select_from(
-                    downloadable_files_for_query.outerjoin(
-                        files_to_file_groups_for_query
-                    )
-                )
+                .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
             )
 
         if sort_field:
             sort_attribute = getattr(cls, sort_field)
-            field_with_dir = (
-                asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
-            )
+            field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
             statement = statement.order_by(field_with_dir)
 
         # Enforce positive page numbers
@@ -2722,28 +2625,22 @@ class DownloadableFiles(CommonColumns):
         if where_clauses:
 
            # No where clause (the user is likely an admin).
-            statement = select(
-
-            )
+            statement = select([func.count(downloadable_files_for_query.c.id)]).select_from(
+                downloadable_files_for_query
+            )
 
         else:
             statement = (
                 select([func.count(downloadable_files_for_query.c.id)])
                 .where(sql_and(*where_clauses))
-                .select_from(
-                    downloadable_files_for_query.outerjoin(
-                        files_to_file_groups_for_query
-                    )
-                )
+                .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
             )
 
         return session.execute(statement).fetchone()[0]
 
     @classmethod
     @with_default_session
-    def count_by_facet_with_permissions(
-        cls, session: Session, trial_ids: List[str] = None, user: Users = None
-    ):
+    def count_by_facet_with_permissions(cls, session: Session, trial_ids: List[str] = None, user: Users = None):
         """
         Returns a map of facet_group to a count of the number of files that the given user
         has permissions to view.
@@ -2787,11 +2684,7 @@ class DownloadableFiles(CommonColumns):
                 ]
             )
             .where(sql_and(*where_clauses))
-            .select_from(
-                downloadable_files_for_query.outerjoin(
-                    files_to_file_groups_for_query
-                )
-            )
+            .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
         )
 
         statement = statement.group_by(downloadable_files_for_query.c.facet_group)
@@ -2835,35 +2728,35 @@ class DownloadableFiles(CommonColumns):
         else:
             statement = (
                 select([downloadable_files_for_query.c.object_url])
-                .where(
-                    sql_and(*where_clauses, downloadable_files_for_query.c.id.in_(ids))
-                )
-                .select_from(
-                    downloadable_files_for_query.outerjoin(
-                        files_to_file_groups_for_query
-                    )
-                )
+                .where(sql_and(*where_clauses, downloadable_files_for_query.c.id.in_(ids)))
+                .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
             )
 
         return [row[0] for row in session.execute(statement).fetchall()]
 
     @classmethod
-    def _generate_trial_file_counts(
-        cls, downloadable_files: Iterable
-    ) -> Dict[str, int]:
+    @with_default_session
+    def filter_object_ids_by_permissions(cls, user: Users, ids: Iterable[int], session: Session) -> Iterable[int]:
+        """
+        Takes a list of object ids and filters it to return only those object ids the user has permission for.
+        """
+
+        where_clause = DownloadableFiles._generate_where_clause_with_permissions(user)
+        statement = select([DownloadableFiles.id]).where(sql_and(DownloadableFiles.id.in_(ids), where_clause))
+
+        return [row[0] for row in session.execute(statement).fetchall()]
+
+    @classmethod
+    def _generate_trial_file_counts(cls, downloadable_files: Iterable) -> Dict[str, int]:
         results = defaultdict(lambda: 0)
         for downloadable_file in downloadable_files:
             if downloadable_file.data_category:
-                results[downloadable_file.trial_id] = (
-                    results[downloadable_file.trial_id] + 1
-                )
+                results[downloadable_file.trial_id] = results[downloadable_file.trial_id] + 1
        return results
 
     @classmethod
     @with_default_session
-    def remove_participants_and_samples_info_files(
-        cls, trial_id: str, session: Session
-    ):
+    def remove_participants_and_samples_info_files(cls, trial_id: str, session: Session):
         """
         Remove participants info and samples info downloadable files
         """
@@ -2911,27 +2804,19 @@ class DownloadableFiles(CommonColumns):
         if not where_clauses:
 
             # No where clause (the user is likely an admin).
-            statement = select([downloadable_files_for_query]).select_from(
-                downloadable_files_for_query
-            )
+            statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)
 
         else:
            statement = (
                 select([downloadable_files_for_query])
                 .where(sql_and(*where_clauses))
-                .select_from(
-                    downloadable_files_for_query.outerjoin(
-                        files_to_file_groups_for_query
-                    )
-                )
+                .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
             )
 
         downloadable_files = DownloadableFiles._convert_list_results(
             downloadable_files_for_query, session.execute(statement).fetchall()
         )
-        trial_file_counts = DownloadableFiles._generate_trial_file_counts(
-            downloadable_files
-        )
+        trial_file_counts = DownloadableFiles._generate_trial_file_counts(downloadable_files)
         return build_trial_facets(trial_file_counts)
 
     @with_default_session
@@ -2956,13 +2841,9 @@ class DownloadableFiles(CommonColumns):
                 "trial_id": self.trial_id,
                 "id": self.id,
             }
-            related_files = result_proxy_to_models(
-                session.execute(query, params), DownloadableFiles
-            )
+            related_files = result_proxy_to_models(session.execute(query, params), DownloadableFiles)
         else:
-            not_sample_specific = not_(
-                literal_column("additional_metadata::text").like('%.cimac_id":%')
-            )
+            not_sample_specific = not_(literal_column("additional_metadata::text").like('%.cimac_id":%'))
             related_files = (
                 session.query(DownloadableFiles)
                 .filter(
@@ -3018,9 +2899,7 @@ class DownloadableFiles(CommonColumns):
                 full_type_perms.append(perm.upload_type)
             else:
                 trial_type_perms.append((perm.trial_id, perm.upload_type))
-        df_tuples = tuple_(
-            DownloadableFiles.trial_id, DownloadableFiles.upload_type
-        )
+        df_tuples = tuple_(DownloadableFiles.trial_id, DownloadableFiles.upload_type)
         file_filters.append(
             or_(
                 # don't include clinical_data in cross-trial permission
@@ -3069,16 +2948,9 @@ class DownloadableFiles(CommonColumns):
         etag = make_etag(filtered_metadata.values())
 
         object_url = filtered_metadata["object_url"]
-        df = (
-            session.query(DownloadableFiles)
-            .filter_by(object_url=object_url)
-            .with_for_update()
-            .first()
-        )
+        df = session.query(DownloadableFiles).filter_by(object_url=object_url).with_for_update().first()
         if df:
-            df = session.merge(
-                DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata)
-            )
+            df = session.merge(DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata))
         else:
             df = DownloadableFiles(_etag=etag, **filtered_metadata)
 
@@ -3107,12 +2979,7 @@ class DownloadableFiles(CommonColumns):
         """
 
         # trying to find existing one
-        df = (
-            session.query(DownloadableFiles)
-            .filter_by(object_url=blob.name)
-            .with_for_update()
-            .first()
-        )
+        df = session.query(DownloadableFiles).filter_by(object_url=blob.name).with_for_update().first()
         if not df:
             df = DownloadableFiles()
 
@@ -3145,18 +3012,14 @@ class DownloadableFiles(CommonColumns):
 
     @classmethod
     @with_default_session
-    def list_object_urls(
-        cls, ids: List[int], session: Session, filter_: Callable[[Query], Query]
-    ) -> List[str]:
+    def list_object_urls(cls, ids: List[int], session: Session, filter_: Callable[[Query], Query]) -> List[str]:
         """Get all object_urls for a batch of downloadable file record IDs"""
         query = session.query(cls.object_url).filter(cls.id.in_(ids))
         query = filter_(query)
         return [r[0] for r in query.all()]
 
     @classmethod
-    def build_file_bundle_query(
-        cls, allowed_upload_types: Optional[List[str]]
-    ) -> Query:
+    def build_file_bundle_query(cls) -> Query:
         """
         Build a query that selects nested file bundles from the downloadable files table.
         The `file_bundles` query below should produce one bundle per unique `trial_id` that
@@ -3171,8 +3034,6 @@ class DownloadableFiles(CommonColumns):
         }
         ```
         where "type" is something like `"Olink"` or `"Participants Info"` and "purpose" is a `FilePurpose` string.
-
-        If `allowed_upload_types` is provided, the query will filter by files that only have an `upload_type` that appear in the list.
         """
         tid_col, type_col, purp_col, ids_col, purps_col = (
             literal_column("trial_id"),
@@ -3182,28 +3043,24 @@ class DownloadableFiles(CommonColumns):
             literal_column("purposes"),
         )
 
-        id_bundles =
-
-
-
-
-
-
-
-
-
-
-
-        id_bundles = id_bundles.alias("id_bundles")
-
+        id_bundles = (
+            select(
+                [
+                    cls.trial_id,
+                    cls.data_category_prefix.label(type_col.key),
+                    cls.file_purpose.label(purp_col.key),
+                    func.json_agg(cls.id).label(ids_col.key),
+                ]
+            )
+            .group_by(cls.trial_id, cls.data_category_prefix, cls.file_purpose)
+            .alias("id_bundles")
+        )
         purpose_bundles = (
             select(
                 [
                     tid_col,
                     type_col,
-                    func.json_object_agg(
-                        func.coalesce(purp_col, "miscellaneous"), ids_col
-                    ).label(purps_col.key),
+                    func.json_object_agg(func.coalesce(purp_col, "miscellaneous"), ids_col).label(purps_col.key),
                 ]
             )
             .select_from(id_bundles)
@@ -3214,9 +3071,7 @@ class DownloadableFiles(CommonColumns):
             select(
                 [
                     tid_col.label(tid_col.key),
-                    func.json_object_agg(
-                        func.coalesce(type_col, "other"), purps_col
-                    ).label("file_bundle"),
+                    func.json_object_agg(func.coalesce(type_col, "other"), purps_col).label("file_bundle"),
                 ]
             )
             .select_from(purpose_bundles)
@@ -3227,9 +3082,7 @@ class DownloadableFiles(CommonColumns):
 
     @classmethod
     @with_default_session
-    def get_total_bytes(
-        cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
-    ) -> int:
+    def get_total_bytes(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> int:
         """Get the total number of bytes of data stored across all files."""
         filtered_query = filter_(session.query(func.sum(cls.file_size_bytes)))
         total_bytes = filtered_query.one()[0]
@@ -3238,9 +3091,7 @@ class DownloadableFiles(CommonColumns):
 
     @classmethod
     @with_default_session
-    def get_trial_facets(
-        cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
-    ):
+    def get_trial_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
         trial_file_counts = cls.count_by(
             cls.trial_id,
             session=session,
@@ -3254,12 +3105,8 @@ class DownloadableFiles(CommonColumns):
     # TODO fix this
     @classmethod
     @with_default_session
-    def get_data_category_facets(
-        cls, session
-    ):
-        facet_group_file_counts = cls.count_by(
-            cls.facet_group, session=session, filter_=filter_
-        )
+    def get_data_category_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
+        facet_group_file_counts = cls.count_by(cls.facet_group, session=session, filter_=filter_)
         data_category_facets = build_data_category_facets(facet_group_file_counts)
         return data_category_facets
 
@@ -3275,10 +3122,7 @@ class DownloadableFiles(CommonColumns):
 # Query clause for computing a downloadable file's data category.
 # Used above in the DownloadableFiles.data_category computed property.
 DATA_CATEGORY_CASE_CLAUSE = case(
-    [
-        (DownloadableFiles.facet_group == k, v)
-        for k, v in facet_groups_to_categories.items()
-    ]
+    [(DownloadableFiles.facet_group == k, v) for k, v in facet_groups_to_categories.items()]
 )
 
 # Query clause for computing a downloadable file's file purpose.
@@ -3291,9 +3135,7 @@ FILE_PURPOSE_CASE_CLAUSE = case(
 )
 
 
-def result_proxy_to_models(
-    result_proxy: ResultProxy, model: BaseModel
-) -> List[BaseModel]:
+def result_proxy_to_models(result_proxy: ResultProxy, model: BaseModel) -> List[BaseModel]:
     """Materialize a sqlalchemy `result_proxy` iterable as a list of `model` instances"""
     return [model(**dict(row_proxy)) for row_proxy in result_proxy.all()]
 
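The main behavioral change in models.py above is that file listings and file bundles are now filtered by per-user download permissions (see the new DownloadableFiles._generate_where_clause_with_permissions and filter_object_ids_by_permissions methods, and Users.is_admin_or_nci_user). The snippet below is a minimal, self-contained Python sketch of that filtering rule for illustration only; the FileRecord and Permission dataclasses and the allowed() helper are hypothetical stand-ins for the package's SQLAlchemy models, not part of its API.

from dataclasses import dataclass
from typing import List, Optional


@dataclass
class FileRecord:
    id: int
    trial_id: str
    upload_type: str


@dataclass
class Permission:
    trial_id: Optional[str]     # None means "every trial"
    upload_type: Optional[str]  # None means "every upload type"


def allowed(f: FileRecord, perms: List[Permission], is_admin_or_nci: bool) -> bool:
    """Mirror the generated WHERE clause: trial-wide permissions exclude clinical_data,
    upload-type-wide permissions match any trial, and regular permissions need both to match."""
    if is_admin_or_nci:
        return True  # admins and NCI Biobank users see everything
    for p in perms:
        if p.trial_id and not p.upload_type:
            # trial-level permission: everything in the trial except clinical_data
            if f.trial_id == p.trial_id and f.upload_type != "clinical_data":
                return True
        elif p.upload_type and not p.trial_id:
            # upload-type-level permission: that upload type across all trials
            if f.upload_type == p.upload_type:
                return True
        elif f.trial_id == p.trial_id and f.upload_type == p.upload_type:
            # regular permission: specific trial and upload type
            return True
    return False


files = [FileRecord(1, "10021", "mif"), FileRecord(2, "10021", "clinical_data")]
perms = [Permission(trial_id="10021", upload_type=None)]
print([f.id for f in files if allowed(f, perms, is_admin_or_nci=False)])  # -> [1]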