nci-cidc-api-modules 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/config/db.py +2 -5
- cidc_api/config/logging.py +1 -5
- cidc_api/config/secrets.py +1 -3
- cidc_api/config/settings.py +2 -3
- cidc_api/csms/auth.py +1 -3
- cidc_api/models/csms_api.py +29 -97
- cidc_api/models/files/details.py +5 -15
- cidc_api/models/files/facets.py +9 -29
- cidc_api/models/models.py +215 -375
- cidc_api/shared/auth.py +3 -9
- cidc_api/shared/emails.py +8 -16
- cidc_api/shared/gcloud_client.py +33 -98
- cidc_api/shared/jose.py +1 -3
- cidc_api/shared/rest_utils.py +2 -6
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/METADATA +2 -2
- nci_cidc_api_modules-1.1.14.dist-info/RECORD +26 -0
- nci_cidc_api_modules-1.1.12.dist-info/RECORD +0 -26
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/WHEEL +0 -0
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/licenses/LICENSE +0 -0
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/top_level.txt +0 -0
cidc_api/models/models.py
CHANGED
@@ -28,6 +28,7 @@ import hashlib
|
|
28
28
|
import os
|
29
29
|
import re
|
30
30
|
from collections import defaultdict
|
31
|
+
from copy import deepcopy
|
31
32
|
from datetime import datetime, timedelta
|
32
33
|
from enum import Enum as EnumBaseClass
|
33
34
|
from functools import wraps
|
@@ -77,6 +78,7 @@ from sqlalchemy import (
|
|
77
78
|
or_,
|
78
79
|
Table,
|
79
80
|
MetaData,
|
81
|
+
true,
|
80
82
|
)
|
81
83
|
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
82
84
|
from sqlalchemy.engine import ResultProxy
|
@@ -92,10 +94,11 @@ from sqlalchemy.sql import (
|
|
92
94
|
# instead of the sqlalchemy.sql versions we are importing here. The solution is to
|
93
95
|
# break up this giant file.
|
94
96
|
and_ as sql_and,
|
95
|
-
|
97
|
+
or_ as sql_or,
|
96
98
|
# select, # ALREADY IMPORTED
|
97
99
|
text,
|
98
100
|
)
|
101
|
+
from sqlalchemy.sql.elements import BooleanClauseList
|
99
102
|
from sqlalchemy.sql.functions import coalesce
|
100
103
|
from werkzeug.exceptions import BadRequest
|
101
104
|
|
@@ -184,11 +187,7 @@ class CommonColumns(BaseModel): # type: ignore
|
|
184
187
|
if hasattr(b, "__table__"):
|
185
188
|
columns_to_check.extend(b.__table__.columns)
|
186
189
|
|
187
|
-
ret = {
|
188
|
-
c.name: getattr(self, c.name)
|
189
|
-
for c in columns_to_check
|
190
|
-
if hasattr(self, c.name)
|
191
|
-
}
|
190
|
+
ret = {c.name: getattr(self, c.name) for c in columns_to_check if hasattr(self, c.name)}
|
192
191
|
ret = {k: v for k, v in ret.items() if v is not None}
|
193
192
|
return ret
|
194
193
|
|
@@ -269,16 +268,12 @@ class CommonColumns(BaseModel): # type: ignore
|
|
269
268
|
if sort_field:
|
270
269
|
# Get the attribute from the class, in case this is a hybrid attribute
|
271
270
|
sort_attribute = getattr(cls, sort_field)
|
272
|
-
field_with_dir = (
|
273
|
-
asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
|
274
|
-
)
|
271
|
+
field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
|
275
272
|
query = query.order_by(field_with_dir)
|
276
273
|
if sort_field != "id":
|
277
274
|
# When sorting, need to guarantee unique order for offset/limit pagination to produce
|
278
275
|
# consistent results. Adding secondary "id" sort field to ensure unique order.
|
279
|
-
secondary_field_with_dir = (
|
280
|
-
asc("id") if sort_direction == "asc" else desc("id")
|
281
|
-
)
|
276
|
+
secondary_field_with_dir = asc("id") if sort_direction == "asc" else desc("id")
|
282
277
|
query = query.order_by(secondary_field_with_dir)
|
283
278
|
|
284
279
|
# Apply filter function
|
@@ -299,9 +294,7 @@ class CommonColumns(BaseModel): # type: ignore
|
|
299
294
|
|
300
295
|
@classmethod
|
301
296
|
@with_default_session
|
302
|
-
def count_by(
|
303
|
-
cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q
|
304
|
-
) -> Dict[str, int]:
|
297
|
+
def count_by(cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> Dict[str, int]:
|
305
298
|
"""
|
306
299
|
Return a dictionary mapping results of `expr` to the number of times each result
|
307
300
|
occurs in the table related to this model. E.g., for the `UploadJobs` model,
|
@@ -326,9 +319,7 @@ class CommonColumns(BaseModel): # type: ignore
|
|
326
319
|
filter_: Callable[[Query], Query] = lambda q: q,
|
327
320
|
):
|
328
321
|
"""Get a list of distinct values for the given column."""
|
329
|
-
assert (
|
330
|
-
column_name in cls.__table__.columns.keys()
|
331
|
-
), f"{cls.__tablename__} has no column {column_name}"
|
322
|
+
assert column_name in cls.__table__.columns.keys(), f"{cls.__tablename__} has no column {column_name}"
|
332
323
|
|
333
324
|
base_query = session.query(getattr(cls, column_name))
|
334
325
|
filtered_query = filter_(base_query)
|
@@ -342,9 +333,7 @@ class CommonColumns(BaseModel): # type: ignore
|
|
342
333
|
@classmethod
|
343
334
|
def get_unique_columns(cls):
|
344
335
|
"""Get a list of all the unique columns in this table."""
|
345
|
-
return [
|
346
|
-
column for column in cls.__table__.c if column.unique or column.primary_key
|
347
|
-
]
|
336
|
+
return [column for column in cls.__table__.c if column.unique or column.primary_key]
|
348
337
|
|
349
338
|
|
350
339
|
class CIDCRole(EnumBaseClass):
|
@@ -392,6 +381,11 @@ class Users(CommonColumns):
|
|
392
381
|
"""Returns true if this user is an NCI Biobank user."""
|
393
382
|
return self.role == CIDCRole.NCI_BIOBANK_USER.value
|
394
383
|
|
384
|
+
def is_admin_or_nci_user(self) -> bool:
|
385
|
+
"""Returns true if this user is a CIDC admin or NCI Biobank user. These users
|
386
|
+
share full access to much of the system."""
|
387
|
+
return self.is_admin() or self.is_nci_user()
|
388
|
+
|
395
389
|
def has_download_permissions(self) -> bool:
|
396
390
|
"""Returns false if this user is a Network Viewer or PACT User."""
|
397
391
|
return self.role not in (
|
@@ -434,9 +428,7 @@ class Users(CommonColumns):
|
|
434
428
|
user = Users.find_by_email(email)
|
435
429
|
if not user:
|
436
430
|
logger.info("Creating new user with email %s", email)
|
437
|
-
user = Users(
|
438
|
-
email=email, contact_email=email, first_n=first_n, last_n=last_n
|
439
|
-
)
|
431
|
+
user = Users(email=email, contact_email=email, first_n=first_n, last_n=last_n)
|
440
432
|
user.insert(session=session)
|
441
433
|
return user
|
442
434
|
|
@@ -450,9 +442,7 @@ class Users(CommonColumns):
|
|
450
442
|
user_inactivity_cutoff = datetime.today() - timedelta(days=INACTIVE_USER_DAYS)
|
451
443
|
update_query = (
|
452
444
|
update(Users)
|
453
|
-
.where(
|
454
|
-
and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False)
|
455
|
-
)
|
445
|
+
.where(and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False))
|
456
446
|
.values(disabled=True)
|
457
447
|
.returning(Users.id)
|
458
448
|
)
|
@@ -460,9 +450,7 @@ class Users(CommonColumns):
|
|
460
450
|
if commit:
|
461
451
|
session.commit()
|
462
452
|
|
463
|
-
disabled_users = [
|
464
|
-
Users.find_by_id(uid, session=session) for uid in disabled_user_ids
|
465
|
-
]
|
453
|
+
disabled_users = [Users.find_by_id(uid, session=session) for uid in disabled_user_ids]
|
466
454
|
for u in disabled_users:
|
467
455
|
Permissions.revoke_user_permissions(u, session=session)
|
468
456
|
revoke_bigquery_access(u.email)
|
@@ -501,15 +489,13 @@ class Users(CommonColumns):
|
|
501
489
|
.union_all(
|
502
490
|
# Handle admins separately, since they can view all data for all
|
503
491
|
# trials even if they have no permissions assigned to them.
|
504
|
-
session.query(
|
505
|
-
|
506
|
-
)
|
492
|
+
session.query(*user_columns, TrialMetadata.trial_id, literal("*,clinical_data")).filter(
|
493
|
+
Users.role == CIDCRole.ADMIN.value
|
494
|
+
)
|
507
495
|
)
|
508
496
|
)
|
509
497
|
|
510
|
-
df = pd.DataFrame(
|
511
|
-
query, columns=["email", "organization", "role", "trial_id", "permissions"]
|
512
|
-
).fillna("*")
|
498
|
+
df = pd.DataFrame(query, columns=["email", "organization", "role", "trial_id", "permissions"]).fillna("*")
|
513
499
|
|
514
500
|
with pd.ExcelWriter(
|
515
501
|
io
|
@@ -630,19 +616,11 @@ class Permissions(CommonColumns):
|
|
630
616
|
|
631
617
|
NOTE: values provided to the `commit` argument will be ignored. This method always commits.
|
632
618
|
"""
|
633
|
-
if (
|
634
|
-
self.upload_type == self.EVERY
635
|
-
and self.trial_id == self.EVERY
|
636
|
-
and not self.file_group_id
|
637
|
-
):
|
638
|
-
raise ValueError(
|
639
|
-
"A permission must have a trial id, upload type, or file group."
|
640
|
-
)
|
619
|
+
if self.upload_type == self.EVERY and self.trial_id == self.EVERY and not self.file_group_id:
|
620
|
+
raise ValueError("A permission must have a trial id, upload type, or file group.")
|
641
621
|
|
642
622
|
if self.file_group_id and self.upload_type != "file_group":
|
643
|
-
raise ValueError(
|
644
|
-
"If a permission has a file group, its upload_type must be set to file_group"
|
645
|
-
)
|
623
|
+
raise ValueError("If a permission has a file group, its upload_type must be set to file_group")
|
646
624
|
|
647
625
|
grantee = Users.find_by_id(self.granted_to_user, session=session)
|
648
626
|
if grantee is None:
|
@@ -724,11 +702,7 @@ class Permissions(CommonColumns):
|
|
724
702
|
super().insert(session=session, commit=True)
|
725
703
|
|
726
704
|
# Don't make any GCS changes if this user doesn't have download access, is disabled, or isn't approved
|
727
|
-
if (
|
728
|
-
not grantee.has_download_permissions()
|
729
|
-
or grantee.disabled
|
730
|
-
or grantee.approval_date is None
|
731
|
-
):
|
705
|
+
if not grantee.has_download_permissions() or grantee.disabled or grantee.approval_date is None:
|
732
706
|
# TODO: pact users do not have download permissions currently
|
733
707
|
return
|
734
708
|
|
@@ -736,9 +710,7 @@ class Permissions(CommonColumns):
|
|
736
710
|
# Grant ACL download permissions in GCS
|
737
711
|
|
738
712
|
if self.upload_type == "file_group":
|
739
|
-
Permissions.grant_download_access_to_file_group(
|
740
|
-
grantee.email, file_group
|
741
|
-
)
|
713
|
+
Permissions.grant_download_access_to_file_group(grantee.email, file_group)
|
742
714
|
else:
|
743
715
|
|
744
716
|
# if they have any download permissions, they need the CIDC Lister role
|
@@ -746,9 +718,7 @@ class Permissions(CommonColumns):
|
|
746
718
|
grant_download_access(grantee.email, self.trial_id, self.upload_type)
|
747
719
|
# Remove permissions staged for deletion, if any
|
748
720
|
for perm in perms_to_delete:
|
749
|
-
revoke_download_access(
|
750
|
-
grantee.email, perm.trial_id, perm.upload_type
|
751
|
-
)
|
721
|
+
revoke_download_access(grantee.email, perm.trial_id, perm.upload_type)
|
752
722
|
except Exception as e:
|
753
723
|
# Add back deleted permissions, if any
|
754
724
|
for perm in perms_to_delete:
|
@@ -760,9 +730,7 @@ class Permissions(CommonColumns):
|
|
760
730
|
raise IAMException("IAM grant failed.") from e
|
761
731
|
|
762
732
|
@with_default_session
|
763
|
-
def delete(
|
764
|
-
self, deleted_by: Union[Users, int], session: Session, commit: bool = True
|
765
|
-
) -> None:
|
733
|
+
def delete(self, deleted_by: Union[Users, int], session: Session, commit: bool = True) -> None:
|
766
734
|
"""
|
767
735
|
Delete this permission record from the database and revoke the corresponding IAM policy binding
|
768
736
|
on the GCS data bucket.
|
@@ -795,9 +763,7 @@ class Permissions(CommonColumns):
|
|
795
763
|
revoke_lister_access(grantee.email)
|
796
764
|
|
797
765
|
except Exception as e:
|
798
|
-
raise IAMException(
|
799
|
-
"IAM revoke failed, and permission db record not removed."
|
800
|
-
) from e
|
766
|
+
raise IAMException("IAM revoke failed, and permission db record not removed.") from e
|
801
767
|
|
802
768
|
info_message = f"admin-action: {deleted_by_user.email} removed from {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
|
803
769
|
logger.info(info_message)
|
@@ -841,10 +807,7 @@ class Permissions(CommonColumns):
|
|
841
807
|
# if getting EVERY, return all
|
842
808
|
| (upload_type == Permissions.EVERY)
|
843
809
|
# if permission is EVERY, don't return if looking for clinical_data
|
844
|
-
| (
|
845
|
-
(Permissions.upload_type == Permissions.EVERY)
|
846
|
-
& (upload_type != "clinical_data")
|
847
|
-
)
|
810
|
+
| ((Permissions.upload_type == Permissions.EVERY) & (upload_type != "clinical_data"))
|
848
811
|
)
|
849
812
|
),
|
850
813
|
)
|
@@ -864,23 +827,16 @@ class Permissions(CommonColumns):
|
|
864
827
|
permissions_list: List[Permissions] = []
|
865
828
|
for upload in upload_type:
|
866
829
|
permissions_list.extend(
|
867
|
-
Permissions.get_for_trial_type(
|
868
|
-
trial_id=trial_id, upload_type=upload, session=session
|
869
|
-
)
|
830
|
+
Permissions.get_for_trial_type(trial_id=trial_id, upload_type=upload, session=session)
|
870
831
|
)
|
871
832
|
|
872
|
-
permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(
|
873
|
-
lambda: defaultdict(list)
|
874
|
-
)
|
833
|
+
permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(lambda: defaultdict(list))
|
875
834
|
for perm in permissions_list:
|
876
835
|
permissions_dict[perm.trial_id][perm.upload_type].append(perm)
|
877
836
|
|
878
837
|
user_dict: Dict[str, Dict[str, List[Users]]] = {
|
879
838
|
trial: {
|
880
|
-
upload: [
|
881
|
-
Users.find_by_id(id=perm.granted_to_user, session=session)
|
882
|
-
for perm in perms
|
883
|
-
]
|
839
|
+
upload: [Users.find_by_id(id=perm.granted_to_user, session=session) for perm in perms]
|
884
840
|
for upload, perms in upload_dict.items()
|
885
841
|
}
|
886
842
|
for trial, upload_dict in permissions_dict.items()
|
@@ -895,11 +851,7 @@ class Permissions(CommonColumns):
|
|
895
851
|
for trial, upload_dict in user_dict.items()
|
896
852
|
}
|
897
853
|
# remove any trial that doesn't have any uploads in it
|
898
|
-
user_email_dict = {
|
899
|
-
trial: upload_dict
|
900
|
-
for trial, upload_dict in user_email_dict.items()
|
901
|
-
if len(upload_dict)
|
902
|
-
}
|
854
|
+
user_email_dict = {trial: upload_dict for trial, upload_dict in user_email_dict.items() if len(upload_dict)}
|
903
855
|
return user_email_dict
|
904
856
|
|
905
857
|
@staticmethod
|
@@ -919,14 +871,8 @@ class Permissions(CommonColumns):
|
|
919
871
|
session.query(Permissions)
|
920
872
|
.filter(
|
921
873
|
Permissions.granted_to_user == user_id,
|
922
|
-
(
|
923
|
-
(Permissions.trial_id == trial_id)
924
|
-
& (Permissions.upload_type == upload_type)
|
925
|
-
)
|
926
|
-
| (
|
927
|
-
(Permissions.trial_id == Permissions.EVERY)
|
928
|
-
& (Permissions.upload_type == upload_type)
|
929
|
-
)
|
874
|
+
((Permissions.trial_id == trial_id) & (Permissions.upload_type == upload_type))
|
875
|
+
| ((Permissions.trial_id == Permissions.EVERY) & (Permissions.upload_type == upload_type))
|
930
876
|
| (
|
931
877
|
(Permissions.trial_id == trial_id)
|
932
878
|
# if permission is EVERY, don't return if looking for clinical_data
|
@@ -949,11 +895,7 @@ class Permissions(CommonColumns):
|
|
949
895
|
)
|
950
896
|
if results:
|
951
897
|
file_group_ids = {file_group.id for file_group in file_groups}
|
952
|
-
results = [
|
953
|
-
result
|
954
|
-
for result in results
|
955
|
-
if result.file_group_id in file_group_ids
|
956
|
-
]
|
898
|
+
results = [result for result in results if result.file_group_id in file_group_ids]
|
957
899
|
|
958
900
|
results = results and results or None
|
959
901
|
return results
|
@@ -997,9 +939,7 @@ class Permissions(CommonColumns):
|
|
997
939
|
upload_type=[p.upload_type for p in trial_perms],
|
998
940
|
)
|
999
941
|
for perm in file_group_perms:
|
1000
|
-
file_group: FileGroups = FileGroups.find_by_id(
|
1001
|
-
perm.file_group_id, session=session
|
1002
|
-
)
|
942
|
+
file_group: FileGroups = FileGroups.find_by_id(perm.file_group_id, session=session)
|
1003
943
|
Permissions.grant_download_access_to_file_group(user.email, file_group)
|
1004
944
|
|
1005
945
|
# Regrant all of the user's intake bucket upload permissions, if they have any
|
@@ -1039,9 +979,7 @@ class Permissions(CommonColumns):
|
|
1039
979
|
|
1040
980
|
@classmethod
|
1041
981
|
@with_default_session
|
1042
|
-
def grant_download_permissions_for_upload_job(
|
1043
|
-
cls, upload: "UploadJobs", session: Session
|
1044
|
-
) -> None:
|
982
|
+
def grant_download_permissions_for_upload_job(cls, upload: "UploadJobs", session: Session) -> None:
|
1045
983
|
"""
|
1046
984
|
For a given UploadJob, issue all relevant Permissions on Google
|
1047
985
|
Loads all cross-trial permissions for the upload_type
|
@@ -1059,21 +997,14 @@ class Permissions(CommonColumns):
|
|
1059
997
|
filters.append(cls.upload_type == upload.upload_type)
|
1060
998
|
else:
|
1061
999
|
# upload.upload_type can't be None
|
1062
|
-
filters.append(
|
1063
|
-
or_(cls.upload_type == upload.upload_type, cls.upload_type == None)
|
1064
|
-
)
|
1000
|
+
filters.append(or_(cls.upload_type == upload.upload_type, cls.upload_type == None))
|
1065
1001
|
|
1066
1002
|
perms = session.query(cls).filter(*filters).all()
|
1067
1003
|
user_email_list: List[str] = []
|
1068
1004
|
|
1069
1005
|
for perm in perms:
|
1070
1006
|
user = Users.find_by_id(perm.granted_to_user, session=session)
|
1071
|
-
if (
|
1072
|
-
user.is_admin()
|
1073
|
-
or user.is_nci_user()
|
1074
|
-
or user.disabled
|
1075
|
-
or user.email in user_email_list
|
1076
|
-
):
|
1007
|
+
if user.is_admin() or user.is_nci_user() or user.disabled or user.email in user_email_list:
|
1077
1008
|
continue
|
1078
1009
|
|
1079
1010
|
user_email_list.append(user.email)
|
@@ -1102,10 +1033,8 @@ class ValidationMultiError(Exception):
|
|
1102
1033
|
"""Holds multiple jsonschema.ValidationErrors"""
|
1103
1034
|
|
1104
1035
|
|
1105
|
-
trial_metadata_validator: json_validation._Validator = (
|
1106
|
-
json_validation.load_and_validate_schema(
1107
|
-
"clinical_trial.json", return_validator=True
|
1108
|
-
)
|
1036
|
+
trial_metadata_validator: json_validation._Validator = json_validation.load_and_validate_schema(
|
1037
|
+
"clinical_trial.json", return_validator=True
|
1109
1038
|
)
|
1110
1039
|
|
1111
1040
|
FileBundle = Dict[str, Dict[FilePurpose, List[int]]]
|
@@ -1127,9 +1056,7 @@ class TrialMetadata(CommonColumns):
|
|
1127
1056
|
# existing manifest data that no longer conform to the post-CSMS-integration schema can be kept.
|
1128
1057
|
# See more details in the strip_metadata_for_validation function docs.
|
1129
1058
|
metadata_to_validate = (
|
1130
|
-
json_validation.strip_metadata_for_validation(metadata_json)
|
1131
|
-
if strip_metadata
|
1132
|
-
else metadata_json
|
1059
|
+
json_validation.strip_metadata_for_validation(metadata_json) if strip_metadata else metadata_json
|
1133
1060
|
)
|
1134
1061
|
errs = trial_metadata_validator.iter_error_messages(metadata_to_validate)
|
1135
1062
|
messages = list(f"'metadata_json': {err}" for err in errs)
|
@@ -1165,49 +1092,34 @@ class TrialMetadata(CommonColumns):
|
|
1165
1092
|
Find a trial by its CIMAC id.
|
1166
1093
|
"""
|
1167
1094
|
try:
|
1168
|
-
trial = (
|
1169
|
-
session.query(TrialMetadata)
|
1170
|
-
.filter_by(trial_id=trial_id)
|
1171
|
-
.with_for_update()
|
1172
|
-
.one()
|
1173
|
-
)
|
1095
|
+
trial = session.query(TrialMetadata).filter_by(trial_id=trial_id).with_for_update().one()
|
1174
1096
|
except NoResultFound as e:
|
1175
1097
|
raise NoResultFound(f"No trial found with id {trial_id}") from e
|
1176
1098
|
return trial
|
1177
1099
|
|
1178
1100
|
@staticmethod
|
1179
1101
|
@with_default_session
|
1180
|
-
def patch_assays(
|
1181
|
-
trial_id: str, assay_patch: dict, session: Session, commit: bool = False
|
1182
|
-
):
|
1102
|
+
def patch_assays(trial_id: str, assay_patch: dict, session: Session, commit: bool = False):
|
1183
1103
|
"""
|
1184
1104
|
Applies assay updates to the metadata object from the trial with id `trial_id`.
|
1185
1105
|
|
1186
1106
|
TODO: apply this update directly to the not-yet-existent TrialMetadata.manifest field
|
1187
1107
|
"""
|
1188
|
-
return TrialMetadata._patch_trial_metadata(
|
1189
|
-
trial_id, assay_patch, session=session, commit=commit
|
1190
|
-
)
|
1108
|
+
return TrialMetadata._patch_trial_metadata(trial_id, assay_patch, session=session, commit=commit)
|
1191
1109
|
|
1192
1110
|
@staticmethod
|
1193
1111
|
@with_default_session
|
1194
|
-
def patch_manifest(
|
1195
|
-
trial_id: str, manifest_patch: dict, session: Session, commit: bool = False
|
1196
|
-
):
|
1112
|
+
def patch_manifest(trial_id: str, manifest_patch: dict, session: Session, commit: bool = False):
|
1197
1113
|
"""
|
1198
1114
|
Applies manifest updates to the metadata object from the trial with id `trial_id`.
|
1199
1115
|
|
1200
1116
|
TODO: apply this update directly to the not-yet-existent TrialMetadata.assays field
|
1201
1117
|
"""
|
1202
|
-
return TrialMetadata._patch_trial_metadata(
|
1203
|
-
trial_id, manifest_patch, session=session, commit=commit
|
1204
|
-
)
|
1118
|
+
return TrialMetadata._patch_trial_metadata(trial_id, manifest_patch, session=session, commit=commit)
|
1205
1119
|
|
1206
1120
|
@staticmethod
|
1207
1121
|
@with_default_session
|
1208
|
-
def _patch_trial_metadata(
|
1209
|
-
trial_id: str, json_patch: dict, session: Session, commit: bool = False
|
1210
|
-
):
|
1122
|
+
def _patch_trial_metadata(trial_id: str, json_patch: dict, session: Session, commit: bool = False):
|
1211
1123
|
"""
|
1212
1124
|
Applies updates to the metadata object from the trial with id `trial_id`
|
1213
1125
|
and commits current session.
|
@@ -1219,9 +1131,7 @@ class TrialMetadata(CommonColumns):
|
|
1219
1131
|
trial = TrialMetadata.select_for_update_by_trial_id(trial_id, session=session)
|
1220
1132
|
|
1221
1133
|
# Merge assay metadata into the existing clinical trial metadata
|
1222
|
-
updated_metadata, errs = prism.merge_clinical_trial_metadata(
|
1223
|
-
json_patch, trial.metadata_json
|
1224
|
-
)
|
1134
|
+
updated_metadata, errs = prism.merge_clinical_trial_metadata(json_patch, trial.metadata_json)
|
1225
1135
|
if errs:
|
1226
1136
|
raise ValidationMultiError(errs)
|
1227
1137
|
# Save updates to trial record
|
@@ -1236,9 +1146,7 @@ class TrialMetadata(CommonColumns):
|
|
1236
1146
|
|
1237
1147
|
@staticmethod
|
1238
1148
|
@with_default_session
|
1239
|
-
def create(
|
1240
|
-
trial_id: str, metadata_json: dict, session: Session, commit: bool = True
|
1241
|
-
):
|
1149
|
+
def create(trial_id: str, metadata_json: dict, session: Session, commit: bool = True):
|
1242
1150
|
"""
|
1243
1151
|
Create a new clinical trial metadata record.
|
1244
1152
|
"""
|
@@ -1250,9 +1158,7 @@ class TrialMetadata(CommonColumns):
|
|
1250
1158
|
return trial
|
1251
1159
|
|
1252
1160
|
@staticmethod
|
1253
|
-
def merge_gcs_artifact(
|
1254
|
-
metadata: dict, upload_type: str, uuid: str, gcs_object: Blob
|
1255
|
-
):
|
1161
|
+
def merge_gcs_artifact(metadata: dict, upload_type: str, uuid: str, gcs_object: Blob):
|
1256
1162
|
return prism.merge_artifact(
|
1257
1163
|
ct=metadata,
|
1258
1164
|
assay_type=upload_type, # assay_type is the old name for upload_type
|
@@ -1265,9 +1171,7 @@ class TrialMetadata(CommonColumns):
|
|
1265
1171
|
)
|
1266
1172
|
|
1267
1173
|
@staticmethod
|
1268
|
-
def merge_gcs_artifacts(
|
1269
|
-
metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]
|
1270
|
-
):
|
1174
|
+
def merge_gcs_artifacts(metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]):
|
1271
1175
|
return prism.merge_artifacts(
|
1272
1176
|
metadata,
|
1273
1177
|
[
|
@@ -1356,32 +1260,16 @@ class TrialMetadata(CommonColumns):
|
|
1356
1260
|
subqueries = []
|
1357
1261
|
|
1358
1262
|
if include_file_bundles:
|
1359
|
-
|
1360
|
-
if user and not user.is_admin() and not user.is_nci_user():
|
1361
|
-
permissions = Permissions.find_for_user(user.id)
|
1362
|
-
# An 'empty' upload_type means full trial-level access
|
1363
|
-
allowed_upload_types = [
|
1364
|
-
p.upload_type for p in permissions if p.upload_type
|
1365
|
-
]
|
1366
|
-
logger.info(
|
1367
|
-
f"Restricting file bundle for user {user.id} to {allowed_upload_types=}"
|
1368
|
-
)
|
1263
|
+
file_bundle_query = DownloadableFiles.build_file_bundle_query()
|
1369
1264
|
|
1370
|
-
file_bundle_query = DownloadableFiles.build_file_bundle_query(
|
1371
|
-
allowed_upload_types
|
1372
|
-
)
|
1373
1265
|
columns.append(file_bundle_query.c.file_bundle)
|
1374
1266
|
subqueries.append(file_bundle_query)
|
1375
1267
|
|
1376
1268
|
if include_counts:
|
1377
1269
|
trial_summaries: List[dict] = cls.get_summaries()
|
1378
1270
|
|
1379
|
-
participant_counts: Dict[str, int] = {
|
1380
|
-
t["trial_id"]: t["total_participants"] for t in trial_summaries
1381
|
-
}
|
1382
|
-
sample_counts: Dict[str, int] = {
|
1383
|
-
t["trial_id"]: t["total_samples"] for t in trial_summaries
|
1384
|
-
}
|
1271
|
+
participant_counts: Dict[str, int] = {t["trial_id"]: t["total_participants"] for t in trial_summaries}
|
1272
|
+
sample_counts: Dict[str, int] = {t["trial_id"]: t["total_samples"] for t in trial_summaries}
|
1385
1273
|
|
1386
1274
|
# Combine all query components
|
1387
1275
|
query = session.query(*columns)
|
@@ -1389,6 +1277,7 @@ class TrialMetadata(CommonColumns):
|
|
1389
1277
|
# Each subquery will have a trial_id column and one record per trial id
|
1390
1278
|
query = query.outerjoin(subquery, cls.trial_id == subquery.c.trial_id)
|
1391
1279
|
|
1280
|
+
query = query.order_by(cls.trial_id)
|
1392
1281
|
query = cls._add_pagination_filters(query, **pagination_args)
|
1393
1282
|
|
1394
1283
|
trials = []
|
@@ -1404,25 +1293,43 @@ class TrialMetadata(CommonColumns):
|
|
1404
1293
|
setattr(trial, column, value)
|
1405
1294
|
|
1406
1295
|
if include_counts:
|
1407
|
-
setattr(
|
1408
|
-
trial, "num_participants", participant_counts.get(trial.trial_id, 0)
|
1409
|
-
)
|
1296
|
+
setattr(trial, "num_participants", participant_counts.get(trial.trial_id, 0))
|
1410
1297
|
setattr(trial, "num_samples", sample_counts.get(trial.trial_id, 0))
|
1411
1298
|
|
1412
1299
|
if include_file_bundles and hasattr(trial, "file_bundle"):
|
1413
|
-
|
1300
|
+
# File bundle has all existing object ids. Remove ones that aren't allowed by permissions.
|
1301
|
+
|
1302
|
+
# Gather all object ids in the file bundle
|
1303
|
+
all_object_ids = set()
|
1304
|
+
|
1305
|
+
for assay, purposes in trial.file_bundle.items():
|
1306
|
+
for purpose, object_ids in purposes.items():
|
1307
|
+
all_object_ids = all_object_ids.union(object_ids)
|
1308
|
+
# Remove any impermissible object ids
|
1309
|
+
filtered_object_ids = DownloadableFiles.filter_object_ids_by_permissions(user, all_object_ids)
|
1310
|
+
logger.debug(f"Filtered object ids: {len(all_object_ids)} -> {len(filtered_object_ids)}")
|
1311
|
+
|
1312
|
+
for assay, purposes in trial.file_bundle.items():
|
1414
1313
|
size_results = {}
|
1415
|
-
for
|
1416
|
-
ids
|
1417
|
-
|
1418
|
-
|
1419
|
-
|
1420
|
-
|
1421
|
-
|
1422
|
-
)
|
1423
|
-
)
|
1314
|
+
for purpose, object_ids in purposes.items():
|
1315
|
+
# Only allow object ids that are permitted
|
1316
|
+
permitted_object_ids = list(set(object_ids).intersection(filtered_object_ids))
|
1317
|
+
trial.file_bundle[assay][purpose] = permitted_object_ids
|
1318
|
+
if permitted_object_ids:
|
1319
|
+
# For any files left in the purpose, get their total size
|
1320
|
+
filter_ = lambda q: q.filter(DownloadableFiles.id.in_(permitted_object_ids))
|
1321
|
+
size_results[f"{purpose}_size"] = DownloadableFiles.get_total_bytes(filter_=filter_)
|
1424
1322
|
trial.file_bundle[assay].update(size_results)
|
1425
1323
|
|
1324
|
+
# Trim the file bundle
|
1325
|
+
for assay, purposes in deepcopy(trial.file_bundle).items():
|
1326
|
+
for purpose, object_ids in purposes.items():
|
1327
|
+
if not object_ids:
|
1328
|
+
# No file ids left in the purpose after filtering. Remove this purpose from the bundle.
|
1329
|
+
del trial.file_bundle[assay][purpose]
|
1330
|
+
if not trial.file_bundle[assay]:
|
1331
|
+
del trial.file_bundle[assay]
|
1332
|
+
|
1426
1333
|
trials.append(trial)
|
1427
1334
|
|
1428
1335
|
return trials
|
@@ -2019,9 +1926,7 @@ class TrialMetadata(CommonColumns):
|
|
2019
1926
|
summaries_query = "SELECT result FROM trial_summaries_mv"
|
2020
1927
|
# Retrieve trial-level summary results from data cached in trial_summaries_mv materialized view.
|
2021
1928
|
# The source of the SQL query used in trial_summaries_mv is get_summaries_query()
|
2022
|
-
summaries = [
|
2023
|
-
summary for (summary,) in session.execute(summaries_query) if summary
|
2024
|
-
]
|
1929
|
+
summaries = [summary for (summary,) in session.execute(summaries_query) if summary]
|
2025
1930
|
|
2026
1931
|
# Shortcut to impute 0 values for assays where trials don't yet have data
|
2027
1932
|
summaries = pd.DataFrame(summaries).fillna(0).to_dict("records")
|
@@ -2039,9 +1944,7 @@ class UploadJobStatus(EnumBaseClass):
|
|
2039
1944
|
MERGE_FAILED = "merge-failed"
|
2040
1945
|
|
2041
1946
|
@classmethod
|
2042
|
-
def is_valid_transition(
|
2043
|
-
cls, current: str, target: str, is_manifest: bool = False
|
2044
|
-
) -> bool:
|
1947
|
+
def is_valid_transition(cls, current: str, target: str, is_manifest: bool = False) -> bool:
|
2045
1948
|
"""
|
2046
1949
|
Enforce logic about which state transitions are valid. E.g.,
|
2047
1950
|
an upload whose status is "merge-completed" should never be updated
|
@@ -2086,9 +1989,7 @@ class UploadJobs(CommonColumns):
|
|
2086
1989
|
)
|
2087
1990
|
|
2088
1991
|
# The current status of the upload job
|
2089
|
-
_status = Column(
|
2090
|
-
"status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False
|
2091
|
-
)
|
1992
|
+
_status = Column("status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False)
|
2092
1993
|
# A long, random identifier for this upload job
|
2093
1994
|
token = Column(UUID, server_default=text("gen_random_uuid()"), nullable=False)
|
2094
1995
|
# Text containing feedback on why the upload status is what it is
|
@@ -2110,9 +2011,7 @@ class UploadJobs(CommonColumns):
|
|
2110
2011
|
trial_id = Column(String, nullable=False, index=True)
|
2111
2012
|
|
2112
2013
|
# Create a GIN index on the GCS object names
|
2113
|
-
_gcs_objects_idx = Index(
|
2114
|
-
"upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin"
|
2115
|
-
)
|
2014
|
+
_gcs_objects_idx = Index("upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin")
|
2116
2015
|
|
2117
2016
|
@hybrid_property
|
2118
2017
|
def status(self):
|
@@ -2126,9 +2025,7 @@ class UploadJobs(CommonColumns):
|
|
2126
2025
|
old_status = self.status or UploadJobStatus.STARTED.value
|
2127
2026
|
is_manifest = self.upload_type in prism.SUPPORTED_MANIFESTS
|
2128
2027
|
if not UploadJobStatus.is_valid_transition(old_status, status, is_manifest):
|
2129
|
-
raise ValueError(
|
2130
|
-
f"Upload job with status {self.status} can't transition to status {status}"
|
2131
|
-
)
|
2028
|
+
raise ValueError(f"Upload job with status {self.status} can't transition to status {status}")
|
2132
2029
|
self._status = status
|
2133
2030
|
|
2134
2031
|
def _set_status_no_validation(self, status: str):
|
@@ -2168,9 +2065,7 @@ class UploadJobs(CommonColumns):
|
|
2168
2065
|
assert prism.PROTOCOL_ID_FIELD_NAME in metadata, "metadata must have a trial ID"
|
2169
2066
|
|
2170
2067
|
is_manifest_upload = upload_type in prism.SUPPORTED_MANIFESTS
|
2171
|
-
assert (
|
2172
|
-
gcs_file_map is not None or is_manifest_upload
|
2173
|
-
), "assay/analysis uploads must have a gcs_file_map"
|
2068
|
+
assert gcs_file_map is not None or is_manifest_upload, "assay/analysis uploads must have a gcs_file_map"
|
2174
2069
|
|
2175
2070
|
trial_id = metadata[prism.PROTOCOL_ID_FIELD_NAME]
|
2176
2071
|
|
@@ -2221,9 +2116,7 @@ class UploadJobs(CommonColumns):
|
|
2221
2116
|
job.metadata_patch,
|
2222
2117
|
updated_artifact,
|
2223
2118
|
_,
|
2224
|
-
) = prism.merge_artifact_extra_metadata(
|
2225
|
-
job.metadata_patch, uuid, job.upload_type, file
|
2226
|
-
)
|
2119
|
+
) = prism.merge_artifact_extra_metadata(job.metadata_patch, uuid, job.upload_type, file)
|
2227
2120
|
logger.info("Updated md for %s: %s", uuid, updated_artifact.keys())
|
2228
2121
|
|
2229
2122
|
# A workaround fix for JSON field modifications not being tracked
|
@@ -2245,25 +2138,14 @@ class UploadJobs(CommonColumns):
|
|
2245
2138
|
@classmethod
|
2246
2139
|
@with_default_session
|
2247
2140
|
def find_first_manifest_job(cls, trial_id: str, session):
|
2248
|
-
return (
|
2249
|
-
session.query(UploadJobs)
|
2250
|
-
.filter_by(trial_id=trial_id, gcs_xlsx_uri="")
|
2251
|
-
.order_by(text("id ASC"))
|
2252
|
-
.first()
|
2253
|
-
)
|
2141
|
+
return session.query(UploadJobs).filter_by(trial_id=trial_id, gcs_xlsx_uri="").order_by(text("id ASC")).first()
|
2254
2142
|
|
2255
2143
|
@with_default_session
|
2256
|
-
def ingestion_success(
|
2257
|
-
self, trial, session: Session, commit: bool = False, send_email: bool = False
|
2258
|
-
):
|
2144
|
+
def ingestion_success(self, trial, session: Session, commit: bool = False, send_email: bool = False):
|
2259
2145
|
"""Set own status to reflect successful merge and trigger email notifying CIDC admins."""
|
2260
2146
|
# Do status update if the transition is valid
|
2261
|
-
if not UploadJobStatus.is_valid_transition(
|
2262
|
-
self.status
|
2263
|
-
):
|
2264
|
-
raise Exception(
|
2265
|
-
f"Cannot declare ingestion success given current status: {self.status}"
|
2266
|
-
)
|
2147
|
+
if not UploadJobStatus.is_valid_transition(self.status, UploadJobStatus.MERGE_COMPLETED.value):
|
2148
|
+
raise Exception(f"Cannot declare ingestion success given current status: {self.status}")
|
2267
2149
|
self.status = UploadJobStatus.MERGE_COMPLETED.value
|
2268
2150
|
|
2269
2151
|
if commit:
|
@@ -2279,11 +2161,7 @@ class FilesToFileGroups(BaseModel):
|
|
2279
2161
|
"""
|
2280
2162
|
|
2281
2163
|
__tablename__ = "files_to_file_groups"
|
2282
|
-
__table_args__ = (
|
2283
|
-
PrimaryKeyConstraint(
|
2284
|
-
"file_group_id", "file_id", name="pk_files_to_file_groups"
|
2285
|
-
),
|
2286
|
-
)
|
2164
|
+
__table_args__ = (PrimaryKeyConstraint("file_group_id", "file_id", name="pk_files_to_file_groups"),)
|
2287
2165
|
file_group_id = Column(ForeignKey("file_groups.id"), primary_key=True)
|
2288
2166
|
file_id = Column(ForeignKey("downloadable_files.id"), primary_key=True)
|
2289
2167
|
_created = Column(DateTime, default=func.now(), nullable=False)
|
@@ -2506,9 +2384,7 @@ class DownloadableFiles(CommonColumns):
|
|
2506
2384
|
return downloadable_files_for_query, files_to_file_groups_for_query
|
2507
2385
|
|
2508
2386
|
@classmethod
|
2509
|
-
def _convert_list_results(
|
2510
|
-
cls, downloadable_files_for_query: Table, query_files: List
|
2511
|
-
):
|
2387
|
+
def _convert_list_results(cls, downloadable_files_for_query: Table, query_files: List):
|
2512
2388
|
"""Converts the results of a SQLalchemy expression language query into actual DownloadableFiles
|
2513
2389
|
objects. This is necessary since the UI depends on some of the derived properties in
|
2514
2390
|
DownloadableFiles.
|
@@ -2546,9 +2422,7 @@ class DownloadableFiles(CommonColumns):
|
|
2546
2422
|
where_clauses.append(downloadable_files_for_query.c.trial_id.in_(trial_ids))
|
2547
2423
|
if facets:
|
2548
2424
|
facet_groups = get_facet_groups_for_paths(facets)
|
2549
|
-
where_clauses.append(
|
2550
|
-
downloadable_files_for_query.c.facet_group.in_(facet_groups)
|
2551
|
-
)
|
2425
|
+
where_clauses.append(downloadable_files_for_query.c.facet_group.in_(facet_groups))
|
2552
2426
|
|
2553
2427
|
if user and not is_admin:
|
2554
2428
|
permissions = Permissions.find_for_user(user.id)
|
@@ -2564,27 +2438,15 @@ class DownloadableFiles(CommonColumns):
|
|
2564
2438
|
elif permission.file_group_id is None:
|
2565
2439
|
where_clauses.append(
|
2566
2440
|
sql_and(
|
2567
|
-
(
|
2568
|
-
|
2569
|
-
== permission.trial_id
|
2570
|
-
),
|
2571
|
-
(
|
2572
|
-
downloadable_files_for_query.c.upload_type
|
2573
|
-
== permission.upload_type
|
2574
|
-
),
|
2441
|
+
(downloadable_files_for_query.c.trial_id == permission.trial_id),
|
2442
|
+
(downloadable_files_for_query.c.upload_type == permission.upload_type),
|
2575
2443
|
)
|
2576
2444
|
)
|
2577
2445
|
else:
|
2578
2446
|
where_clauses.append(
|
2579
2447
|
sql_and(
|
2580
|
-
(
|
2581
|
-
|
2582
|
-
== permission.trial_id
|
2583
|
-
),
|
2584
|
-
(
|
2585
|
-
files_to_file_groups_for_query.c.file_group_id
|
2586
|
-
== permission.file_group_id
|
2587
|
-
),
|
2448
|
+
(downloadable_files_for_query.c.trial_id == permission.trial_id),
|
2449
|
+
(files_to_file_groups_for_query.c.file_group_id == permission.file_group_id),
|
2588
2450
|
)
|
2589
2451
|
)
|
2590
2452
|
|
@@ -2598,9 +2460,7 @@ class DownloadableFiles(CommonColumns):
|
|
2598
2460
|
)
|
2599
2461
|
)
|
2600
2462
|
if full_type_perms:
|
2601
|
-
where_clauses.append(
|
2602
|
-
downloadable_files_for_query.c.upload_type.in_(full_type_perms)
|
2603
|
-
)
|
2463
|
+
where_clauses.append(downloadable_files_for_query.c.upload_type.in_(full_type_perms))
|
2604
2464
|
|
2605
2465
|
# Need to be careful about return logic. Empty results could be because the user
|
2606
2466
|
# is an admin, whereas None means the user has no permissions to view any files.
|
@@ -2609,6 +2469,55 @@ class DownloadableFiles(CommonColumns):
|
|
2609
2469
|
|
2610
2470
|
return None
|
2611
2471
|
|
2472
|
+
@classmethod
|
2473
|
+
def _generate_where_clause_with_permissions(cls, user: Users) -> BooleanClauseList:
|
2474
|
+
"""
|
2475
|
+
Returns a where clause for DownloadableFiles filtered down to only the files the user
|
2476
|
+
has access to based on their permissions and role.
|
2477
|
+
|
2478
|
+
The generated clause will have this form
|
2479
|
+
WHERE
|
2480
|
+
downloadable_files.trial_id IN ('x', 'y', ...) AND upload_type != 'clinical_data' <- trial-level permissions
|
2481
|
+
OR
|
2482
|
+
upload_type IN ('u', 'v', ...)) <- upload-type-level permissions
|
2483
|
+
OR
|
2484
|
+
trial_id = '5' AND upload_type = 'mif' <- regular permissions
|
2485
|
+
OR
|
2486
|
+
trial_id = '6' and upload_type = 'hande'
|
2487
|
+
...
|
2488
|
+
"""
|
2489
|
+
# From the perspective of viewing files, NCI Biobank users are admins.
|
2490
|
+
if user.is_admin_or_nci_user():
|
2491
|
+
return true() # Admin has full permissions to all
|
2492
|
+
|
2493
|
+
permissions = Permissions.find_for_user(user.id)
|
2494
|
+
|
2495
|
+
full_access_trial_ids = [p.trial_id for p in permissions if not p.upload_type]
|
2496
|
+
full_access_upload_types = [p.upload_type for p in permissions if not p.trial_id]
|
2497
|
+
regular_permissions = [p for p in permissions if p.trial_id and p.upload_type]
|
2498
|
+
|
2499
|
+
full_access_trial_clause = sql_and(
|
2500
|
+
DownloadableFiles.trial_id.in_(full_access_trial_ids),
|
2501
|
+
DownloadableFiles.upload_type != "clinical_data",
|
2502
|
+
)
|
2503
|
+
|
2504
|
+
full_access_upload_type_clause = sql_or(DownloadableFiles.upload_type.in_(full_access_upload_types))
|
2505
|
+
|
2506
|
+
regular_permission_clauses = [
|
2507
|
+
sql_and(
|
2508
|
+
DownloadableFiles.trial_id == p.trial_id,
|
2509
|
+
DownloadableFiles.upload_type == p.upload_type,
|
2510
|
+
)
|
2511
|
+
for p in regular_permissions
|
2512
|
+
]
|
2513
|
+
clause = sql_or(
|
2514
|
+
full_access_trial_clause,
|
2515
|
+
full_access_upload_type_clause,
|
2516
|
+
*regular_permission_clauses,
|
2517
|
+
)
|
2518
|
+
|
2519
|
+
return clause
|
2520
|
+
|
2612
2521
|
@classmethod
|
2613
2522
|
@with_default_session
|
2614
2523
|
def list_with_permissions(
|
@@ -2648,26 +2557,18 @@ class DownloadableFiles(CommonColumns):
|
|
2648
2557
|
if where_clauses:
|
2649
2558
|
|
2650
2559
|
# No where clause (the user is likely an admin).
|
2651
|
-
statement = select([downloadable_files_for_query]).select_from(
|
2652
|
-
downloadable_files_for_query
|
2653
|
-
)
|
2560
|
+
statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)
|
2654
2561
|
|
2655
2562
|
else:
|
2656
2563
|
statement = (
|
2657
2564
|
select([downloadable_files_for_query])
|
2658
2565
|
.where(sql_and(*where_clauses))
|
2659
|
-
.select_from(
|
2660
|
-
downloadable_files_for_query.outerjoin(
|
2661
|
-
files_to_file_groups_for_query
|
2662
|
-
)
|
2663
|
-
)
|
2566
|
+
.select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
|
2664
2567
|
)
|
2665
2568
|
|
2666
2569
|
if sort_field:
|
2667
2570
|
sort_attribute = getattr(cls, sort_field)
|
2668
|
-
field_with_dir = (
|
2669
|
-
asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
|
2670
|
-
)
|
2571
|
+
field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
|
2671
2572
|
statement = statement.order_by(field_with_dir)
|
2672
2573
|
|
2673
2574
|
# Enforce positive page numbers
|
@@ -2724,28 +2625,22 @@ class DownloadableFiles(CommonColumns):
|
|
2724
2625
|
if where_clauses:
|
2725
2626
|
|
2726
2627
|
# No where clause (the user is likely an admin).
|
2727
|
-
statement = select(
|
2728
|
-
|
2729
|
-
)
|
2628
|
+
statement = select([func.count(downloadable_files_for_query.c.id)]).select_from(
|
2629
|
+
downloadable_files_for_query
|
2630
|
+
)
|
2730
2631
|
|
2731
2632
|
else:
|
2732
2633
|
statement = (
|
2733
2634
|
select([func.count(downloadable_files_for_query.c.id)])
|
2734
2635
|
.where(sql_and(*where_clauses))
|
2735
|
-
.select_from(
|
2736
|
-
downloadable_files_for_query.outerjoin(
|
2737
|
-
files_to_file_groups_for_query
|
2738
|
-
)
|
2739
|
-
)
|
2636
|
+
.select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
|
2740
2637
|
)
|
2741
2638
|
|
2742
2639
|
return session.execute(statement).fetchone()[0]
|
2743
2640
|
|
2744
2641
|
@classmethod
|
2745
2642
|
@with_default_session
|
2746
|
-
def count_by_facet_with_permissions(
|
2747
|
-
cls, session: Session, trial_ids: List[str] = None, user: Users = None
|
2748
|
-
):
|
2643
|
+
def count_by_facet_with_permissions(cls, session: Session, trial_ids: List[str] = None, user: Users = None):
|
2749
2644
|
"""
|
2750
2645
|
Returns a map of facet_group to a count of the number of files that the given user
|
2751
2646
|
has permissions to view.
|
@@ -2789,11 +2684,7 @@ class DownloadableFiles(CommonColumns):
|
|
2789
2684
|
]
|
2790
2685
|
)
|
2791
2686
|
.where(sql_and(*where_clauses))
|
2792
|
-
.select_from(
|
2793
|
-
downloadable_files_for_query.outerjoin(
|
2794
|
-
files_to_file_groups_for_query
|
2795
|
-
)
|
2796
|
-
)
|
2687
|
+
.select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
|
2797
2688
|
)
|
2798
2689
|
|
2799
2690
|
statement = statement.group_by(downloadable_files_for_query.c.facet_group)
|
@@ -2837,35 +2728,35 @@ class DownloadableFiles(CommonColumns):
|
|
2837
2728
|
else:
|
2838
2729
|
statement = (
|
2839
2730
|
select([downloadable_files_for_query.c.object_url])
|
2840
|
-
.where(
|
2841
|
-
|
2842
|
-
)
|
2843
|
-
.select_from(
|
2844
|
-
downloadable_files_for_query.outerjoin(
|
2845
|
-
files_to_file_groups_for_query
|
2846
|
-
)
|
2847
|
-
)
|
2731
|
+
.where(sql_and(*where_clauses, downloadable_files_for_query.c.id.in_(ids)))
|
2732
|
+
.select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
|
2848
2733
|
)
|
2849
2734
|
|
2850
2735
|
return [row[0] for row in session.execute(statement).fetchall()]
|
2851
2736
|
|
2852
2737
|
@classmethod
|
2853
|
-
|
2854
|
-
|
2855
|
-
|
2738
|
+
@with_default_session
|
2739
|
+
def filter_object_ids_by_permissions(cls, user: Users, ids: Iterable[int], session: Session) -> Iterable[int]:
|
2740
|
+
"""
|
2741
|
+
Takes a list of object ids and filters it to return only those object ids the user has permission for.
|
2742
|
+
"""
|
2743
|
+
|
2744
|
+
where_clause = DownloadableFiles._generate_where_clause_with_permissions(user)
|
2745
|
+
statement = select([DownloadableFiles.id]).where(sql_and(DownloadableFiles.id.in_(ids), where_clause))
|
2746
|
+
|
2747
|
+
return [row[0] for row in session.execute(statement).fetchall()]
|
2748
|
+
|
2749
|
+
@classmethod
|
2750
|
+
def _generate_trial_file_counts(cls, downloadable_files: Iterable) -> Dict[str, int]:
|
2856
2751
|
results = defaultdict(lambda: 0)
|
2857
2752
|
for downloadable_file in downloadable_files:
|
2858
2753
|
if downloadable_file.data_category:
|
2859
|
-
results[downloadable_file.trial_id] =
|
2860
|
-
results[downloadable_file.trial_id] + 1
|
2861
|
-
)
|
2754
|
+
results[downloadable_file.trial_id] = results[downloadable_file.trial_id] + 1
|
2862
2755
|
return results
|
2863
2756
|
|
2864
2757
|
@classmethod
|
2865
2758
|
@with_default_session
|
2866
|
-
def remove_participants_and_samples_info_files(
|
2867
|
-
cls, trial_id: str, session: Session
|
2868
|
-
):
|
2759
|
+
def remove_participants_and_samples_info_files(cls, trial_id: str, session: Session):
|
2869
2760
|
"""
|
2870
2761
|
Remove participants info and samples info downloadable files
|
2871
2762
|
"""
|
@@ -2913,27 +2804,19 @@ class DownloadableFiles(CommonColumns):
|
|
2913
2804
|
if not where_clauses:
|
2914
2805
|
|
2915
2806
|
# No where clause (the user is likely an admin).
|
2916
|
-
statement = select([downloadable_files_for_query]).select_from(
|
2917
|
-
downloadable_files_for_query
|
2918
|
-
)
|
2807
|
+
statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)
|
2919
2808
|
|
2920
2809
|
else:
|
2921
2810
|
statement = (
|
2922
2811
|
select([downloadable_files_for_query])
|
2923
2812
|
.where(sql_and(*where_clauses))
|
2924
|
-
.select_from(
|
2925
|
-
downloadable_files_for_query.outerjoin(
|
2926
|
-
files_to_file_groups_for_query
|
2927
|
-
)
|
2928
|
-
)
|
2813
|
+
.select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
|
2929
2814
|
)
|
2930
2815
|
|
2931
2816
|
downloadable_files = DownloadableFiles._convert_list_results(
|
2932
2817
|
downloadable_files_for_query, session.execute(statement).fetchall()
|
2933
2818
|
)
|
2934
|
-
trial_file_counts = DownloadableFiles._generate_trial_file_counts(
|
2935
|
-
downloadable_files
|
2936
|
-
)
|
2819
|
+
trial_file_counts = DownloadableFiles._generate_trial_file_counts(downloadable_files)
|
2937
2820
|
return build_trial_facets(trial_file_counts)
|
2938
2821
|
|
2939
2822
|
@with_default_session
|
@@ -2958,13 +2841,9 @@ class DownloadableFiles(CommonColumns):
|
|
2958
2841
|
"trial_id": self.trial_id,
|
2959
2842
|
"id": self.id,
|
2960
2843
|
}
|
2961
|
-
related_files = result_proxy_to_models(
|
2962
|
-
session.execute(query, params), DownloadableFiles
|
2963
|
-
)
|
2844
|
+
related_files = result_proxy_to_models(session.execute(query, params), DownloadableFiles)
|
2964
2845
|
else:
|
2965
|
-
not_sample_specific = not_(
|
2966
|
-
literal_column("additional_metadata::text").like('%.cimac_id":%')
|
2967
|
-
)
|
2846
|
+
not_sample_specific = not_(literal_column("additional_metadata::text").like('%.cimac_id":%'))
|
2968
2847
|
related_files = (
|
2969
2848
|
session.query(DownloadableFiles)
|
2970
2849
|
.filter(
|
@@ -3020,9 +2899,7 @@ class DownloadableFiles(CommonColumns):
|
|
3020
2899
|
full_type_perms.append(perm.upload_type)
|
3021
2900
|
else:
|
3022
2901
|
trial_type_perms.append((perm.trial_id, perm.upload_type))
|
3023
|
-
df_tuples = tuple_(
|
3024
|
-
DownloadableFiles.trial_id, DownloadableFiles.upload_type
|
3025
|
-
)
|
2902
|
+
df_tuples = tuple_(DownloadableFiles.trial_id, DownloadableFiles.upload_type)
|
3026
2903
|
file_filters.append(
|
3027
2904
|
or_(
|
3028
2905
|
# don't include clinical_data in cross-trial permission
|
@@ -3071,16 +2948,9 @@ class DownloadableFiles(CommonColumns):
|
|
3071
2948
|
etag = make_etag(filtered_metadata.values())
|
3072
2949
|
|
3073
2950
|
object_url = filtered_metadata["object_url"]
|
3074
|
-
df = (
|
3075
|
-
session.query(DownloadableFiles)
|
3076
|
-
.filter_by(object_url=object_url)
|
3077
|
-
.with_for_update()
|
3078
|
-
.first()
|
3079
|
-
)
|
2951
|
+
df = session.query(DownloadableFiles).filter_by(object_url=object_url).with_for_update().first()
|
3080
2952
|
if df:
|
3081
|
-
df = session.merge(
|
3082
|
-
DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata)
|
3083
|
-
)
|
2953
|
+
df = session.merge(DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata))
|
3084
2954
|
else:
|
3085
2955
|
df = DownloadableFiles(_etag=etag, **filtered_metadata)
|
3086
2956
|
|
@@ -3109,12 +2979,7 @@ class DownloadableFiles(CommonColumns):
|
|
3109
2979
|
"""
|
3110
2980
|
|
3111
2981
|
# trying to find existing one
|
3112
|
-
df = (
|
3113
|
-
session.query(DownloadableFiles)
|
3114
|
-
.filter_by(object_url=blob.name)
|
3115
|
-
.with_for_update()
|
3116
|
-
.first()
|
3117
|
-
)
|
2982
|
+
df = session.query(DownloadableFiles).filter_by(object_url=blob.name).with_for_update().first()
|
3118
2983
|
if not df:
|
3119
2984
|
df = DownloadableFiles()
|
3120
2985
|
|
@@ -3147,18 +3012,14 @@ class DownloadableFiles(CommonColumns):
|
|
3147
3012
|
|
3148
3013
|
@classmethod
|
3149
3014
|
@with_default_session
|
3150
|
-
def list_object_urls(
|
3151
|
-
cls, ids: List[int], session: Session, filter_: Callable[[Query], Query]
|
3152
|
-
) -> List[str]:
|
3015
|
+
def list_object_urls(cls, ids: List[int], session: Session, filter_: Callable[[Query], Query]) -> List[str]:
|
3153
3016
|
"""Get all object_urls for a batch of downloadable file record IDs"""
|
3154
3017
|
query = session.query(cls.object_url).filter(cls.id.in_(ids))
|
3155
3018
|
query = filter_(query)
|
3156
3019
|
return [r[0] for r in query.all()]
|
3157
3020
|
|
3158
3021
|
@classmethod
|
3159
|
-
def build_file_bundle_query(
|
3160
|
-
cls, allowed_upload_types: Optional[List[str]]
|
3161
|
-
) -> Query:
|
3022
|
+
def build_file_bundle_query(cls) -> Query:
|
3162
3023
|
"""
|
3163
3024
|
Build a query that selects nested file bundles from the downloadable files table.
|
3164
3025
|
The `file_bundles` query below should produce one bundle per unique `trial_id` that
|
@@ -3173,8 +3034,6 @@ class DownloadableFiles(CommonColumns):
|
|
3173
3034
|
}
|
3174
3035
|
```
|
3175
3036
|
where "type" is something like `"Olink"` or `"Participants Info"` and "purpose" is a `FilePurpose` string.
|
3176
|
-
|
3177
|
-
If `allowed_upload_types` is provided, the query will filter by files that only have an `upload_type` that appear in the list.
|
3178
3037
|
"""
|
3179
3038
|
tid_col, type_col, purp_col, ids_col, purps_col = (
|
3180
3039
|
literal_column("trial_id"),
|
@@ -3184,28 +3043,24 @@ class DownloadableFiles(CommonColumns):
|
|
3184
3043
|
literal_column("purposes"),
|
3185
3044
|
)
|
3186
3045
|
|
3187
|
-
id_bundles =
|
3188
|
-
|
3189
|
-
|
3190
|
-
|
3191
|
-
|
3192
|
-
|
3193
|
-
|
3194
|
-
|
3195
|
-
|
3196
|
-
|
3197
|
-
|
3198
|
-
|
3199
|
-
id_bundles = id_bundles.alias("id_bundles")
|
3200
|
-
|
3046
|
+
id_bundles = (
|
3047
|
+
select(
|
3048
|
+
[
|
3049
|
+
cls.trial_id,
|
3050
|
+
cls.data_category_prefix.label(type_col.key),
|
3051
|
+
cls.file_purpose.label(purp_col.key),
|
3052
|
+
func.json_agg(cls.id).label(ids_col.key),
|
3053
|
+
]
|
3054
|
+
)
|
3055
|
+
.group_by(cls.trial_id, cls.data_category_prefix, cls.file_purpose)
|
3056
|
+
.alias("id_bundles")
|
3057
|
+
)
|
3201
3058
|
purpose_bundles = (
|
3202
3059
|
select(
|
3203
3060
|
[
|
3204
3061
|
tid_col,
|
3205
3062
|
type_col,
|
3206
|
-
func.json_object_agg(
|
3207
|
-
func.coalesce(purp_col, "miscellaneous"), ids_col
|
3208
|
-
).label(purps_col.key),
|
3063
|
+
func.json_object_agg(func.coalesce(purp_col, "miscellaneous"), ids_col).label(purps_col.key),
|
3209
3064
|
]
|
3210
3065
|
)
|
3211
3066
|
.select_from(id_bundles)
|
@@ -3216,9 +3071,7 @@ class DownloadableFiles(CommonColumns):
|
|
3216
3071
|
select(
|
3217
3072
|
[
|
3218
3073
|
tid_col.label(tid_col.key),
|
3219
|
-
func.json_object_agg(
|
3220
|
-
func.coalesce(type_col, "other"), purps_col
|
3221
|
-
).label("file_bundle"),
|
3074
|
+
func.json_object_agg(func.coalesce(type_col, "other"), purps_col).label("file_bundle"),
|
3222
3075
|
]
|
3223
3076
|
)
|
3224
3077
|
.select_from(purpose_bundles)
|
@@ -3229,9 +3082,7 @@ class DownloadableFiles(CommonColumns):
|
|
3229
3082
|
|
3230
3083
|
@classmethod
|
3231
3084
|
@with_default_session
|
3232
|
-
def get_total_bytes(
|
3233
|
-
cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
|
3234
|
-
) -> int:
|
3085
|
+
def get_total_bytes(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> int:
|
3235
3086
|
"""Get the total number of bytes of data stored across all files."""
|
3236
3087
|
filtered_query = filter_(session.query(func.sum(cls.file_size_bytes)))
|
3237
3088
|
total_bytes = filtered_query.one()[0]
|
@@ -3240,9 +3091,7 @@ class DownloadableFiles(CommonColumns):
|
|
3240
3091
|
|
3241
3092
|
@classmethod
|
3242
3093
|
@with_default_session
|
3243
|
-
def get_trial_facets(
|
3244
|
-
cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
|
3245
|
-
):
|
3094
|
+
def get_trial_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
|
3246
3095
|
trial_file_counts = cls.count_by(
|
3247
3096
|
cls.trial_id,
|
3248
3097
|
session=session,
|
@@ -3256,12 +3105,8 @@ class DownloadableFiles(CommonColumns):
|
|
3256
3105
|
# TODO fix this
|
3257
3106
|
@classmethod
|
3258
3107
|
@with_default_session
|
3259
|
-
def get_data_category_facets(
|
3260
|
-
cls, session
|
3261
|
-
):
|
3262
|
-
facet_group_file_counts = cls.count_by(
|
3263
|
-
cls.facet_group, session=session, filter_=filter_
|
3264
|
-
)
|
3108
|
+
def get_data_category_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
|
3109
|
+
facet_group_file_counts = cls.count_by(cls.facet_group, session=session, filter_=filter_)
|
3265
3110
|
data_category_facets = build_data_category_facets(facet_group_file_counts)
|
3266
3111
|
return data_category_facets
|
3267
3112
|
|
@@ -3277,10 +3122,7 @@ class DownloadableFiles(CommonColumns):
|
|
3277
3122
|
# Query clause for computing a downloadable file's data category.
|
3278
3123
|
# Used above in the DownloadableFiles.data_category computed property.
|
3279
3124
|
DATA_CATEGORY_CASE_CLAUSE = case(
|
3280
|
-
[
|
3281
|
-
(DownloadableFiles.facet_group == k, v)
|
3282
|
-
for k, v in facet_groups_to_categories.items()
|
3283
|
-
]
|
3125
|
+
[(DownloadableFiles.facet_group == k, v) for k, v in facet_groups_to_categories.items()]
|
3284
3126
|
)
|
3285
3127
|
|
3286
3128
|
# Query clause for computing a downloadable file's file purpose.
|
@@ -3293,9 +3135,7 @@ FILE_PURPOSE_CASE_CLAUSE = case(
|
|
3293
3135
|
)
|
3294
3136
|
|
3295
3137
|
|
3296
|
-
def result_proxy_to_models(
|
3297
|
-
result_proxy: ResultProxy, model: BaseModel
|
3298
|
-
) -> List[BaseModel]:
|
3138
|
+
def result_proxy_to_models(result_proxy: ResultProxy, model: BaseModel) -> List[BaseModel]:
|
3299
3139
|
"""Materialize a sqlalchemy `result_proxy` iterable as a list of `model` instances"""
|
3300
3140
|
return [model(**dict(row_proxy)) for row_proxy in result_proxy.all()]
|
3301
3141
|
|