nci-cidc-api-modules 1.1.11__py3-none-any.whl → 1.1.13__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
cidc_api/models/models.py CHANGED
@@ -28,6 +28,7 @@ import hashlib
28
28
  import os
29
29
  import re
30
30
  from collections import defaultdict
31
+ from copy import deepcopy
31
32
  from datetime import datetime, timedelta
32
33
  from enum import Enum as EnumBaseClass
33
34
  from functools import wraps
@@ -77,6 +78,7 @@ from sqlalchemy import (
77
78
  or_,
78
79
  Table,
79
80
  MetaData,
81
+ true,
80
82
  )
81
83
  from sqlalchemy.dialects.postgresql import JSONB, UUID
82
84
  from sqlalchemy.engine import ResultProxy
@@ -92,10 +94,11 @@ from sqlalchemy.sql import (
92
94
  # instead of the sqlalchemy.sql versions we are importing here. The solution is to
93
95
  # break up this giant file.
94
96
  and_ as sql_and,
95
- # or_ as sql_or, # NOT USED
97
+ or_ as sql_or,
96
98
  # select, # ALREADY IMPORTED
97
99
  text,
98
100
  )
101
+ from sqlalchemy.sql.elements import BooleanClauseList
99
102
  from sqlalchemy.sql.functions import coalesce
100
103
  from werkzeug.exceptions import BadRequest
101
104
 
@@ -184,11 +187,7 @@ class CommonColumns(BaseModel): # type: ignore
184
187
  if hasattr(b, "__table__"):
185
188
  columns_to_check.extend(b.__table__.columns)
186
189
 
187
- ret = {
188
- c.name: getattr(self, c.name)
189
- for c in columns_to_check
190
- if hasattr(self, c.name)
191
- }
190
+ ret = {c.name: getattr(self, c.name) for c in columns_to_check if hasattr(self, c.name)}
192
191
  ret = {k: v for k, v in ret.items() if v is not None}
193
192
  return ret
194
193
 
@@ -269,16 +268,12 @@ class CommonColumns(BaseModel): # type: ignore
269
268
  if sort_field:
270
269
  # Get the attribute from the class, in case this is a hybrid attribute
271
270
  sort_attribute = getattr(cls, sort_field)
272
- field_with_dir = (
273
- asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
274
- )
271
+ field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
275
272
  query = query.order_by(field_with_dir)
276
273
  if sort_field != "id":
277
274
  # When sorting, need to guarantee unique order for offset/limit pagination to produce
278
275
  # consistent results. Adding secondary "id" sort field to ensure unique order.
279
- secondary_field_with_dir = (
280
- asc("id") if sort_direction == "asc" else desc("id")
281
- )
276
+ secondary_field_with_dir = asc("id") if sort_direction == "asc" else desc("id")
282
277
  query = query.order_by(secondary_field_with_dir)
283
278
 
284
279
  # Apply filter function
@@ -299,9 +294,7 @@ class CommonColumns(BaseModel): # type: ignore
299
294
 
300
295
  @classmethod
301
296
  @with_default_session
302
- def count_by(
303
- cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q
304
- ) -> Dict[str, int]:
297
+ def count_by(cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> Dict[str, int]:
305
298
  """
306
299
  Return a dictionary mapping results of `expr` to the number of times each result
307
300
  occurs in the table related to this model. E.g., for the `UploadJobs` model,
@@ -326,9 +319,7 @@ class CommonColumns(BaseModel): # type: ignore
326
319
  filter_: Callable[[Query], Query] = lambda q: q,
327
320
  ):
328
321
  """Get a list of distinct values for the given column."""
329
- assert (
330
- column_name in cls.__table__.columns.keys()
331
- ), f"{cls.__tablename__} has no column {column_name}"
322
+ assert column_name in cls.__table__.columns.keys(), f"{cls.__tablename__} has no column {column_name}"
332
323
 
333
324
  base_query = session.query(getattr(cls, column_name))
334
325
  filtered_query = filter_(base_query)
@@ -342,9 +333,7 @@ class CommonColumns(BaseModel): # type: ignore
342
333
  @classmethod
343
334
  def get_unique_columns(cls):
344
335
  """Get a list of all the unique columns in this table."""
345
- return [
346
- column for column in cls.__table__.c if column.unique or column.primary_key
347
- ]
336
+ return [column for column in cls.__table__.c if column.unique or column.primary_key]
348
337
 
349
338
 
350
339
  class CIDCRole(EnumBaseClass):
@@ -392,6 +381,11 @@ class Users(CommonColumns):
392
381
  """Returns true if this user is an NCI Biobank user."""
393
382
  return self.role == CIDCRole.NCI_BIOBANK_USER.value
394
383
 
384
+ def is_admin_or_nci_user(self) -> bool:
385
+ """Returns true if this user is a CIDC admin or NCI Biobank user. These users
386
+ share full access to much of the system."""
387
+ return self.is_admin() or self.is_nci_user()
388
+
395
389
  def has_download_permissions(self) -> bool:
396
390
  """Returns false if this user is a Network Viewer or PACT User."""
397
391
  return self.role not in (
@@ -434,9 +428,7 @@ class Users(CommonColumns):
434
428
  user = Users.find_by_email(email)
435
429
  if not user:
436
430
  logger.info("Creating new user with email %s", email)
437
- user = Users(
438
- email=email, contact_email=email, first_n=first_n, last_n=last_n
439
- )
431
+ user = Users(email=email, contact_email=email, first_n=first_n, last_n=last_n)
440
432
  user.insert(session=session)
441
433
  return user
442
434
 
@@ -450,9 +442,7 @@ class Users(CommonColumns):
450
442
  user_inactivity_cutoff = datetime.today() - timedelta(days=INACTIVE_USER_DAYS)
451
443
  update_query = (
452
444
  update(Users)
453
- .where(
454
- and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False)
455
- )
445
+ .where(and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False))
456
446
  .values(disabled=True)
457
447
  .returning(Users.id)
458
448
  )
@@ -460,9 +450,7 @@ class Users(CommonColumns):
460
450
  if commit:
461
451
  session.commit()
462
452
 
463
- disabled_users = [
464
- Users.find_by_id(uid, session=session) for uid in disabled_user_ids
465
- ]
453
+ disabled_users = [Users.find_by_id(uid, session=session) for uid in disabled_user_ids]
466
454
  for u in disabled_users:
467
455
  Permissions.revoke_user_permissions(u, session=session)
468
456
  revoke_bigquery_access(u.email)
@@ -501,15 +489,13 @@ class Users(CommonColumns):
501
489
  .union_all(
502
490
  # Handle admins separately, since they can view all data for all
503
491
  # trials even if they have no permissions assigned to them.
504
- session.query(
505
- *user_columns, TrialMetadata.trial_id, literal("*,clinical_data")
506
- ).filter(Users.role == CIDCRole.ADMIN.value)
492
+ session.query(*user_columns, TrialMetadata.trial_id, literal("*,clinical_data")).filter(
493
+ Users.role == CIDCRole.ADMIN.value
494
+ )
507
495
  )
508
496
  )
509
497
 
510
- df = pd.DataFrame(
511
- query, columns=["email", "organization", "role", "trial_id", "permissions"]
512
- ).fillna("*")
498
+ df = pd.DataFrame(query, columns=["email", "organization", "role", "trial_id", "permissions"]).fillna("*")
513
499
 
514
500
  with pd.ExcelWriter(
515
501
  io
@@ -630,19 +616,11 @@ class Permissions(CommonColumns):
630
616
 
631
617
  NOTE: values provided to the `commit` argument will be ignored. This method always commits.
632
618
  """
633
- if (
634
- self.upload_type == self.EVERY
635
- and self.trial_id == self.EVERY
636
- and not self.file_group_id
637
- ):
638
- raise ValueError(
639
- "A permission must have a trial id, upload type, or file group."
640
- )
619
+ if self.upload_type == self.EVERY and self.trial_id == self.EVERY and not self.file_group_id:
620
+ raise ValueError("A permission must have a trial id, upload type, or file group.")
641
621
 
642
622
  if self.file_group_id and self.upload_type != "file_group":
643
- raise ValueError(
644
- "If a permission has a file group, its upload_type must be set to file_group"
645
- )
623
+ raise ValueError("If a permission has a file group, its upload_type must be set to file_group")
646
624
 
647
625
  grantee = Users.find_by_id(self.granted_to_user, session=session)
648
626
  if grantee is None:
@@ -724,11 +702,7 @@ class Permissions(CommonColumns):
724
702
  super().insert(session=session, commit=True)
725
703
 
726
704
  # Don't make any GCS changes if this user doesn't have download access, is disabled, or isn't approved
727
- if (
728
- not grantee.has_download_permissions()
729
- or grantee.disabled
730
- or grantee.approval_date is None
731
- ):
705
+ if not grantee.has_download_permissions() or grantee.disabled or grantee.approval_date is None:
732
706
  # TODO: pact users do not have download permissions currently
733
707
  return
734
708
 
@@ -736,9 +710,7 @@ class Permissions(CommonColumns):
736
710
  # Grant ACL download permissions in GCS
737
711
 
738
712
  if self.upload_type == "file_group":
739
- Permissions.grant_download_access_to_file_group(
740
- grantee.email, file_group
741
- )
713
+ Permissions.grant_download_access_to_file_group(grantee.email, file_group)
742
714
  else:
743
715
 
744
716
  # if they have any download permissions, they need the CIDC Lister role
@@ -746,9 +718,7 @@ class Permissions(CommonColumns):
746
718
  grant_download_access(grantee.email, self.trial_id, self.upload_type)
747
719
  # Remove permissions staged for deletion, if any
748
720
  for perm in perms_to_delete:
749
- revoke_download_access(
750
- grantee.email, perm.trial_id, perm.upload_type
751
- )
721
+ revoke_download_access(grantee.email, perm.trial_id, perm.upload_type)
752
722
  except Exception as e:
753
723
  # Add back deleted permissions, if any
754
724
  for perm in perms_to_delete:
@@ -760,9 +730,7 @@ class Permissions(CommonColumns):
760
730
  raise IAMException("IAM grant failed.") from e
761
731
 
762
732
  @with_default_session
763
- def delete(
764
- self, deleted_by: Union[Users, int], session: Session, commit: bool = True
765
- ) -> None:
733
+ def delete(self, deleted_by: Union[Users, int], session: Session, commit: bool = True) -> None:
766
734
  """
767
735
  Delete this permission record from the database and revoke the corresponding IAM policy binding
768
736
  on the GCS data bucket.
@@ -795,9 +763,7 @@ class Permissions(CommonColumns):
795
763
  revoke_lister_access(grantee.email)
796
764
 
797
765
  except Exception as e:
798
- raise IAMException(
799
- "IAM revoke failed, and permission db record not removed."
800
- ) from e
766
+ raise IAMException("IAM revoke failed, and permission db record not removed.") from e
801
767
 
802
768
  info_message = f"admin-action: {deleted_by_user.email} removed from {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
803
769
  logger.info(info_message)
@@ -841,10 +807,7 @@ class Permissions(CommonColumns):
841
807
  # if getting EVERY, return all
842
808
  | (upload_type == Permissions.EVERY)
843
809
  # if permission is EVERY, don't return if looking for clinical_data
844
- | (
845
- (Permissions.upload_type == Permissions.EVERY)
846
- & (upload_type != "clinical_data")
847
- )
810
+ | ((Permissions.upload_type == Permissions.EVERY) & (upload_type != "clinical_data"))
848
811
  )
849
812
  ),
850
813
  )
@@ -864,23 +827,16 @@ class Permissions(CommonColumns):
864
827
  permissions_list: List[Permissions] = []
865
828
  for upload in upload_type:
866
829
  permissions_list.extend(
867
- Permissions.get_for_trial_type(
868
- trial_id=trial_id, upload_type=upload, session=session
869
- )
830
+ Permissions.get_for_trial_type(trial_id=trial_id, upload_type=upload, session=session)
870
831
  )
871
832
 
872
- permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(
873
- lambda: defaultdict(list)
874
- )
833
+ permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(lambda: defaultdict(list))
875
834
  for perm in permissions_list:
876
835
  permissions_dict[perm.trial_id][perm.upload_type].append(perm)
877
836
 
878
837
  user_dict: Dict[str, Dict[str, List[Users]]] = {
879
838
  trial: {
880
- upload: [
881
- Users.find_by_id(id=perm.granted_to_user, session=session)
882
- for perm in perms
883
- ]
839
+ upload: [Users.find_by_id(id=perm.granted_to_user, session=session) for perm in perms]
884
840
  for upload, perms in upload_dict.items()
885
841
  }
886
842
  for trial, upload_dict in permissions_dict.items()
@@ -895,11 +851,7 @@ class Permissions(CommonColumns):
895
851
  for trial, upload_dict in user_dict.items()
896
852
  }
897
853
  # remove any trial that doesn't have any uploads in it
898
- user_email_dict = {
899
- trial: upload_dict
900
- for trial, upload_dict in user_email_dict.items()
901
- if len(upload_dict)
902
- }
854
+ user_email_dict = {trial: upload_dict for trial, upload_dict in user_email_dict.items() if len(upload_dict)}
903
855
  return user_email_dict
904
856
 
905
857
  @staticmethod
@@ -919,14 +871,8 @@ class Permissions(CommonColumns):
919
871
  session.query(Permissions)
920
872
  .filter(
921
873
  Permissions.granted_to_user == user_id,
922
- (
923
- (Permissions.trial_id == trial_id)
924
- & (Permissions.upload_type == upload_type)
925
- )
926
- | (
927
- (Permissions.trial_id == Permissions.EVERY)
928
- & (Permissions.upload_type == upload_type)
929
- )
874
+ ((Permissions.trial_id == trial_id) & (Permissions.upload_type == upload_type))
875
+ | ((Permissions.trial_id == Permissions.EVERY) & (Permissions.upload_type == upload_type))
930
876
  | (
931
877
  (Permissions.trial_id == trial_id)
932
878
  # if permission is EVERY, don't return if looking for clinical_data
@@ -949,11 +895,7 @@ class Permissions(CommonColumns):
949
895
  )
950
896
  if results:
951
897
  file_group_ids = {file_group.id for file_group in file_groups}
952
- results = [
953
- result
954
- for result in results
955
- if result.file_group_id in file_group_ids
956
- ]
898
+ results = [result for result in results if result.file_group_id in file_group_ids]
957
899
 
958
900
  results = results and results or None
959
901
  return results
@@ -997,9 +939,7 @@ class Permissions(CommonColumns):
997
939
  upload_type=[p.upload_type for p in trial_perms],
998
940
  )
999
941
  for perm in file_group_perms:
1000
- file_group: FileGroups = FileGroups.find_by_id(
1001
- perm.file_group_id, session=session
1002
- )
942
+ file_group: FileGroups = FileGroups.find_by_id(perm.file_group_id, session=session)
1003
943
  Permissions.grant_download_access_to_file_group(user.email, file_group)
1004
944
 
1005
945
  # Regrant all of the user's intake bucket upload permissions, if they have any
@@ -1039,9 +979,7 @@ class Permissions(CommonColumns):
1039
979
 
1040
980
  @classmethod
1041
981
  @with_default_session
1042
- def grant_download_permissions_for_upload_job(
1043
- cls, upload: "UploadJobs", session: Session
1044
- ) -> None:
982
+ def grant_download_permissions_for_upload_job(cls, upload: "UploadJobs", session: Session) -> None:
1045
983
  """
1046
984
  For a given UploadJob, issue all relevant Permissions on Google
1047
985
  Loads all cross-trial permissions for the upload_type
@@ -1059,21 +997,14 @@ class Permissions(CommonColumns):
1059
997
  filters.append(cls.upload_type == upload.upload_type)
1060
998
  else:
1061
999
  # upload.upload_type can't be None
1062
- filters.append(
1063
- or_(cls.upload_type == upload.upload_type, cls.upload_type == None)
1064
- )
1000
+ filters.append(or_(cls.upload_type == upload.upload_type, cls.upload_type == None))
1065
1001
 
1066
1002
  perms = session.query(cls).filter(*filters).all()
1067
1003
  user_email_list: List[str] = []
1068
1004
 
1069
1005
  for perm in perms:
1070
1006
  user = Users.find_by_id(perm.granted_to_user, session=session)
1071
- if (
1072
- user.is_admin()
1073
- or user.is_nci_user()
1074
- or user.disabled
1075
- or user.email in user_email_list
1076
- ):
1007
+ if user.is_admin() or user.is_nci_user() or user.disabled or user.email in user_email_list:
1077
1008
  continue
1078
1009
 
1079
1010
  user_email_list.append(user.email)
@@ -1102,10 +1033,8 @@ class ValidationMultiError(Exception):
1102
1033
  """Holds multiple jsonschema.ValidationErrors"""
1103
1034
 
1104
1035
 
1105
- trial_metadata_validator: json_validation._Validator = (
1106
- json_validation.load_and_validate_schema(
1107
- "clinical_trial.json", return_validator=True
1108
- )
1036
+ trial_metadata_validator: json_validation._Validator = json_validation.load_and_validate_schema(
1037
+ "clinical_trial.json", return_validator=True
1109
1038
  )
1110
1039
 
1111
1040
  FileBundle = Dict[str, Dict[FilePurpose, List[int]]]
@@ -1121,13 +1050,13 @@ class TrialMetadata(CommonColumns):
1121
1050
  _metadata_idx = Index("metadata_idx", metadata_json, postgresql_using="gin")
1122
1051
 
1123
1052
  @staticmethod
1124
- def validate_metadata_json(metadata_json: dict) -> dict:
1125
- # Prior to running trial_metadata_validator.iter_error_messages on the metadata_json,
1126
- # strip out unnecessary manifest data for the validation so that existing manifest data
1127
- # that no longer conform to the post-CSMS-integration schema can be kept.
1053
+ def validate_metadata_json(metadata_json: dict, strip_metadata=True) -> dict:
1054
+ # Prior to running trial_metadata_validator.iter_error_messages on the metadata_json, if
1055
+ # strip_metadata=True, will strip out unnecessary manifest data for the validation so that
1056
+ # existing manifest data that no longer conform to the post-CSMS-integration schema can be kept.
1128
1057
  # See more details in the strip_metadata_for_validation function docs.
1129
- metadata_to_validate = json_validation.strip_metadata_for_validation(
1130
- metadata_json
1058
+ metadata_to_validate = (
1059
+ json_validation.strip_metadata_for_validation(metadata_json) if strip_metadata else metadata_json
1131
1060
  )
1132
1061
  errs = trial_metadata_validator.iter_error_messages(metadata_to_validate)
1133
1062
  messages = list(f"'metadata_json': {err}" for err in errs)
@@ -1163,49 +1092,34 @@ class TrialMetadata(CommonColumns):
1163
1092
  Find a trial by its CIMAC id.
1164
1093
  """
1165
1094
  try:
1166
- trial = (
1167
- session.query(TrialMetadata)
1168
- .filter_by(trial_id=trial_id)
1169
- .with_for_update()
1170
- .one()
1171
- )
1095
+ trial = session.query(TrialMetadata).filter_by(trial_id=trial_id).with_for_update().one()
1172
1096
  except NoResultFound as e:
1173
1097
  raise NoResultFound(f"No trial found with id {trial_id}") from e
1174
1098
  return trial
1175
1099
 
1176
1100
  @staticmethod
1177
1101
  @with_default_session
1178
- def patch_assays(
1179
- trial_id: str, assay_patch: dict, session: Session, commit: bool = False
1180
- ):
1102
+ def patch_assays(trial_id: str, assay_patch: dict, session: Session, commit: bool = False):
1181
1103
  """
1182
1104
  Applies assay updates to the metadata object from the trial with id `trial_id`.
1183
1105
 
1184
1106
  TODO: apply this update directly to the not-yet-existent TrialMetadata.manifest field
1185
1107
  """
1186
- return TrialMetadata._patch_trial_metadata(
1187
- trial_id, assay_patch, session=session, commit=commit
1188
- )
1108
+ return TrialMetadata._patch_trial_metadata(trial_id, assay_patch, session=session, commit=commit)
1189
1109
 
1190
1110
  @staticmethod
1191
1111
  @with_default_session
1192
- def patch_manifest(
1193
- trial_id: str, manifest_patch: dict, session: Session, commit: bool = False
1194
- ):
1112
+ def patch_manifest(trial_id: str, manifest_patch: dict, session: Session, commit: bool = False):
1195
1113
  """
1196
1114
  Applies manifest updates to the metadata object from the trial with id `trial_id`.
1197
1115
 
1198
1116
  TODO: apply this update directly to the not-yet-existent TrialMetadata.assays field
1199
1117
  """
1200
- return TrialMetadata._patch_trial_metadata(
1201
- trial_id, manifest_patch, session=session, commit=commit
1202
- )
1118
+ return TrialMetadata._patch_trial_metadata(trial_id, manifest_patch, session=session, commit=commit)
1203
1119
 
1204
1120
  @staticmethod
1205
1121
  @with_default_session
1206
- def _patch_trial_metadata(
1207
- trial_id: str, json_patch: dict, session: Session, commit: bool = False
1208
- ):
1122
+ def _patch_trial_metadata(trial_id: str, json_patch: dict, session: Session, commit: bool = False):
1209
1123
  """
1210
1124
  Applies updates to the metadata object from the trial with id `trial_id`
1211
1125
  and commits current session.
@@ -1217,9 +1131,7 @@ class TrialMetadata(CommonColumns):
1217
1131
  trial = TrialMetadata.select_for_update_by_trial_id(trial_id, session=session)
1218
1132
 
1219
1133
  # Merge assay metadata into the existing clinical trial metadata
1220
- updated_metadata, errs = prism.merge_clinical_trial_metadata(
1221
- json_patch, trial.metadata_json
1222
- )
1134
+ updated_metadata, errs = prism.merge_clinical_trial_metadata(json_patch, trial.metadata_json)
1223
1135
  if errs:
1224
1136
  raise ValidationMultiError(errs)
1225
1137
  # Save updates to trial record
@@ -1234,9 +1146,7 @@ class TrialMetadata(CommonColumns):
1234
1146
 
1235
1147
  @staticmethod
1236
1148
  @with_default_session
1237
- def create(
1238
- trial_id: str, metadata_json: dict, session: Session, commit: bool = True
1239
- ):
1149
+ def create(trial_id: str, metadata_json: dict, session: Session, commit: bool = True):
1240
1150
  """
1241
1151
  Create a new clinical trial metadata record.
1242
1152
  """
@@ -1248,9 +1158,7 @@ class TrialMetadata(CommonColumns):
1248
1158
  return trial
1249
1159
 
1250
1160
  @staticmethod
1251
- def merge_gcs_artifact(
1252
- metadata: dict, upload_type: str, uuid: str, gcs_object: Blob
1253
- ):
1161
+ def merge_gcs_artifact(metadata: dict, upload_type: str, uuid: str, gcs_object: Blob):
1254
1162
  return prism.merge_artifact(
1255
1163
  ct=metadata,
1256
1164
  assay_type=upload_type, # assay_type is the old name for upload_type
@@ -1263,9 +1171,7 @@ class TrialMetadata(CommonColumns):
1263
1171
  )
1264
1172
 
1265
1173
  @staticmethod
1266
- def merge_gcs_artifacts(
1267
- metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]
1268
- ):
1174
+ def merge_gcs_artifacts(metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]):
1269
1175
  return prism.merge_artifacts(
1270
1176
  metadata,
1271
1177
  [
@@ -1354,32 +1260,16 @@ class TrialMetadata(CommonColumns):
1354
1260
  subqueries = []
1355
1261
 
1356
1262
  if include_file_bundles:
1357
- allowed_upload_types = []
1358
- if user and not user.is_admin() and not user.is_nci_user():
1359
- permissions = Permissions.find_for_user(user.id)
1360
- # An 'empty' upload_type means full trial-level access
1361
- allowed_upload_types = [
1362
- p.upload_type for p in permissions if p.upload_type
1363
- ]
1364
- logger.info(
1365
- f"Restricting file bundle for user {user.id} to {allowed_upload_types=}"
1366
- )
1263
+ file_bundle_query = DownloadableFiles.build_file_bundle_query()
1367
1264
 
1368
- file_bundle_query = DownloadableFiles.build_file_bundle_query(
1369
- allowed_upload_types
1370
- )
1371
1265
  columns.append(file_bundle_query.c.file_bundle)
1372
1266
  subqueries.append(file_bundle_query)
1373
1267
 
1374
1268
  if include_counts:
1375
1269
  trial_summaries: List[dict] = cls.get_summaries()
1376
1270
 
1377
- participant_counts: Dict[str, int] = {
1378
- t["trial_id"]: t["total_participants"] for t in trial_summaries
1379
- }
1380
- sample_counts: Dict[str, int] = {
1381
- t["trial_id"]: t["total_samples"] for t in trial_summaries
1382
- }
1271
+ participant_counts: Dict[str, int] = {t["trial_id"]: t["total_participants"] for t in trial_summaries}
1272
+ sample_counts: Dict[str, int] = {t["trial_id"]: t["total_samples"] for t in trial_summaries}
1383
1273
 
1384
1274
  # Combine all query components
1385
1275
  query = session.query(*columns)
@@ -1387,6 +1277,7 @@ class TrialMetadata(CommonColumns):
1387
1277
  # Each subquery will have a trial_id column and one record per trial id
1388
1278
  query = query.outerjoin(subquery, cls.trial_id == subquery.c.trial_id)
1389
1279
 
1280
+ query = query.order_by(cls.trial_id)
1390
1281
  query = cls._add_pagination_filters(query, **pagination_args)
1391
1282
 
1392
1283
  trials = []
@@ -1402,25 +1293,43 @@ class TrialMetadata(CommonColumns):
1402
1293
  setattr(trial, column, value)
1403
1294
 
1404
1295
  if include_counts:
1405
- setattr(
1406
- trial, "num_participants", participant_counts.get(trial.trial_id, 0)
1407
- )
1296
+ setattr(trial, "num_participants", participant_counts.get(trial.trial_id, 0))
1408
1297
  setattr(trial, "num_samples", sample_counts.get(trial.trial_id, 0))
1409
1298
 
1410
1299
  if include_file_bundles and hasattr(trial, "file_bundle"):
1411
- for assay in trial.file_bundle:
1300
+ # File bundle has all existing object ids. Remove ones that aren't allowed by permissions.
1301
+
1302
+ # Gather all object ids in the file bundle
1303
+ all_object_ids = set()
1304
+
1305
+ for assay, purposes in trial.file_bundle.items():
1306
+ for purpose, object_ids in purposes.items():
1307
+ all_object_ids = all_object_ids.union(object_ids)
1308
+ # Remove any impermissible object ids
1309
+ filtered_object_ids = DownloadableFiles.filter_object_ids_by_permissions(user, all_object_ids)
1310
+ logger.debug(f"Filtered object ids: {len(all_object_ids)} -> {len(filtered_object_ids)}")
1311
+
1312
+ for assay, purposes in trial.file_bundle.items():
1412
1313
  size_results = {}
1413
- for files_list_key in trial.file_bundle[assay]:
1414
- ids = trial.file_bundle[assay][files_list_key]
1415
- size_results[f"{files_list_key}_size"] = (
1416
- DownloadableFiles.get_total_bytes(
1417
- filter_=lambda q: q.filter(
1418
- DownloadableFiles.id.in_(ids)
1419
- )
1420
- )
1421
- )
1314
+ for purpose, object_ids in purposes.items():
1315
+ # Only allow object ids that are permitted
1316
+ permitted_object_ids = list(set(object_ids).intersection(filtered_object_ids))
1317
+ trial.file_bundle[assay][purpose] = permitted_object_ids
1318
+ if permitted_object_ids:
1319
+ # For any files left in the purpose, get their total size
1320
+ filter_ = lambda q: q.filter(DownloadableFiles.id.in_(permitted_object_ids))
1321
+ size_results[f"{purpose}_size"] = DownloadableFiles.get_total_bytes(filter_=filter_)
1422
1322
  trial.file_bundle[assay].update(size_results)
1423
1323
 
1324
+ # Trim the file bundle
1325
+ for assay, purposes in deepcopy(trial.file_bundle).items():
1326
+ for purpose, object_ids in purposes.items():
1327
+ if not object_ids:
1328
+ # No file ids left in the purpose after filtering. Remove this purpose from the bundle.
1329
+ del trial.file_bundle[assay][purpose]
1330
+ if not trial.file_bundle[assay]:
1331
+ del trial.file_bundle[assay]
1332
+
1424
1333
  trials.append(trial)
1425
1334
 
1426
1335
  return trials
@@ -2017,9 +1926,7 @@ class TrialMetadata(CommonColumns):
2017
1926
  summaries_query = "SELECT result FROM trial_summaries_mv"
2018
1927
  # Retrieve trial-level summary results from data cached in trial_summaries_mv materialized view.
2019
1928
  # The source of the SQL query used in trial_summaries_mv is get_summaries_query()
2020
- summaries = [
2021
- summary for (summary,) in session.execute(summaries_query) if summary
2022
- ]
1929
+ summaries = [summary for (summary,) in session.execute(summaries_query) if summary]
2023
1930
 
2024
1931
  # Shortcut to impute 0 values for assays where trials don't yet have data
2025
1932
  summaries = pd.DataFrame(summaries).fillna(0).to_dict("records")
@@ -2037,9 +1944,7 @@ class UploadJobStatus(EnumBaseClass):
2037
1944
  MERGE_FAILED = "merge-failed"
2038
1945
 
2039
1946
  @classmethod
2040
- def is_valid_transition(
2041
- cls, current: str, target: str, is_manifest: bool = False
2042
- ) -> bool:
1947
+ def is_valid_transition(cls, current: str, target: str, is_manifest: bool = False) -> bool:
2043
1948
  """
2044
1949
  Enforce logic about which state transitions are valid. E.g.,
2045
1950
  an upload whose status is "merge-completed" should never be updated
@@ -2084,9 +1989,7 @@ class UploadJobs(CommonColumns):
2084
1989
  )
2085
1990
 
2086
1991
  # The current status of the upload job
2087
- _status = Column(
2088
- "status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False
2089
- )
1992
+ _status = Column("status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False)
2090
1993
  # A long, random identifier for this upload job
2091
1994
  token = Column(UUID, server_default=text("gen_random_uuid()"), nullable=False)
2092
1995
  # Text containing feedback on why the upload status is what it is
@@ -2108,9 +2011,7 @@ class UploadJobs(CommonColumns):
2108
2011
  trial_id = Column(String, nullable=False, index=True)
2109
2012
 
2110
2013
  # Create a GIN index on the GCS object names
2111
- _gcs_objects_idx = Index(
2112
- "upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin"
2113
- )
2014
+ _gcs_objects_idx = Index("upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin")
2114
2015
 
2115
2016
  @hybrid_property
2116
2017
  def status(self):
@@ -2124,9 +2025,7 @@ class UploadJobs(CommonColumns):
2124
2025
  old_status = self.status or UploadJobStatus.STARTED.value
2125
2026
  is_manifest = self.upload_type in prism.SUPPORTED_MANIFESTS
2126
2027
  if not UploadJobStatus.is_valid_transition(old_status, status, is_manifest):
2127
- raise ValueError(
2128
- f"Upload job with status {self.status} can't transition to status {status}"
2129
- )
2028
+ raise ValueError(f"Upload job with status {self.status} can't transition to status {status}")
2130
2029
  self._status = status
2131
2030
 
2132
2031
  def _set_status_no_validation(self, status: str):
@@ -2166,9 +2065,7 @@ class UploadJobs(CommonColumns):
2166
2065
  assert prism.PROTOCOL_ID_FIELD_NAME in metadata, "metadata must have a trial ID"
2167
2066
 
2168
2067
  is_manifest_upload = upload_type in prism.SUPPORTED_MANIFESTS
2169
- assert (
2170
- gcs_file_map is not None or is_manifest_upload
2171
- ), "assay/analysis uploads must have a gcs_file_map"
2068
+ assert gcs_file_map is not None or is_manifest_upload, "assay/analysis uploads must have a gcs_file_map"
2172
2069
 
2173
2070
  trial_id = metadata[prism.PROTOCOL_ID_FIELD_NAME]
2174
2071
 
@@ -2219,9 +2116,7 @@ class UploadJobs(CommonColumns):
2219
2116
  job.metadata_patch,
2220
2117
  updated_artifact,
2221
2118
  _,
2222
- ) = prism.merge_artifact_extra_metadata(
2223
- job.metadata_patch, uuid, job.upload_type, file
2224
- )
2119
+ ) = prism.merge_artifact_extra_metadata(job.metadata_patch, uuid, job.upload_type, file)
2225
2120
  logger.info("Updated md for %s: %s", uuid, updated_artifact.keys())
2226
2121
 
2227
2122
  # A workaround fix for JSON field modifications not being tracked
@@ -2243,25 +2138,14 @@ class UploadJobs(CommonColumns):
2243
2138
  @classmethod
2244
2139
  @with_default_session
2245
2140
  def find_first_manifest_job(cls, trial_id: str, session):
2246
- return (
2247
- session.query(UploadJobs)
2248
- .filter_by(trial_id=trial_id, gcs_xlsx_uri="")
2249
- .order_by(text("id ASC"))
2250
- .first()
2251
- )
2141
+ return session.query(UploadJobs).filter_by(trial_id=trial_id, gcs_xlsx_uri="").order_by(text("id ASC")).first()
2252
2142
 
2253
2143
  @with_default_session
2254
- def ingestion_success(
2255
- self, trial, session: Session, commit: bool = False, send_email: bool = False
2256
- ):
2144
+ def ingestion_success(self, trial, session: Session, commit: bool = False, send_email: bool = False):
2257
2145
  """Set own status to reflect successful merge and trigger email notifying CIDC admins."""
2258
2146
  # Do status update if the transition is valid
2259
- if not UploadJobStatus.is_valid_transition(
2260
- self.status, UploadJobStatus.MERGE_COMPLETED.value
2261
- ):
2262
- raise Exception(
2263
- f"Cannot declare ingestion success given current status: {self.status}"
2264
- )
2147
+ if not UploadJobStatus.is_valid_transition(self.status, UploadJobStatus.MERGE_COMPLETED.value):
2148
+ raise Exception(f"Cannot declare ingestion success given current status: {self.status}")
2265
2149
  self.status = UploadJobStatus.MERGE_COMPLETED.value
2266
2150
 
2267
2151
  if commit:
@@ -2277,11 +2161,7 @@ class FilesToFileGroups(BaseModel):
2277
2161
  """
2278
2162
 
2279
2163
  __tablename__ = "files_to_file_groups"
2280
- __table_args__ = (
2281
- PrimaryKeyConstraint(
2282
- "file_group_id", "file_id", name="pk_files_to_file_groups"
2283
- ),
2284
- )
2164
+ __table_args__ = (PrimaryKeyConstraint("file_group_id", "file_id", name="pk_files_to_file_groups"),)
2285
2165
  file_group_id = Column(ForeignKey("file_groups.id"), primary_key=True)
2286
2166
  file_id = Column(ForeignKey("downloadable_files.id"), primary_key=True)
2287
2167
  _created = Column(DateTime, default=func.now(), nullable=False)
@@ -2504,9 +2384,7 @@ class DownloadableFiles(CommonColumns):
2504
2384
  return downloadable_files_for_query, files_to_file_groups_for_query
2505
2385
 
2506
2386
  @classmethod
2507
- def _convert_list_results(
2508
- cls, downloadable_files_for_query: Table, query_files: List
2509
- ):
2387
+ def _convert_list_results(cls, downloadable_files_for_query: Table, query_files: List):
2510
2388
  """Converts the results of a SQLalchemy expression language query into actual DownloadableFiles
2511
2389
  objects. This is necessary since the UI depends on some of the derived properties in
2512
2390
  DownloadableFiles.
@@ -2544,9 +2422,7 @@ class DownloadableFiles(CommonColumns):
2544
2422
  where_clauses.append(downloadable_files_for_query.c.trial_id.in_(trial_ids))
2545
2423
  if facets:
2546
2424
  facet_groups = get_facet_groups_for_paths(facets)
2547
- where_clauses.append(
2548
- downloadable_files_for_query.c.facet_group.in_(facet_groups)
2549
- )
2425
+ where_clauses.append(downloadable_files_for_query.c.facet_group.in_(facet_groups))
2550
2426
 
2551
2427
  if user and not is_admin:
2552
2428
  permissions = Permissions.find_for_user(user.id)
@@ -2562,27 +2438,15 @@ class DownloadableFiles(CommonColumns):
2562
2438
  elif permission.file_group_id is None:
2563
2439
  where_clauses.append(
2564
2440
  sql_and(
2565
- (
2566
- downloadable_files_for_query.c.trial_id
2567
- == permission.trial_id
2568
- ),
2569
- (
2570
- downloadable_files_for_query.c.upload_type
2571
- == permission.upload_type
2572
- ),
2441
+ (downloadable_files_for_query.c.trial_id == permission.trial_id),
2442
+ (downloadable_files_for_query.c.upload_type == permission.upload_type),
2573
2443
  )
2574
2444
  )
2575
2445
  else:
2576
2446
  where_clauses.append(
2577
2447
  sql_and(
2578
- (
2579
- downloadable_files_for_query.c.trial_id
2580
- == permission.trial_id
2581
- ),
2582
- (
2583
- files_to_file_groups_for_query.c.file_group_id
2584
- == permission.file_group_id
2585
- ),
2448
+ (downloadable_files_for_query.c.trial_id == permission.trial_id),
2449
+ (files_to_file_groups_for_query.c.file_group_id == permission.file_group_id),
2586
2450
  )
2587
2451
  )
2588
2452
 
@@ -2596,9 +2460,7 @@ class DownloadableFiles(CommonColumns):
2596
2460
  )
2597
2461
  )
2598
2462
  if full_type_perms:
2599
- where_clauses.append(
2600
- downloadable_files_for_query.c.upload_type.in_(full_type_perms)
2601
- )
2463
+ where_clauses.append(downloadable_files_for_query.c.upload_type.in_(full_type_perms))
2602
2464
 
2603
2465
  # Need to be careful about return logic. Empty results could be because the user
2604
2466
  # is an admin, whereas None means the user has no permissions to view any files.
@@ -2607,6 +2469,55 @@ class DownloadableFiles(CommonColumns):
2607
2469
 
2608
2470
  return None
2609
2471
 
2472
@classmethod
def _generate_where_clause_with_permissions(cls, user: Users) -> BooleanClauseList:
    """
    Build the WHERE condition that limits DownloadableFiles rows to those `user` may view.

    Users for whom `user.is_admin_or_nci_user()` is true get an unconditional `true()`.
    For everyone else, the user's permissions are OR-ed together in this form:

        (trial_id IN ('x', 'y', ...) AND upload_type != 'clinical_data')   <- trial-level permissions
        OR
        upload_type IN ('u', 'v', ...)                                     <- upload-type-level permissions
        OR
        (trial_id = '5' AND upload_type = 'mif')                           <- regular permissions
        OR
        (trial_id = '6' AND upload_type = 'hande')
        ...

    A permission with a falsy `upload_type` is treated as trial-level, one with a falsy
    `trial_id` as upload-type-level, and one with both set as a regular permission.
    """
    # From the perspective of viewing files, NCI Biobank users are admins.
    if user.is_admin_or_nci_user():
        return true()  # Admin has full permissions to all

    trial_level_ids = []
    upload_type_level = []
    pair_clauses = []
    # NOTE(review): `file_group_id` on a permission is not consulted here, unlike the
    # file-group handling elsewhere in this class -- confirm that is intended.
    for perm in Permissions.find_for_user(user.id):
        # Independent checks (not elif): a permission missing both fields lands in both lists.
        if not perm.upload_type:
            trial_level_ids.append(perm.trial_id)
        if not perm.trial_id:
            upload_type_level.append(perm.upload_type)
        if perm.trial_id and perm.upload_type:
            pair_clauses.append(
                sql_and(
                    DownloadableFiles.trial_id == perm.trial_id,
                    DownloadableFiles.upload_type == perm.upload_type,
                )
            )

    # Trial-level access never extends to clinical_data files.
    trial_level_clause = sql_and(
        DownloadableFiles.trial_id.in_(trial_level_ids),
        DownloadableFiles.upload_type != "clinical_data",
    )
    upload_type_level_clause = sql_or(DownloadableFiles.upload_type.in_(upload_type_level))

    return sql_or(trial_level_clause, upload_type_level_clause, *pair_clauses)
2520
+
2610
2521
  @classmethod
2611
2522
  @with_default_session
2612
2523
  def list_with_permissions(
@@ -2646,26 +2557,18 @@ class DownloadableFiles(CommonColumns):
2646
2557
  if where_clauses:
2647
2558
 
2648
2559
  # No where clause (the user is likely an admin).
2649
- statement = select([downloadable_files_for_query]).select_from(
2650
- downloadable_files_for_query
2651
- )
2560
+ statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)
2652
2561
 
2653
2562
  else:
2654
2563
  statement = (
2655
2564
  select([downloadable_files_for_query])
2656
2565
  .where(sql_and(*where_clauses))
2657
- .select_from(
2658
- downloadable_files_for_query.outerjoin(
2659
- files_to_file_groups_for_query
2660
- )
2661
- )
2566
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
2662
2567
  )
2663
2568
 
2664
2569
  if sort_field:
2665
2570
  sort_attribute = getattr(cls, sort_field)
2666
- field_with_dir = (
2667
- asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
2668
- )
2571
+ field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
2669
2572
  statement = statement.order_by(field_with_dir)
2670
2573
 
2671
2574
  # Enforce positive page numbers
@@ -2722,28 +2625,22 @@ class DownloadableFiles(CommonColumns):
2722
2625
  if where_clauses:
2723
2626
 
2724
2627
  # No where clause (the user is likely an admin).
2725
- statement = select(
2726
- [func.count(downloadable_files_for_query.c.id)]
2727
- ).select_from(downloadable_files_for_query)
2628
+ statement = select([func.count(downloadable_files_for_query.c.id)]).select_from(
2629
+ downloadable_files_for_query
2630
+ )
2728
2631
 
2729
2632
  else:
2730
2633
  statement = (
2731
2634
  select([func.count(downloadable_files_for_query.c.id)])
2732
2635
  .where(sql_and(*where_clauses))
2733
- .select_from(
2734
- downloadable_files_for_query.outerjoin(
2735
- files_to_file_groups_for_query
2736
- )
2737
- )
2636
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
2738
2637
  )
2739
2638
 
2740
2639
  return session.execute(statement).fetchone()[0]
2741
2640
 
2742
2641
  @classmethod
2743
2642
  @with_default_session
2744
- def count_by_facet_with_permissions(
2745
- cls, session: Session, trial_ids: List[str] = None, user: Users = None
2746
- ):
2643
+ def count_by_facet_with_permissions(cls, session: Session, trial_ids: List[str] = None, user: Users = None):
2747
2644
  """
2748
2645
  Returns a map of facet_group to a count of the number of files that the given user
2749
2646
  has permissions to view.
@@ -2787,11 +2684,7 @@ class DownloadableFiles(CommonColumns):
2787
2684
  ]
2788
2685
  )
2789
2686
  .where(sql_and(*where_clauses))
2790
- .select_from(
2791
- downloadable_files_for_query.outerjoin(
2792
- files_to_file_groups_for_query
2793
- )
2794
- )
2687
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
2795
2688
  )
2796
2689
 
2797
2690
  statement = statement.group_by(downloadable_files_for_query.c.facet_group)
@@ -2835,35 +2728,35 @@ class DownloadableFiles(CommonColumns):
2835
2728
  else:
2836
2729
  statement = (
2837
2730
  select([downloadable_files_for_query.c.object_url])
2838
- .where(
2839
- sql_and(*where_clauses, downloadable_files_for_query.c.id.in_(ids))
2840
- )
2841
- .select_from(
2842
- downloadable_files_for_query.outerjoin(
2843
- files_to_file_groups_for_query
2844
- )
2845
- )
2731
+ .where(sql_and(*where_clauses, downloadable_files_for_query.c.id.in_(ids)))
2732
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
2846
2733
  )
2847
2734
 
2848
2735
  return [row[0] for row in session.execute(statement).fetchall()]
2849
2736
 
2850
2737
@classmethod
@with_default_session
def filter_object_ids_by_permissions(cls, user: Users, ids: Iterable[int], session: Session) -> Iterable[int]:
    """
    Return the subset of `ids` that belong to DownloadableFiles rows `user` is allowed to access.

    The permission condition comes from `_generate_where_clause_with_permissions`; the
    result is a list of ids in whatever order the database returns them.
    """
    permitted = DownloadableFiles._generate_where_clause_with_permissions(user)
    requested = DownloadableFiles.id.in_(ids)
    statement = select([DownloadableFiles.id]).where(sql_and(requested, permitted))

    matching_rows = session.execute(statement).fetchall()
    return [matching_row[0] for matching_row in matching_rows]
2748
+
2749
@classmethod
def _generate_trial_file_counts(cls, downloadable_files: Iterable) -> Dict[str, int]:
    """
    Count, per trial id, the files that have a truthy `data_category`.

    Args:
        downloadable_files: objects exposing `trial_id` and `data_category` attributes.

    Returns:
        A defaultdict mapping trial id to file count; trial ids that were never
        counted read as 0.
    """
    # `int` as the factory (rather than a lambda) keeps the returned mapping picklable.
    results = defaultdict(int)
    for downloadable_file in downloadable_files:
        # Files without a data category are left out of the counts.
        if downloadable_file.data_category:
            results[downloadable_file.trial_id] += 1
    return results
2861
2756
 
2862
2757
  @classmethod
2863
2758
  @with_default_session
2864
- def remove_participants_and_samples_info_files(
2865
- cls, trial_id: str, session: Session
2866
- ):
2759
+ def remove_participants_and_samples_info_files(cls, trial_id: str, session: Session):
2867
2760
  """
2868
2761
  Remove participants info and samples info downloadable files
2869
2762
  """
@@ -2911,27 +2804,19 @@ class DownloadableFiles(CommonColumns):
2911
2804
  if not where_clauses:
2912
2805
 
2913
2806
  # No where clause (the user is likely an admin).
2914
- statement = select([downloadable_files_for_query]).select_from(
2915
- downloadable_files_for_query
2916
- )
2807
+ statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)
2917
2808
 
2918
2809
  else:
2919
2810
  statement = (
2920
2811
  select([downloadable_files_for_query])
2921
2812
  .where(sql_and(*where_clauses))
2922
- .select_from(
2923
- downloadable_files_for_query.outerjoin(
2924
- files_to_file_groups_for_query
2925
- )
2926
- )
2813
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
2927
2814
  )
2928
2815
 
2929
2816
  downloadable_files = DownloadableFiles._convert_list_results(
2930
2817
  downloadable_files_for_query, session.execute(statement).fetchall()
2931
2818
  )
2932
- trial_file_counts = DownloadableFiles._generate_trial_file_counts(
2933
- downloadable_files
2934
- )
2819
+ trial_file_counts = DownloadableFiles._generate_trial_file_counts(downloadable_files)
2935
2820
  return build_trial_facets(trial_file_counts)
2936
2821
 
2937
2822
  @with_default_session
@@ -2956,13 +2841,9 @@ class DownloadableFiles(CommonColumns):
2956
2841
  "trial_id": self.trial_id,
2957
2842
  "id": self.id,
2958
2843
  }
2959
- related_files = result_proxy_to_models(
2960
- session.execute(query, params), DownloadableFiles
2961
- )
2844
+ related_files = result_proxy_to_models(session.execute(query, params), DownloadableFiles)
2962
2845
  else:
2963
- not_sample_specific = not_(
2964
- literal_column("additional_metadata::text").like('%.cimac_id":%')
2965
- )
2846
+ not_sample_specific = not_(literal_column("additional_metadata::text").like('%.cimac_id":%'))
2966
2847
  related_files = (
2967
2848
  session.query(DownloadableFiles)
2968
2849
  .filter(
@@ -3018,9 +2899,7 @@ class DownloadableFiles(CommonColumns):
3018
2899
  full_type_perms.append(perm.upload_type)
3019
2900
  else:
3020
2901
  trial_type_perms.append((perm.trial_id, perm.upload_type))
3021
- df_tuples = tuple_(
3022
- DownloadableFiles.trial_id, DownloadableFiles.upload_type
3023
- )
2902
+ df_tuples = tuple_(DownloadableFiles.trial_id, DownloadableFiles.upload_type)
3024
2903
  file_filters.append(
3025
2904
  or_(
3026
2905
  # don't include clinical_data in cross-trial permission
@@ -3069,16 +2948,9 @@ class DownloadableFiles(CommonColumns):
3069
2948
  etag = make_etag(filtered_metadata.values())
3070
2949
 
3071
2950
  object_url = filtered_metadata["object_url"]
3072
- df = (
3073
- session.query(DownloadableFiles)
3074
- .filter_by(object_url=object_url)
3075
- .with_for_update()
3076
- .first()
3077
- )
2951
+ df = session.query(DownloadableFiles).filter_by(object_url=object_url).with_for_update().first()
3078
2952
  if df:
3079
- df = session.merge(
3080
- DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata)
3081
- )
2953
+ df = session.merge(DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata))
3082
2954
  else:
3083
2955
  df = DownloadableFiles(_etag=etag, **filtered_metadata)
3084
2956
 
@@ -3107,12 +2979,7 @@ class DownloadableFiles(CommonColumns):
3107
2979
  """
3108
2980
 
3109
2981
  # trying to find existing one
3110
- df = (
3111
- session.query(DownloadableFiles)
3112
- .filter_by(object_url=blob.name)
3113
- .with_for_update()
3114
- .first()
3115
- )
2982
+ df = session.query(DownloadableFiles).filter_by(object_url=blob.name).with_for_update().first()
3116
2983
  if not df:
3117
2984
  df = DownloadableFiles()
3118
2985
 
@@ -3145,18 +3012,14 @@ class DownloadableFiles(CommonColumns):
3145
3012
 
3146
3013
@classmethod
@with_default_session
def list_object_urls(cls, ids: List[int], session: Session, filter_: Callable[[Query], Query]) -> List[str]:
    """
    Return the `object_url` of every record whose id is in `ids`.

    `filter_` receives the id-restricted query and returns the query that is actually run,
    so callers can narrow the results further.
    """
    by_id = session.query(cls.object_url).filter(cls.id.in_(ids))
    narrowed = filter_(by_id)
    # object_url is the first selected column of each row.
    return [url_row[0] for url_row in narrowed.all()]
3155
3020
 
3156
3021
  @classmethod
3157
- def build_file_bundle_query(
3158
- cls, allowed_upload_types: Optional[List[str]]
3159
- ) -> Query:
3022
+ def build_file_bundle_query(cls) -> Query:
3160
3023
  """
3161
3024
  Build a query that selects nested file bundles from the downloadable files table.
3162
3025
  The `file_bundles` query below should produce one bundle per unique `trial_id` that
@@ -3171,8 +3034,6 @@ class DownloadableFiles(CommonColumns):
3171
3034
  }
3172
3035
  ```
3173
3036
  where "type" is something like `"Olink"` or `"Participants Info"` and "purpose" is a `FilePurpose` string.
3174
-
3175
- If `allowed_upload_types` is provided, the query will filter by files that only have an `upload_type` that appear in the list.
3176
3037
  """
3177
3038
  tid_col, type_col, purp_col, ids_col, purps_col = (
3178
3039
  literal_column("trial_id"),
@@ -3182,28 +3043,24 @@ class DownloadableFiles(CommonColumns):
3182
3043
  literal_column("purposes"),
3183
3044
  )
3184
3045
 
3185
- id_bundles = select(
3186
- [
3187
- cls.trial_id,
3188
- cls.data_category_prefix.label(type_col.key),
3189
- cls.file_purpose.label(purp_col.key),
3190
- func.json_agg(cls.id).label(ids_col.key),
3191
- ]
3192
- ).group_by(cls.trial_id, cls.data_category_prefix, cls.file_purpose)
3193
-
3194
- # Restrict files from appearing in the file bundle if the user doesn't have permissions for them
3195
- if allowed_upload_types:
3196
- id_bundles = id_bundles.filter(cls.upload_type.in_(allowed_upload_types))
3197
- id_bundles = id_bundles.alias("id_bundles")
3198
-
3046
+ id_bundles = (
3047
+ select(
3048
+ [
3049
+ cls.trial_id,
3050
+ cls.data_category_prefix.label(type_col.key),
3051
+ cls.file_purpose.label(purp_col.key),
3052
+ func.json_agg(cls.id).label(ids_col.key),
3053
+ ]
3054
+ )
3055
+ .group_by(cls.trial_id, cls.data_category_prefix, cls.file_purpose)
3056
+ .alias("id_bundles")
3057
+ )
3199
3058
  purpose_bundles = (
3200
3059
  select(
3201
3060
  [
3202
3061
  tid_col,
3203
3062
  type_col,
3204
- func.json_object_agg(
3205
- func.coalesce(purp_col, "miscellaneous"), ids_col
3206
- ).label(purps_col.key),
3063
+ func.json_object_agg(func.coalesce(purp_col, "miscellaneous"), ids_col).label(purps_col.key),
3207
3064
  ]
3208
3065
  )
3209
3066
  .select_from(id_bundles)
@@ -3214,9 +3071,7 @@ class DownloadableFiles(CommonColumns):
3214
3071
  select(
3215
3072
  [
3216
3073
  tid_col.label(tid_col.key),
3217
- func.json_object_agg(
3218
- func.coalesce(type_col, "other"), purps_col
3219
- ).label("file_bundle"),
3074
+ func.json_object_agg(func.coalesce(type_col, "other"), purps_col).label("file_bundle"),
3220
3075
  ]
3221
3076
  )
3222
3077
  .select_from(purpose_bundles)
@@ -3227,9 +3082,7 @@ class DownloadableFiles(CommonColumns):
3227
3082
 
3228
3083
  @classmethod
3229
3084
  @with_default_session
3230
- def get_total_bytes(
3231
- cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
3232
- ) -> int:
3085
+ def get_total_bytes(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> int:
3233
3086
  """Get the total number of bytes of data stored across all files."""
3234
3087
  filtered_query = filter_(session.query(func.sum(cls.file_size_bytes)))
3235
3088
  total_bytes = filtered_query.one()[0]
@@ -3238,9 +3091,7 @@ class DownloadableFiles(CommonColumns):
3238
3091
 
3239
3092
  @classmethod
3240
3093
  @with_default_session
3241
- def get_trial_facets(
3242
- cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
3243
- ):
3094
+ def get_trial_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
3244
3095
  trial_file_counts = cls.count_by(
3245
3096
  cls.trial_id,
3246
3097
  session=session,
@@ -3254,12 +3105,8 @@ class DownloadableFiles(CommonColumns):
3254
3105
  # TODO fix this
3255
3106
@classmethod
@with_default_session
def get_data_category_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
    """
    Return the result of `build_data_category_facets` applied to the counts that
    `cls.count_by` produces for `cls.facet_group`, using `session` and `filter_`.
    """
    counts_by_facet_group = cls.count_by(cls.facet_group, session=session, filter_=filter_)
    return build_data_category_facets(counts_by_facet_group)
3265
3112
 
@@ -3275,10 +3122,7 @@ class DownloadableFiles(CommonColumns):
3275
3122
  # Query clause for computing a downloadable file's data category.
3276
3123
  # Used above in the DownloadableFiles.data_category computed property.
3277
3124
DATA_CATEGORY_CASE_CLAUSE = case(
    [
        # One WHEN per known facet group, yielding the category mapped to it.
        (DownloadableFiles.facet_group == facet_group, category)
        for facet_group, category in facet_groups_to_categories.items()
    ]
)
3283
3127
 
3284
3128
  # Query clause for computing a downloadable file's file purpose.
@@ -3291,9 +3135,7 @@ FILE_PURPOSE_CASE_CLAUSE = case(
3291
3135
  )
3292
3136
 
3293
3137
 
3294
def result_proxy_to_models(result_proxy: ResultProxy, model: BaseModel) -> List[BaseModel]:
    """Build one `model` instance per row of `result_proxy`, passing each row's columns as keyword arguments."""
    instances = []
    for row_proxy in result_proxy.all():
        column_values = dict(row_proxy)
        instances.append(model(**column_values))
    return instances
3299
3141