nci-cidc-api-modules 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
cidc_api/models/models.py CHANGED
@@ -28,6 +28,7 @@ import hashlib
  import os
  import re
  from collections import defaultdict
+ from copy import deepcopy
  from datetime import datetime, timedelta
  from enum import Enum as EnumBaseClass
  from functools import wraps
@@ -77,6 +78,7 @@ from sqlalchemy import (
  or_,
  Table,
  MetaData,
+ true,
  )
  from sqlalchemy.dialects.postgresql import JSONB, UUID
  from sqlalchemy.engine import ResultProxy
@@ -92,10 +94,11 @@ from sqlalchemy.sql import (
  # instead of the sqlalchemy.sql versions we are importing here. The solution is to
  # break up this giant file.
  and_ as sql_and,
- # or_ as sql_or, # NOT USED
+ or_ as sql_or,
  # select, # ALREADY IMPORTED
  text,
  )
+ from sqlalchemy.sql.elements import BooleanClauseList
  from sqlalchemy.sql.functions import coalesce
  from werkzeug.exceptions import BadRequest

@@ -184,11 +187,7 @@ class CommonColumns(BaseModel): # type: ignore
  if hasattr(b, "__table__"):
  columns_to_check.extend(b.__table__.columns)

- ret = {
- c.name: getattr(self, c.name)
- for c in columns_to_check
- if hasattr(self, c.name)
- }
+ ret = {c.name: getattr(self, c.name) for c in columns_to_check if hasattr(self, c.name)}
  ret = {k: v for k, v in ret.items() if v is not None}
  return ret

@@ -269,16 +268,12 @@ class CommonColumns(BaseModel): # type: ignore
  if sort_field:
  # Get the attribute from the class, in case this is a hybrid attribute
  sort_attribute = getattr(cls, sort_field)
- field_with_dir = (
- asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
- )
+ field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
  query = query.order_by(field_with_dir)
  if sort_field != "id":
  # When sorting, need to guarantee unique order for offset/limit pagination to produce
  # consistent results. Adding secondary "id" sort field to ensure unique order.
- secondary_field_with_dir = (
- asc("id") if sort_direction == "asc" else desc("id")
- )
+ secondary_field_with_dir = asc("id") if sort_direction == "asc" else desc("id")
  query = query.order_by(secondary_field_with_dir)

  # Apply filter function
@@ -299,9 +294,7 @@ class CommonColumns(BaseModel): # type: ignore

  @classmethod
  @with_default_session
- def count_by(
- cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q
- ) -> Dict[str, int]:
+ def count_by(cls, expr, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> Dict[str, int]:
  """
  Return a dictionary mapping results of `expr` to the number of times each result
  occurs in the table related to this model. E.g., for the `UploadJobs` model,
@@ -326,9 +319,7 @@ class CommonColumns(BaseModel): # type: ignore
  filter_: Callable[[Query], Query] = lambda q: q,
  ):
  """Get a list of distinct values for the given column."""
- assert (
- column_name in cls.__table__.columns.keys()
- ), f"{cls.__tablename__} has no column {column_name}"
+ assert column_name in cls.__table__.columns.keys(), f"{cls.__tablename__} has no column {column_name}"

  base_query = session.query(getattr(cls, column_name))
  filtered_query = filter_(base_query)
@@ -342,9 +333,7 @@ class CommonColumns(BaseModel): # type: ignore
  @classmethod
  def get_unique_columns(cls):
  """Get a list of all the unique columns in this table."""
- return [
- column for column in cls.__table__.c if column.unique or column.primary_key
- ]
+ return [column for column in cls.__table__.c if column.unique or column.primary_key]


  class CIDCRole(EnumBaseClass):
@@ -392,6 +381,11 @@ class Users(CommonColumns):
  """Returns true if this user is an NCI Biobank user."""
  return self.role == CIDCRole.NCI_BIOBANK_USER.value

+ def is_admin_or_nci_user(self) -> bool:
+ """Returns true if this user is a CIDC admin or NCI Biobank user. These users
+ share full access to much of the system."""
+ return self.is_admin() or self.is_nci_user()
+
  def has_download_permissions(self) -> bool:
  """Returns false if this user is a Network Viewer or PACT User."""
  return self.role not in (
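Note: the new Users.is_admin_or_nci_user helper above is shorthand for the two existing role checks used together elsewhere in this file. A minimal, illustration-only equivalent (not part of the package):

    # Sketch only: what the new helper bundles for any Users-like object.
    def is_admin_or_nci_user(user) -> bool:
        return user.is_admin() or user.is_nci_user()
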
@@ -434,9 +428,7 @@ class Users(CommonColumns):
  user = Users.find_by_email(email)
  if not user:
  logger.info("Creating new user with email %s", email)
- user = Users(
- email=email, contact_email=email, first_n=first_n, last_n=last_n
- )
+ user = Users(email=email, contact_email=email, first_n=first_n, last_n=last_n)
  user.insert(session=session)
  return user

@@ -450,9 +442,7 @@ class Users(CommonColumns):
  user_inactivity_cutoff = datetime.today() - timedelta(days=INACTIVE_USER_DAYS)
  update_query = (
  update(Users)
- .where(
- and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False)
- )
+ .where(and_(Users._accessed < user_inactivity_cutoff, Users.disabled == False))
  .values(disabled=True)
  .returning(Users.id)
  )
@@ -460,9 +450,7 @@ class Users(CommonColumns):
  if commit:
  session.commit()

- disabled_users = [
- Users.find_by_id(uid, session=session) for uid in disabled_user_ids
- ]
+ disabled_users = [Users.find_by_id(uid, session=session) for uid in disabled_user_ids]
  for u in disabled_users:
  Permissions.revoke_user_permissions(u, session=session)
  revoke_bigquery_access(u.email)
@@ -501,15 +489,13 @@ class Users(CommonColumns):
  .union_all(
  # Handle admins separately, since they can view all data for all
  # trials even if they have no permissions assigned to them.
- session.query(
- *user_columns, TrialMetadata.trial_id, literal("*,clinical_data")
- ).filter(Users.role == CIDCRole.ADMIN.value)
+ session.query(*user_columns, TrialMetadata.trial_id, literal("*,clinical_data")).filter(
+ Users.role == CIDCRole.ADMIN.value
+ )
  )
  )

- df = pd.DataFrame(
- query, columns=["email", "organization", "role", "trial_id", "permissions"]
- ).fillna("*")
+ df = pd.DataFrame(query, columns=["email", "organization", "role", "trial_id", "permissions"]).fillna("*")

  with pd.ExcelWriter(
  io
@@ -630,19 +616,11 @@ class Permissions(CommonColumns):

  NOTE: values provided to the `commit` argument will be ignored. This method always commits.
  """
- if (
- self.upload_type == self.EVERY
- and self.trial_id == self.EVERY
- and not self.file_group_id
- ):
- raise ValueError(
- "A permission must have a trial id, upload type, or file group."
- )
+ if self.upload_type == self.EVERY and self.trial_id == self.EVERY and not self.file_group_id:
+ raise ValueError("A permission must have a trial id, upload type, or file group.")

  if self.file_group_id and self.upload_type != "file_group":
- raise ValueError(
- "If a permission has a file group, its upload_type must be set to file_group"
- )
+ raise ValueError("If a permission has a file group, its upload_type must be set to file_group")

  grantee = Users.find_by_id(self.granted_to_user, session=session)
  if grantee is None:
@@ -724,11 +702,7 @@ class Permissions(CommonColumns):
  super().insert(session=session, commit=True)

  # Don't make any GCS changes if this user doesn't have download access, is disabled, or isn't approved
- if (
- not grantee.has_download_permissions()
- or grantee.disabled
- or grantee.approval_date is None
- ):
+ if not grantee.has_download_permissions() or grantee.disabled or grantee.approval_date is None:
  # TODO: pact users do not have download permissions currently
  return

@@ -736,9 +710,7 @@ class Permissions(CommonColumns):
  # Grant ACL download permissions in GCS

  if self.upload_type == "file_group":
- Permissions.grant_download_access_to_file_group(
- grantee.email, file_group
- )
+ Permissions.grant_download_access_to_file_group(grantee.email, file_group)
  else:

  # if they have any download permissions, they need the CIDC Lister role
@@ -746,9 +718,7 @@ class Permissions(CommonColumns):
  grant_download_access(grantee.email, self.trial_id, self.upload_type)
  # Remove permissions staged for deletion, if any
  for perm in perms_to_delete:
- revoke_download_access(
- grantee.email, perm.trial_id, perm.upload_type
- )
+ revoke_download_access(grantee.email, perm.trial_id, perm.upload_type)
  except Exception as e:
  # Add back deleted permissions, if any
  for perm in perms_to_delete:
@@ -760,9 +730,7 @@ class Permissions(CommonColumns):
  raise IAMException("IAM grant failed.") from e

  @with_default_session
- def delete(
- self, deleted_by: Union[Users, int], session: Session, commit: bool = True
- ) -> None:
+ def delete(self, deleted_by: Union[Users, int], session: Session, commit: bool = True) -> None:
  """
  Delete this permission record from the database and revoke the corresponding IAM policy binding
  on the GCS data bucket.
@@ -795,9 +763,7 @@ class Permissions(CommonColumns):
  revoke_lister_access(grantee.email)

  except Exception as e:
- raise IAMException(
- "IAM revoke failed, and permission db record not removed."
- ) from e
+ raise IAMException("IAM revoke failed, and permission db record not removed.") from e

  info_message = f"admin-action: {deleted_by_user.email} removed from {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
  logger.info(info_message)
@@ -841,10 +807,7 @@ class Permissions(CommonColumns):
  # if getting EVERY, return all
  | (upload_type == Permissions.EVERY)
  # if permission is EVERY, don't return if looking for clinical_data
- | (
- (Permissions.upload_type == Permissions.EVERY)
- & (upload_type != "clinical_data")
- )
+ | ((Permissions.upload_type == Permissions.EVERY) & (upload_type != "clinical_data"))
  )
  ),
  )
@@ -864,23 +827,16 @@ class Permissions(CommonColumns):
  permissions_list: List[Permissions] = []
  for upload in upload_type:
  permissions_list.extend(
- Permissions.get_for_trial_type(
- trial_id=trial_id, upload_type=upload, session=session
- )
+ Permissions.get_for_trial_type(trial_id=trial_id, upload_type=upload, session=session)
  )

- permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(
- lambda: defaultdict(list)
- )
+ permissions_dict: Dict[str, Dict[str, List[Permissions]]] = defaultdict(lambda: defaultdict(list))
  for perm in permissions_list:
  permissions_dict[perm.trial_id][perm.upload_type].append(perm)

  user_dict: Dict[str, Dict[str, List[Users]]] = {
  trial: {
- upload: [
- Users.find_by_id(id=perm.granted_to_user, session=session)
- for perm in perms
- ]
+ upload: [Users.find_by_id(id=perm.granted_to_user, session=session) for perm in perms]
  for upload, perms in upload_dict.items()
  }
  for trial, upload_dict in permissions_dict.items()
@@ -895,11 +851,7 @@ class Permissions(CommonColumns):
  for trial, upload_dict in user_dict.items()
  }
  # remove any trial that doesn't have any uploads in it
- user_email_dict = {
- trial: upload_dict
- for trial, upload_dict in user_email_dict.items()
- if len(upload_dict)
- }
+ user_email_dict = {trial: upload_dict for trial, upload_dict in user_email_dict.items() if len(upload_dict)}
  return user_email_dict

  @staticmethod
@@ -919,14 +871,8 @@ class Permissions(CommonColumns):
  session.query(Permissions)
  .filter(
  Permissions.granted_to_user == user_id,
- (
- (Permissions.trial_id == trial_id)
- & (Permissions.upload_type == upload_type)
- )
- | (
- (Permissions.trial_id == Permissions.EVERY)
- & (Permissions.upload_type == upload_type)
- )
+ ((Permissions.trial_id == trial_id) & (Permissions.upload_type == upload_type))
+ | ((Permissions.trial_id == Permissions.EVERY) & (Permissions.upload_type == upload_type))
  | (
  (Permissions.trial_id == trial_id)
  # if permission is EVERY, don't return if looking for clinical_data
@@ -949,11 +895,7 @@ class Permissions(CommonColumns):
  )
  if results:
  file_group_ids = {file_group.id for file_group in file_groups}
- results = [
- result
- for result in results
- if result.file_group_id in file_group_ids
- ]
+ results = [result for result in results if result.file_group_id in file_group_ids]

  results = results and results or None
  return results
@@ -997,9 +939,7 @@ class Permissions(CommonColumns):
  upload_type=[p.upload_type for p in trial_perms],
  )
  for perm in file_group_perms:
- file_group: FileGroups = FileGroups.find_by_id(
- perm.file_group_id, session=session
- )
+ file_group: FileGroups = FileGroups.find_by_id(perm.file_group_id, session=session)
  Permissions.grant_download_access_to_file_group(user.email, file_group)

  # Regrant all of the user's intake bucket upload permissions, if they have any
@@ -1039,9 +979,7 @@ class Permissions(CommonColumns):

  @classmethod
  @with_default_session
- def grant_download_permissions_for_upload_job(
- cls, upload: "UploadJobs", session: Session
- ) -> None:
+ def grant_download_permissions_for_upload_job(cls, upload: "UploadJobs", session: Session) -> None:
  """
  For a given UploadJob, issue all relevant Permissions on Google
  Loads all cross-trial permissions for the upload_type
@@ -1059,21 +997,14 @@ class Permissions(CommonColumns):
  filters.append(cls.upload_type == upload.upload_type)
  else:
  # upload.upload_type can't be None
- filters.append(
- or_(cls.upload_type == upload.upload_type, cls.upload_type == None)
- )
+ filters.append(or_(cls.upload_type == upload.upload_type, cls.upload_type == None))

  perms = session.query(cls).filter(*filters).all()
  user_email_list: List[str] = []

  for perm in perms:
  user = Users.find_by_id(perm.granted_to_user, session=session)
- if (
- user.is_admin()
- or user.is_nci_user()
- or user.disabled
- or user.email in user_email_list
- ):
+ if user.is_admin() or user.is_nci_user() or user.disabled or user.email in user_email_list:
  continue

  user_email_list.append(user.email)
@@ -1102,10 +1033,8 @@ class ValidationMultiError(Exception):
  """Holds multiple jsonschema.ValidationErrors"""


- trial_metadata_validator: json_validation._Validator = (
- json_validation.load_and_validate_schema(
- "clinical_trial.json", return_validator=True
- )
+ trial_metadata_validator: json_validation._Validator = json_validation.load_and_validate_schema(
+ "clinical_trial.json", return_validator=True
  )

  FileBundle = Dict[str, Dict[FilePurpose, List[int]]]
@@ -1127,9 +1056,7 @@ class TrialMetadata(CommonColumns):
  # existing manifest data that no longer conform to the post-CSMS-integration schema can be kept.
  # See more details in the strip_metadata_for_validation function docs.
  metadata_to_validate = (
- json_validation.strip_metadata_for_validation(metadata_json)
- if strip_metadata
- else metadata_json
+ json_validation.strip_metadata_for_validation(metadata_json) if strip_metadata else metadata_json
  )
  errs = trial_metadata_validator.iter_error_messages(metadata_to_validate)
  messages = list(f"'metadata_json': {err}" for err in errs)
@@ -1165,49 +1092,34 @@ class TrialMetadata(CommonColumns):
  Find a trial by its CIMAC id.
  """
  try:
- trial = (
- session.query(TrialMetadata)
- .filter_by(trial_id=trial_id)
- .with_for_update()
- .one()
- )
+ trial = session.query(TrialMetadata).filter_by(trial_id=trial_id).with_for_update().one()
  except NoResultFound as e:
  raise NoResultFound(f"No trial found with id {trial_id}") from e
  return trial

  @staticmethod
  @with_default_session
- def patch_assays(
- trial_id: str, assay_patch: dict, session: Session, commit: bool = False
- ):
+ def patch_assays(trial_id: str, assay_patch: dict, session: Session, commit: bool = False):
  """
  Applies assay updates to the metadata object from the trial with id `trial_id`.

  TODO: apply this update directly to the not-yet-existent TrialMetadata.manifest field
  """
- return TrialMetadata._patch_trial_metadata(
- trial_id, assay_patch, session=session, commit=commit
- )
+ return TrialMetadata._patch_trial_metadata(trial_id, assay_patch, session=session, commit=commit)

  @staticmethod
  @with_default_session
- def patch_manifest(
- trial_id: str, manifest_patch: dict, session: Session, commit: bool = False
- ):
+ def patch_manifest(trial_id: str, manifest_patch: dict, session: Session, commit: bool = False):
  """
  Applies manifest updates to the metadata object from the trial with id `trial_id`.

  TODO: apply this update directly to the not-yet-existent TrialMetadata.assays field
  """
- return TrialMetadata._patch_trial_metadata(
- trial_id, manifest_patch, session=session, commit=commit
- )
+ return TrialMetadata._patch_trial_metadata(trial_id, manifest_patch, session=session, commit=commit)

  @staticmethod
  @with_default_session
- def _patch_trial_metadata(
- trial_id: str, json_patch: dict, session: Session, commit: bool = False
- ):
+ def _patch_trial_metadata(trial_id: str, json_patch: dict, session: Session, commit: bool = False):
  """
  Applies updates to the metadata object from the trial with id `trial_id`
  and commits current session.
@@ -1219,9 +1131,7 @@ class TrialMetadata(CommonColumns):
  trial = TrialMetadata.select_for_update_by_trial_id(trial_id, session=session)

  # Merge assay metadata into the existing clinical trial metadata
- updated_metadata, errs = prism.merge_clinical_trial_metadata(
- json_patch, trial.metadata_json
- )
+ updated_metadata, errs = prism.merge_clinical_trial_metadata(json_patch, trial.metadata_json)
  if errs:
  raise ValidationMultiError(errs)
  # Save updates to trial record
@@ -1236,9 +1146,7 @@ class TrialMetadata(CommonColumns):

  @staticmethod
  @with_default_session
- def create(
- trial_id: str, metadata_json: dict, session: Session, commit: bool = True
- ):
+ def create(trial_id: str, metadata_json: dict, session: Session, commit: bool = True):
  """
  Create a new clinical trial metadata record.
  """
@@ -1250,9 +1158,7 @@ class TrialMetadata(CommonColumns):
  return trial

  @staticmethod
- def merge_gcs_artifact(
- metadata: dict, upload_type: str, uuid: str, gcs_object: Blob
- ):
+ def merge_gcs_artifact(metadata: dict, upload_type: str, uuid: str, gcs_object: Blob):
  return prism.merge_artifact(
  ct=metadata,
  assay_type=upload_type, # assay_type is the old name for upload_type
@@ -1265,9 +1171,7 @@ class TrialMetadata(CommonColumns):
  )

  @staticmethod
- def merge_gcs_artifacts(
- metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]
- ):
+ def merge_gcs_artifacts(metadata: dict, upload_type: str, uuids_and_gcs_objects: List[Tuple[str, Blob]]):
  return prism.merge_artifacts(
  metadata,
  [
@@ -1356,32 +1260,16 @@ class TrialMetadata(CommonColumns):
  subqueries = []

  if include_file_bundles:
- allowed_upload_types = []
- if user and not user.is_admin() and not user.is_nci_user():
- permissions = Permissions.find_for_user(user.id)
- # An 'empty' upload_type means full trial-level access
- allowed_upload_types = [
- p.upload_type for p in permissions if p.upload_type
- ]
- logger.info(
- f"Restricting file bundle for user {user.id} to {allowed_upload_types=}"
- )
+ file_bundle_query = DownloadableFiles.build_file_bundle_query()

- file_bundle_query = DownloadableFiles.build_file_bundle_query(
- allowed_upload_types
- )
  columns.append(file_bundle_query.c.file_bundle)
  subqueries.append(file_bundle_query)

  if include_counts:
  trial_summaries: List[dict] = cls.get_summaries()

- participant_counts: Dict[str, int] = {
- t["trial_id"]: t["total_participants"] for t in trial_summaries
- }
- sample_counts: Dict[str, int] = {
- t["trial_id"]: t["total_samples"] for t in trial_summaries
- }
+ participant_counts: Dict[str, int] = {t["trial_id"]: t["total_participants"] for t in trial_summaries}
+ sample_counts: Dict[str, int] = {t["trial_id"]: t["total_samples"] for t in trial_summaries}

  # Combine all query components
  query = session.query(*columns)
@@ -1389,6 +1277,7 @@ class TrialMetadata(CommonColumns):
  # Each subquery will have a trial_id column and one record per trial id
  query = query.outerjoin(subquery, cls.trial_id == subquery.c.trial_id)

+ query = query.order_by(cls.trial_id)
  query = cls._add_pagination_filters(query, **pagination_args)

  trials = []
@@ -1404,25 +1293,43 @@ class TrialMetadata(CommonColumns):
  setattr(trial, column, value)

  if include_counts:
- setattr(
- trial, "num_participants", participant_counts.get(trial.trial_id, 0)
- )
+ setattr(trial, "num_participants", participant_counts.get(trial.trial_id, 0))
  setattr(trial, "num_samples", sample_counts.get(trial.trial_id, 0))

  if include_file_bundles and hasattr(trial, "file_bundle"):
- for assay in trial.file_bundle:
+ # File bundle has all existing object ids. Remove ones that aren't allowed by permissions.
+
+ # Gather all object ids in the file bundle
+ all_object_ids = set()
+
+ for assay, purposes in trial.file_bundle.items():
+ for purpose, object_ids in purposes.items():
+ all_object_ids = all_object_ids.union(object_ids)
+ # Remove any impermissible object ids
+ filtered_object_ids = DownloadableFiles.filter_object_ids_by_permissions(user, all_object_ids)
+ logger.debug(f"Filtered object ids: {len(all_object_ids)} -> {len(filtered_object_ids)}")
+
+ for assay, purposes in trial.file_bundle.items():
  size_results = {}
- for files_list_key in trial.file_bundle[assay]:
- ids = trial.file_bundle[assay][files_list_key]
- size_results[f"{files_list_key}_size"] = (
- DownloadableFiles.get_total_bytes(
- filter_=lambda q: q.filter(
- DownloadableFiles.id.in_(ids)
- )
- )
- )
+ for purpose, object_ids in purposes.items():
+ # Only allow object ids that are permitted
+ permitted_object_ids = list(set(object_ids).intersection(filtered_object_ids))
+ trial.file_bundle[assay][purpose] = permitted_object_ids
+ if permitted_object_ids:
+ # For any files left in the purpose, get their total size
+ filter_ = lambda q: q.filter(DownloadableFiles.id.in_(permitted_object_ids))
+ size_results[f"{purpose}_size"] = DownloadableFiles.get_total_bytes(filter_=filter_)
  trial.file_bundle[assay].update(size_results)

+ # Trim the file bundle
+ for assay, purposes in deepcopy(trial.file_bundle).items():
+ for purpose, object_ids in purposes.items():
+ if not object_ids:
+ # No file ids left in the purpose after filtering. Remove this purpose from the bundle.
+ del trial.file_bundle[assay][purpose]
+ if not trial.file_bundle[assay]:
+ del trial.file_bundle[assay]
+
  trials.append(trial)

  return trials
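Note: with the change above, TrialMetadata.list builds the file bundle for all files and then trims it per user, instead of pre-filtering the bundle query by allowed upload types. A minimal sketch of the trimming step on a plain dict (the bundle shape {data category: {file purpose: [file ids]}} follows the build_file_bundle_query docstring; the categories and ids here are made up):

    from copy import deepcopy

    # Hypothetical bundle after permission filtering left some purposes empty.
    file_bundle = {"Olink": {"source": [1, 2], "analysis": []}, "WES": {"source": []}}

    # Drop empty purposes, then drop categories that end up empty,
    # mirroring the deepcopy-based trim in the diff above.
    for assay, purposes in deepcopy(file_bundle).items():
        for purpose, object_ids in purposes.items():
            if not object_ids:
                del file_bundle[assay][purpose]
        if not file_bundle[assay]:
            del file_bundle[assay]

    assert file_bundle == {"Olink": {"source": [1, 2]}}
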
@@ -2019,9 +1926,7 @@ class TrialMetadata(CommonColumns):
  summaries_query = "SELECT result FROM trial_summaries_mv"
  # Retrieve trial-level summary results from data cached in trial_summaries_mv materialized view.
  # The source of the SQL query used in trial_summaries_mv is get_summaries_query()
- summaries = [
- summary for (summary,) in session.execute(summaries_query) if summary
- ]
+ summaries = [summary for (summary,) in session.execute(summaries_query) if summary]

  # Shortcut to impute 0 values for assays where trials don't yet have data
  summaries = pd.DataFrame(summaries).fillna(0).to_dict("records")
@@ -2039,9 +1944,7 @@ class UploadJobStatus(EnumBaseClass):
  MERGE_FAILED = "merge-failed"

  @classmethod
- def is_valid_transition(
- cls, current: str, target: str, is_manifest: bool = False
- ) -> bool:
+ def is_valid_transition(cls, current: str, target: str, is_manifest: bool = False) -> bool:
  """
  Enforce logic about which state transitions are valid. E.g.,
  an upload whose status is "merge-completed" should never be updated
@@ -2086,9 +1989,7 @@ class UploadJobs(CommonColumns):
  )

  # The current status of the upload job
- _status = Column(
- "status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False
- )
+ _status = Column("status", Enum(*UPLOAD_STATUSES, name="upload_job_status"), nullable=False)
  # A long, random identifier for this upload job
  token = Column(UUID, server_default=text("gen_random_uuid()"), nullable=False)
  # Text containing feedback on why the upload status is what it is
@@ -2110,9 +2011,7 @@ class UploadJobs(CommonColumns):
  trial_id = Column(String, nullable=False, index=True)

  # Create a GIN index on the GCS object names
- _gcs_objects_idx = Index(
- "upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin"
- )
+ _gcs_objects_idx = Index("upload_jobs_gcs_gcs_file_map_idx", gcs_file_map, postgresql_using="gin")

  @hybrid_property
  def status(self):
@@ -2126,9 +2025,7 @@ class UploadJobs(CommonColumns):
  old_status = self.status or UploadJobStatus.STARTED.value
  is_manifest = self.upload_type in prism.SUPPORTED_MANIFESTS
  if not UploadJobStatus.is_valid_transition(old_status, status, is_manifest):
- raise ValueError(
- f"Upload job with status {self.status} can't transition to status {status}"
- )
+ raise ValueError(f"Upload job with status {self.status} can't transition to status {status}")
  self._status = status

  def _set_status_no_validation(self, status: str):
@@ -2168,9 +2065,7 @@ class UploadJobs(CommonColumns):
  assert prism.PROTOCOL_ID_FIELD_NAME in metadata, "metadata must have a trial ID"

  is_manifest_upload = upload_type in prism.SUPPORTED_MANIFESTS
- assert (
- gcs_file_map is not None or is_manifest_upload
- ), "assay/analysis uploads must have a gcs_file_map"
+ assert gcs_file_map is not None or is_manifest_upload, "assay/analysis uploads must have a gcs_file_map"

  trial_id = metadata[prism.PROTOCOL_ID_FIELD_NAME]

@@ -2221,9 +2116,7 @@ class UploadJobs(CommonColumns):
  job.metadata_patch,
  updated_artifact,
  _,
- ) = prism.merge_artifact_extra_metadata(
- job.metadata_patch, uuid, job.upload_type, file
- )
+ ) = prism.merge_artifact_extra_metadata(job.metadata_patch, uuid, job.upload_type, file)
  logger.info("Updated md for %s: %s", uuid, updated_artifact.keys())

  # A workaround fix for JSON field modifications not being tracked
@@ -2245,25 +2138,14 @@ class UploadJobs(CommonColumns):
  @classmethod
  @with_default_session
  def find_first_manifest_job(cls, trial_id: str, session):
- return (
- session.query(UploadJobs)
- .filter_by(trial_id=trial_id, gcs_xlsx_uri="")
- .order_by(text("id ASC"))
- .first()
- )
+ return session.query(UploadJobs).filter_by(trial_id=trial_id, gcs_xlsx_uri="").order_by(text("id ASC")).first()

  @with_default_session
- def ingestion_success(
- self, trial, session: Session, commit: bool = False, send_email: bool = False
- ):
+ def ingestion_success(self, trial, session: Session, commit: bool = False, send_email: bool = False):
  """Set own status to reflect successful merge and trigger email notifying CIDC admins."""
  # Do status update if the transition is valid
- if not UploadJobStatus.is_valid_transition(
- self.status, UploadJobStatus.MERGE_COMPLETED.value
- ):
- raise Exception(
- f"Cannot declare ingestion success given current status: {self.status}"
- )
+ if not UploadJobStatus.is_valid_transition(self.status, UploadJobStatus.MERGE_COMPLETED.value):
+ raise Exception(f"Cannot declare ingestion success given current status: {self.status}")
  self.status = UploadJobStatus.MERGE_COMPLETED.value

  if commit:
@@ -2279,11 +2161,7 @@ class FilesToFileGroups(BaseModel):
  """

  __tablename__ = "files_to_file_groups"
- __table_args__ = (
- PrimaryKeyConstraint(
- "file_group_id", "file_id", name="pk_files_to_file_groups"
- ),
- )
+ __table_args__ = (PrimaryKeyConstraint("file_group_id", "file_id", name="pk_files_to_file_groups"),)
  file_group_id = Column(ForeignKey("file_groups.id"), primary_key=True)
  file_id = Column(ForeignKey("downloadable_files.id"), primary_key=True)
  _created = Column(DateTime, default=func.now(), nullable=False)
@@ -2506,9 +2384,7 @@ class DownloadableFiles(CommonColumns):
  return downloadable_files_for_query, files_to_file_groups_for_query

  @classmethod
- def _convert_list_results(
- cls, downloadable_files_for_query: Table, query_files: List
- ):
+ def _convert_list_results(cls, downloadable_files_for_query: Table, query_files: List):
  """Converts the results of a SQLalchemy expression language query into actual DownloadableFiles
  objects. This is necessary since the UI depends on some of the derived properties in
  DownloadableFiles.
@@ -2546,9 +2422,7 @@ class DownloadableFiles(CommonColumns):
  where_clauses.append(downloadable_files_for_query.c.trial_id.in_(trial_ids))
  if facets:
  facet_groups = get_facet_groups_for_paths(facets)
- where_clauses.append(
- downloadable_files_for_query.c.facet_group.in_(facet_groups)
- )
+ where_clauses.append(downloadable_files_for_query.c.facet_group.in_(facet_groups))

  if user and not is_admin:
  permissions = Permissions.find_for_user(user.id)
@@ -2564,27 +2438,15 @@ class DownloadableFiles(CommonColumns):
  elif permission.file_group_id is None:
  where_clauses.append(
  sql_and(
- (
- downloadable_files_for_query.c.trial_id
- == permission.trial_id
- ),
- (
- downloadable_files_for_query.c.upload_type
- == permission.upload_type
- ),
+ (downloadable_files_for_query.c.trial_id == permission.trial_id),
+ (downloadable_files_for_query.c.upload_type == permission.upload_type),
  )
  )
  else:
  where_clauses.append(
  sql_and(
- (
- downloadable_files_for_query.c.trial_id
- == permission.trial_id
- ),
- (
- files_to_file_groups_for_query.c.file_group_id
- == permission.file_group_id
- ),
+ (downloadable_files_for_query.c.trial_id == permission.trial_id),
+ (files_to_file_groups_for_query.c.file_group_id == permission.file_group_id),
  )
  )

@@ -2598,9 +2460,7 @@ class DownloadableFiles(CommonColumns):
  )
  )
  if full_type_perms:
- where_clauses.append(
- downloadable_files_for_query.c.upload_type.in_(full_type_perms)
- )
+ where_clauses.append(downloadable_files_for_query.c.upload_type.in_(full_type_perms))

  # Need to be careful about return logic. Empty results could be because the user
  # is an admin, whereas None means the user has no permissions to view any files.
@@ -2609,6 +2469,55 @@ class DownloadableFiles(CommonColumns):

  return None

+ @classmethod
+ def _generate_where_clause_with_permissions(cls, user: Users) -> BooleanClauseList:
+ """
+ Returns a where clause for DownloadableFiles filtered down to only the files the user
+ has access to based on their permissions and role.
+
+ The generated clause will have this form
+ WHERE
+ downloadable_files.trial_id IN ('x', 'y', ...) AND upload_type != 'clinical_data' <- trial-level permissions
+ OR
+ upload_type IN ('u', 'v', ...)) <- upload-type-level permissions
+ OR
+ trial_id = '5' AND upload_type = 'mif' <- regular permissions
+ OR
+ trial_id = '6' and upload_type = 'hande'
+ ...
+ """
+ # From the perspective of viewing files, NCI Biobank users are admins.
+ if user.is_admin_or_nci_user():
+ return true() # Admin has full permissions to all
+
+ permissions = Permissions.find_for_user(user.id)
+
+ full_access_trial_ids = [p.trial_id for p in permissions if not p.upload_type]
+ full_access_upload_types = [p.upload_type for p in permissions if not p.trial_id]
+ regular_permissions = [p for p in permissions if p.trial_id and p.upload_type]
+
+ full_access_trial_clause = sql_and(
+ DownloadableFiles.trial_id.in_(full_access_trial_ids),
+ DownloadableFiles.upload_type != "clinical_data",
+ )
+
+ full_access_upload_type_clause = sql_or(DownloadableFiles.upload_type.in_(full_access_upload_types))
+
+ regular_permission_clauses = [
+ sql_and(
+ DownloadableFiles.trial_id == p.trial_id,
+ DownloadableFiles.upload_type == p.upload_type,
+ )
+ for p in regular_permissions
+ ]
+ clause = sql_or(
+ full_access_trial_clause,
+ full_access_upload_type_clause,
+ *regular_permission_clauses,
+ )
+
+ return clause
+
  @classmethod
  @with_default_session
  def list_with_permissions(
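Note: the new _generate_where_clause_with_permissions above collapses a user's permissions into a single OR-of-ANDs filter on DownloadableFiles. A minimal sketch of the same clause shape using plain SQLAlchemy column expressions (the column objects and values here are made up for illustration):

    from sqlalchemy import and_, column, or_

    trial_id, upload_type = column("trial_id"), column("upload_type")

    clause = or_(
        # trial-level permissions: whole trials, minus clinical_data
        and_(trial_id.in_(["trial-a"]), upload_type != "clinical_data"),
        # cross-trial, upload-type-level permissions
        upload_type.in_(["olink"]),
        # regular per-trial, per-upload-type permissions
        and_(trial_id == "trial-b", upload_type == "mif"),
    )
    print(clause)  # renders the boolean SQL expression
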
@@ -2648,26 +2557,18 @@ class DownloadableFiles(CommonColumns):
  if where_clauses:

  # No where clause (the user is likely an admin).
- statement = select([downloadable_files_for_query]).select_from(
- downloadable_files_for_query
- )
+ statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)

  else:
  statement = (
  select([downloadable_files_for_query])
  .where(sql_and(*where_clauses))
- .select_from(
- downloadable_files_for_query.outerjoin(
- files_to_file_groups_for_query
- )
- )
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
  )

  if sort_field:
  sort_attribute = getattr(cls, sort_field)
- field_with_dir = (
- asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
- )
+ field_with_dir = asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
  statement = statement.order_by(field_with_dir)

  # Enforce positive page numbers
@@ -2724,28 +2625,22 @@ class DownloadableFiles(CommonColumns):
  if where_clauses:

  # No where clause (the user is likely an admin).
- statement = select(
- [func.count(downloadable_files_for_query.c.id)]
- ).select_from(downloadable_files_for_query)
+ statement = select([func.count(downloadable_files_for_query.c.id)]).select_from(
+ downloadable_files_for_query
+ )

  else:
  statement = (
  select([func.count(downloadable_files_for_query.c.id)])
  .where(sql_and(*where_clauses))
- .select_from(
- downloadable_files_for_query.outerjoin(
- files_to_file_groups_for_query
- )
- )
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
  )

  return session.execute(statement).fetchone()[0]

  @classmethod
  @with_default_session
- def count_by_facet_with_permissions(
- cls, session: Session, trial_ids: List[str] = None, user: Users = None
- ):
+ def count_by_facet_with_permissions(cls, session: Session, trial_ids: List[str] = None, user: Users = None):
  """
  Returns a map of facet_group to a count of the number of files that the given user
  has permissions to view.
@@ -2789,11 +2684,7 @@ class DownloadableFiles(CommonColumns):
  ]
  )
  .where(sql_and(*where_clauses))
- .select_from(
- downloadable_files_for_query.outerjoin(
- files_to_file_groups_for_query
- )
- )
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
  )

  statement = statement.group_by(downloadable_files_for_query.c.facet_group)
@@ -2837,35 +2728,35 @@ class DownloadableFiles(CommonColumns):
  else:
  statement = (
  select([downloadable_files_for_query.c.object_url])
- .where(
- sql_and(*where_clauses, downloadable_files_for_query.c.id.in_(ids))
- )
- .select_from(
- downloadable_files_for_query.outerjoin(
- files_to_file_groups_for_query
- )
- )
+ .where(sql_and(*where_clauses, downloadable_files_for_query.c.id.in_(ids)))
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
  )

  return [row[0] for row in session.execute(statement).fetchall()]

  @classmethod
- def _generate_trial_file_counts(
- cls, downloadable_files: Iterable
- ) -> Dict[str, int]:
+ @with_default_session
+ def filter_object_ids_by_permissions(cls, user: Users, ids: Iterable[int], session: Session) -> Iterable[int]:
+ """
+ Takes a list of object ids and filters it to return only those object ids the user has permission for.
+ """
+
+ where_clause = DownloadableFiles._generate_where_clause_with_permissions(user)
+ statement = select([DownloadableFiles.id]).where(sql_and(DownloadableFiles.id.in_(ids), where_clause))
+
+ return [row[0] for row in session.execute(statement).fetchall()]
+
+ @classmethod
+ def _generate_trial_file_counts(cls, downloadable_files: Iterable) -> Dict[str, int]:
  results = defaultdict(lambda: 0)
  for downloadable_file in downloadable_files:
  if downloadable_file.data_category:
- results[downloadable_file.trial_id] = (
- results[downloadable_file.trial_id] + 1
- )
+ results[downloadable_file.trial_id] = results[downloadable_file.trial_id] + 1
  return results

  @classmethod
  @with_default_session
- def remove_participants_and_samples_info_files(
- cls, trial_id: str, session: Session
- ):
+ def remove_participants_and_samples_info_files(cls, trial_id: str, session: Session):
  """
  Remove participants info and samples info downloadable files
  """
@@ -2913,27 +2804,19 @@ class DownloadableFiles(CommonColumns):
  if not where_clauses:

  # No where clause (the user is likely an admin).
- statement = select([downloadable_files_for_query]).select_from(
- downloadable_files_for_query
- )
+ statement = select([downloadable_files_for_query]).select_from(downloadable_files_for_query)

  else:
  statement = (
  select([downloadable_files_for_query])
  .where(sql_and(*where_clauses))
- .select_from(
- downloadable_files_for_query.outerjoin(
- files_to_file_groups_for_query
- )
- )
+ .select_from(downloadable_files_for_query.outerjoin(files_to_file_groups_for_query))
  )

  downloadable_files = DownloadableFiles._convert_list_results(
  downloadable_files_for_query, session.execute(statement).fetchall()
  )
- trial_file_counts = DownloadableFiles._generate_trial_file_counts(
- downloadable_files
- )
+ trial_file_counts = DownloadableFiles._generate_trial_file_counts(downloadable_files)
  return build_trial_facets(trial_file_counts)

  @with_default_session
@@ -2958,13 +2841,9 @@ class DownloadableFiles(CommonColumns):
  "trial_id": self.trial_id,
  "id": self.id,
  }
- related_files = result_proxy_to_models(
- session.execute(query, params), DownloadableFiles
- )
+ related_files = result_proxy_to_models(session.execute(query, params), DownloadableFiles)
  else:
- not_sample_specific = not_(
- literal_column("additional_metadata::text").like('%.cimac_id":%')
- )
+ not_sample_specific = not_(literal_column("additional_metadata::text").like('%.cimac_id":%'))
  related_files = (
  session.query(DownloadableFiles)
  .filter(
@@ -3020,9 +2899,7 @@ class DownloadableFiles(CommonColumns):
  full_type_perms.append(perm.upload_type)
  else:
  trial_type_perms.append((perm.trial_id, perm.upload_type))
- df_tuples = tuple_(
- DownloadableFiles.trial_id, DownloadableFiles.upload_type
- )
+ df_tuples = tuple_(DownloadableFiles.trial_id, DownloadableFiles.upload_type)
  file_filters.append(
  or_(
  # don't include clinical_data in cross-trial permission
@@ -3071,16 +2948,9 @@ class DownloadableFiles(CommonColumns):
  etag = make_etag(filtered_metadata.values())

  object_url = filtered_metadata["object_url"]
- df = (
- session.query(DownloadableFiles)
- .filter_by(object_url=object_url)
- .with_for_update()
- .first()
- )
+ df = session.query(DownloadableFiles).filter_by(object_url=object_url).with_for_update().first()
  if df:
- df = session.merge(
- DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata)
- )
+ df = session.merge(DownloadableFiles(id=df.id, _etag=etag, **filtered_metadata))
  else:
  df = DownloadableFiles(_etag=etag, **filtered_metadata)

@@ -3109,12 +2979,7 @@ class DownloadableFiles(CommonColumns):
  """

  # trying to find existing one
- df = (
- session.query(DownloadableFiles)
- .filter_by(object_url=blob.name)
- .with_for_update()
- .first()
- )
+ df = session.query(DownloadableFiles).filter_by(object_url=blob.name).with_for_update().first()
  if not df:
  df = DownloadableFiles()

@@ -3147,18 +3012,14 @@ class DownloadableFiles(CommonColumns):

  @classmethod
  @with_default_session
- def list_object_urls(
- cls, ids: List[int], session: Session, filter_: Callable[[Query], Query]
- ) -> List[str]:
+ def list_object_urls(cls, ids: List[int], session: Session, filter_: Callable[[Query], Query]) -> List[str]:
  """Get all object_urls for a batch of downloadable file record IDs"""
  query = session.query(cls.object_url).filter(cls.id.in_(ids))
  query = filter_(query)
  return [r[0] for r in query.all()]

  @classmethod
- def build_file_bundle_query(
- cls, allowed_upload_types: Optional[List[str]]
- ) -> Query:
+ def build_file_bundle_query(cls) -> Query:
  """
  Build a query that selects nested file bundles from the downloadable files table.
  The `file_bundles` query below should produce one bundle per unique `trial_id` that
@@ -3173,8 +3034,6 @@ class DownloadableFiles(CommonColumns):
  }
  ```
  where "type" is something like `"Olink"` or `"Participants Info"` and "purpose" is a `FilePurpose` string.
-
- If `allowed_upload_types` is provided, the query will filter by files that only have an `upload_type` that appear in the list.
  """
  tid_col, type_col, purp_col, ids_col, purps_col = (
  literal_column("trial_id"),
@@ -3184,28 +3043,24 @@ class DownloadableFiles(CommonColumns):
  literal_column("purposes"),
  )

- id_bundles = select(
- [
- cls.trial_id,
- cls.data_category_prefix.label(type_col.key),
- cls.file_purpose.label(purp_col.key),
- func.json_agg(cls.id).label(ids_col.key),
- ]
- ).group_by(cls.trial_id, cls.data_category_prefix, cls.file_purpose)
-
- # Restrict files from appearing in the file bundle if the user doesn't have permissions for them
- if allowed_upload_types:
- id_bundles = id_bundles.filter(cls.upload_type.in_(allowed_upload_types))
- id_bundles = id_bundles.alias("id_bundles")
-
+ id_bundles = (
+ select(
+ [
+ cls.trial_id,
+ cls.data_category_prefix.label(type_col.key),
+ cls.file_purpose.label(purp_col.key),
+ func.json_agg(cls.id).label(ids_col.key),
+ ]
+ )
+ .group_by(cls.trial_id, cls.data_category_prefix, cls.file_purpose)
+ .alias("id_bundles")
+ )
  purpose_bundles = (
  select(
  [
  tid_col,
  type_col,
- func.json_object_agg(
- func.coalesce(purp_col, "miscellaneous"), ids_col
- ).label(purps_col.key),
+ func.json_object_agg(func.coalesce(purp_col, "miscellaneous"), ids_col).label(purps_col.key),
  ]
  )
  .select_from(id_bundles)
@@ -3216,9 +3071,7 @@ class DownloadableFiles(CommonColumns):
  select(
  [
  tid_col.label(tid_col.key),
- func.json_object_agg(
- func.coalesce(type_col, "other"), purps_col
- ).label("file_bundle"),
+ func.json_object_agg(func.coalesce(type_col, "other"), purps_col).label("file_bundle"),
  ]
  )
  .select_from(purpose_bundles)
@@ -3229,9 +3082,7 @@ class DownloadableFiles(CommonColumns):

  @classmethod
  @with_default_session
- def get_total_bytes(
- cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
- ) -> int:
+ def get_total_bytes(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q) -> int:
  """Get the total number of bytes of data stored across all files."""
  filtered_query = filter_(session.query(func.sum(cls.file_size_bytes)))
  total_bytes = filtered_query.one()[0]
@@ -3240,9 +3091,7 @@ class DownloadableFiles(CommonColumns):

  @classmethod
  @with_default_session
- def get_trial_facets(
- cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
- ):
+ def get_trial_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
  trial_file_counts = cls.count_by(
  cls.trial_id,
  session=session,
@@ -3256,12 +3105,8 @@ class DownloadableFiles(CommonColumns):
  # TODO fix this
  @classmethod
  @with_default_session
- def get_data_category_facets(
- cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q
- ):
- facet_group_file_counts = cls.count_by(
- cls.facet_group, session=session, filter_=filter_
- )
+ def get_data_category_facets(cls, session: Session, filter_: Callable[[Query], Query] = lambda q: q):
+ facet_group_file_counts = cls.count_by(cls.facet_group, session=session, filter_=filter_)
  data_category_facets = build_data_category_facets(facet_group_file_counts)
  return data_category_facets

@@ -3277,10 +3122,7 @@
  # Query clause for computing a downloadable file's data category.
  # Used above in the DownloadableFiles.data_category computed property.
  DATA_CATEGORY_CASE_CLAUSE = case(
- [
- (DownloadableFiles.facet_group == k, v)
- for k, v in facet_groups_to_categories.items()
- ]
+ [(DownloadableFiles.facet_group == k, v) for k, v in facet_groups_to_categories.items()]
  )

  # Query clause for computing a downloadable file's file purpose.
@@ -3293,9 +3135,7 @@ FILE_PURPOSE_CASE_CLAUSE = case(
  )


- def result_proxy_to_models(
- result_proxy: ResultProxy, model: BaseModel
- ) -> List[BaseModel]:
+ def result_proxy_to_models(result_proxy: ResultProxy, model: BaseModel) -> List[BaseModel]:
  """Materialize a sqlalchemy `result_proxy` iterable as a list of `model` instances"""
  return [model(**dict(row_proxy)) for row_proxy in result_proxy.all()]