nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cidc_api/models/models.py CHANGED
@@ -4,6 +4,8 @@ __all__ = [
4
4
  "Column",
5
5
  "CommonColumns",
6
6
  "DownloadableFiles",
7
+ "FileGroups",
8
+ "FilesToFileGroups",
7
9
  "EXTRA_DATA_TYPES",
8
10
  "IntegrityError",
9
11
  "IAMException",
@@ -26,8 +28,6 @@ from collections import defaultdict
26
28
  import re
27
29
  import hashlib
28
30
  import os
29
-
30
- os.environ["TZ"] = "UTC"
31
31
  from datetime import datetime, timedelta
32
32
  from enum import Enum as EnumBaseClass
33
33
  from functools import wraps
@@ -42,6 +42,7 @@ from typing import (
42
42
  Union,
43
43
  Callable,
44
44
  Tuple,
45
+ Iterable,
45
46
  )
46
47
 
47
48
  import pandas as pd
@@ -59,8 +60,9 @@ from sqlalchemy import (
59
60
  Index,
60
61
  func,
61
62
  CheckConstraint,
63
+ ForeignKey,
62
64
  ForeignKeyConstraint,
63
- UniqueConstraint,
65
+ PrimaryKeyConstraint,
64
66
  tuple_,
65
67
  asc,
66
68
  desc,
@@ -71,16 +73,26 @@ from sqlalchemy import (
71
73
  not_,
72
74
  literal,
73
75
  or_,
76
+ Table,
77
+ MetaData,
74
78
  )
75
- from sqlalchemy.exc import IntegrityError
76
- from sqlalchemy.ext.hybrid import hybrid_property
77
- from sqlalchemy.orm import validates
79
+ from sqlalchemy.orm import relationship, validates
78
80
  from sqlalchemy.orm.attributes import flag_modified
79
81
  from sqlalchemy.orm.exc import NoResultFound
80
82
  from sqlalchemy.orm.session import Session
81
83
  from sqlalchemy.orm.query import Query
82
- from sqlalchemy.sql import text
84
+ from sqlalchemy.sql import (
85
+ # This is unfortunate but other code in this file relies on sqlalchemy.and_, or_, etc
86
+ # instead of the sqlalchemy.sql versions we are importing here. The solution is to
87
+ # break up this giant file.
88
+ and_ as sql_and,
89
+ # or_ as sql_or, # NOT USED
90
+ # select, # ALREADY IMPORTED
91
+ text,
92
+ )
83
93
  from sqlalchemy.sql.functions import coalesce
94
+ from sqlalchemy.exc import IntegrityError
95
+ from sqlalchemy.ext.hybrid import hybrid_property
84
96
  from sqlalchemy.dialects.postgresql import JSONB, UUID
85
97
  from sqlalchemy.engine import ResultProxy
86
98
 
@@ -105,6 +117,7 @@ from ..config.settings import (
105
117
  )
106
118
  from ..shared import emails
107
119
  from ..shared.gcloud_client import (
120
+ grant_download_access_to_blob_names,
108
121
  grant_lister_access,
109
122
  grant_download_access,
110
123
  publish_artifact_upload,
@@ -116,9 +129,15 @@ from ..shared.gcloud_client import (
116
129
  )
117
130
  from ..config.logging import get_logger
118
131
 
132
+
133
+ os.environ["TZ"] = "UTC"
119
134
  logger = get_logger(__name__)
120
135
 
121
136
 
137
+ class FileGroups:
138
+ pass
139
+
140
+
122
141
  def with_default_session(f):
123
142
  """
124
143
  For some `f` expecting a database session instance as a keyword argument,
@@ -160,7 +179,7 @@ class CommonColumns(BaseModel): # type: ignore
160
179
  [s for c in cls.__bases__ for s in _all_bases(c)]
161
180
  )
162
181
 
163
- columns_to_check = [c for c in type(self).__table__.columns]
182
+ columns_to_check = list(type(self).__table__.columns)
164
183
  for b in _all_bases(type(self)):
165
184
  if hasattr(b, "__table__"):
166
185
  columns_to_check.extend(b.__table__.columns)
@@ -312,7 +331,6 @@ class CommonColumns(BaseModel): # type: ignore
312
331
 
313
332
  def validate(self):
314
333
  """Run custom validations on attributes set on this instance."""
315
- pass
316
334
 
317
335
  @classmethod
318
336
  def get_unique_columns(cls):
@@ -408,7 +426,7 @@ class Users(CommonColumns):
408
426
 
409
427
  user = Users.find_by_email(email)
410
428
  if not user:
411
- logger.info(f"Creating new user with email {email}")
429
+ logger.info("Creating new user with email %s", email)
412
430
  user = Users(
413
431
  email=email, contact_email=email, first_n=first_n, last_n=last_n
414
432
  )
@@ -431,7 +449,7 @@ class Users(CommonColumns):
431
449
  .values(disabled=True)
432
450
  .returning(Users.id)
433
451
  )
434
- disabled_user_ids: List[int] = [uid for uid in session.execute(update_query)]
452
+ disabled_user_ids: List[int] = list(session.execute(update_query))
435
453
  if commit:
436
454
  session.commit()
437
455
 
@@ -503,7 +521,11 @@ class IAMException(Exception):
503
521
  pass
504
522
 
505
523
 
506
- EXTRA_DATA_TYPES = ["participants info", "samples info"]
524
+ EXTRA_DATA_TYPES = [
525
+ "participants info",
526
+ "samples info",
527
+ "file_group",
528
+ ]
507
529
  ALL_UPLOAD_TYPES = set(
508
530
  [
509
531
  *prism.SUPPORTED_MANIFESTS,
@@ -535,10 +557,14 @@ class Permissions(CommonColumns):
535
557
  name="ix_permissions_trial_id",
536
558
  ondelete="CASCADE",
537
559
  ),
538
- UniqueConstraint(
539
- "granted_to_user", "trial_id", "upload_type", name="unique_perms"
560
+ ForeignKeyConstraint(
561
+ ["file_group_id"],
562
+ ["file_groups.id"],
563
+ name="ix_permissions_file_group_id",
564
+ ondelete="CASCADE",
540
565
  ),
541
566
  CheckConstraint("trial_id is not null or upload_type is not null"),
567
+ CheckConstraint("upload_type <> 'file_group' OR file_group_id is NOT NULL"),
542
568
  )
543
569
  __mapper_args__ = {"confirm_deleted_rows": False}
544
570
 
@@ -548,6 +574,26 @@ class Permissions(CommonColumns):
548
574
  granted_to_user = Column(Integer, nullable=False, index=True)
549
575
  trial_id = Column(String, index=True)
550
576
  upload_type = Column(String)
577
+ file_group_id = Column(Integer, index=True)
578
+ file_group = relationship("FileGroups", back_populates="permissions")
579
+
580
+ __table_args__ = __table_args__ + (
581
+ Index(
582
+ "uix_upload_type_perms",
583
+ "granted_to_user",
584
+ "trial_id",
585
+ "upload_type",
586
+ unique=True,
587
+ postgresql_where=file_group_id.is_(None),
588
+ ),
589
+ Index(
590
+ "uix_file_group_perms",
591
+ "granted_to_user",
592
+ "file_group_id",
593
+ unique=True,
594
+ postgresql_where=file_group_id.isnot(None),
595
+ ),
596
+ )
551
597
 
552
598
  # Shorthand to make code related to trial- and upload-type-level permissions
553
599
  # easier to interpret.
@@ -577,8 +623,19 @@ class Permissions(CommonColumns):
577
623
 
578
624
  NOTE: values provided to the `commit` argument will be ignored. This method always commits.
579
625
  """
580
- if self.upload_type == self.EVERY and self.trial_id == self.EVERY:
581
- raise ValueError("A permission must have a trial id or upload type.")
626
+ if (
627
+ self.upload_type == self.EVERY
628
+ and self.trial_id == self.EVERY
629
+ and not self.file_group_id
630
+ ):
631
+ raise ValueError(
632
+ "A permission must have a trial id, upload type, or file group."
633
+ )
634
+
635
+ if self.file_group_id and self.upload_type != "file_group":
636
+ raise ValueError(
637
+ "If a permission has a file group, its upload_type must be set to file_group"
638
+ )
582
639
 
583
640
  grantee = Users.find_by_id(self.granted_to_user, session=session)
584
641
  if grantee is None:
@@ -595,7 +652,7 @@ class Permissions(CommonColumns):
595
652
  raise IntegrityError(
596
653
  params=None,
597
654
  statement=None,
598
- orig=f"`granted_by_user` user must be given",
655
+ orig="`granted_by_user` user must be given",
599
656
  )
600
657
  if grantor is None:
601
658
  raise IntegrityError(
@@ -603,36 +660,52 @@ class Permissions(CommonColumns):
603
660
  statement=None,
604
661
  orig=f"`granted_by_user` user must exist, but no user found with id {self.granted_by_user}",
605
662
  )
663
+ file_group: FileGroups = None
664
+ if self.upload_type == "file_group":
665
+ file_group = FileGroups.find_by_id(self.file_group_id, session=session)
666
+ if not file_group:
667
+ raise IntegrityError(
668
+ params=None,
669
+ statement=None,
670
+ orig=f"`file_group` must exist, but none found with id {self.file_group_id}",
671
+ )
606
672
 
607
- logger.info(
608
- f"admin-action: {grantor.email} gave {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
609
- )
673
+ info_message = f"admin-action: {grantor.email} gave {grantee.email} the permission {self.upload_type or 'all assays'}{'(' + file_group.name + ')' if file_group else ''} on {self.trial_id or 'all trials'}"
674
+ logger.info(info_message)
610
675
 
611
- # If this is a permission granting the user access to all trials for
612
- # a given upload type or all upload types for a given trial, delete
613
- # any related trial-upload type specific permissions to avoid
614
- # redundancy in the database and in conditional IAM bindings.
615
- perms_to_delete = (
616
- session.query(Permissions)
617
- .filter(
618
- Permissions.granted_to_user == self.granted_to_user,
619
- # If inserting a cross-trial perm, then select relevant
620
- # trial-specific perms for deletion.
621
- Permissions.trial_id != self.EVERY
622
- if self.trial_id == self.EVERY
623
- else Permissions.trial_id == self.trial_id,
624
- # If inserting a cross-upload type perm, then select relevant
625
- # upload type-specific perms for deletion. This does NOT
626
- # include clinical_data, just manifests/assays/analysis.
627
- and_(
628
- Permissions.upload_type != self.EVERY,
629
- Permissions.upload_type != "clinical_data",
676
+ if self.upload_type == "file_group":
677
+ # Do not delete past-assigned file_group permissions for this user.
678
+ perms_to_delete = []
679
+ else:
680
+ # If this is a permission granting the user access to all trials for
681
+ # a given upload type or all upload types for a given trial, delete
682
+ # any related trial-upload type specific permissions to avoid
683
+ # redundancy in the database and in conditional IAM bindings.
684
+ perms_to_delete = (
685
+ session.query(Permissions)
686
+ .filter(
687
+ Permissions.granted_to_user == self.granted_to_user,
688
+ # If inserting a cross-trial perm, then select relevant
689
+ # trial-specific perms for deletion.
690
+ (
691
+ Permissions.trial_id != self.EVERY
692
+ if self.trial_id == self.EVERY
693
+ else Permissions.trial_id == self.trial_id
694
+ ),
695
+ # If inserting a cross-upload type perm, then select relevant
696
+ # upload type-specific perms for deletion. This does NOT
697
+ # include clinical_data, just manifests/assays/analysis.
698
+ (
699
+ and_(
700
+ Permissions.upload_type != self.EVERY,
701
+ Permissions.upload_type != "clinical_data",
702
+ )
703
+ if self.upload_type == self.EVERY
704
+ else Permissions.upload_type == self.upload_type
705
+ ),
630
706
  )
631
- if self.upload_type == self.EVERY
632
- else Permissions.upload_type == self.upload_type,
707
+ .all()
633
708
  )
634
- .all()
635
- )
636
709
 
637
710
  # Add any related permission deletions to the insertion transaction.
638
711
  # If a delete operation fails, all other deletes and the insertion will
@@ -649,16 +722,26 @@ class Permissions(CommonColumns):
649
722
  or grantee.disabled
650
723
  or grantee.approval_date is None
651
724
  ):
725
+ # TODO: pact users do not have download permissions currently
652
726
  return
653
727
 
654
728
  try:
655
729
  # Grant ACL download permissions in GCS
656
- # if they have any download permissions, they need the CIDC Lister role
657
- grant_lister_access(grantee.email)
658
- grant_download_access(grantee.email, self.trial_id, self.upload_type)
659
- # Remove permissions staged for deletion, if any
660
- for perm in perms_to_delete:
661
- revoke_download_access(grantee.email, perm.trial_id, perm.upload_type)
730
+
731
+ if self.upload_type == "file_group":
732
+ Permissions.grant_download_access_to_file_group(
733
+ grantee.email, file_group
734
+ )
735
+ else:
736
+
737
+ # if they have any download permissions, they need the CIDC Lister role
738
+ grant_lister_access(grantee.email)
739
+ grant_download_access(grantee.email, self.trial_id, self.upload_type)
740
+ # Remove permissions staged for deletion, if any
741
+ for perm in perms_to_delete:
742
+ revoke_download_access(
743
+ grantee.email, perm.trial_id, perm.upload_type
744
+ )
662
745
  except Exception as e:
663
746
  # Add back deleted permissions, if any
664
747
  for perm in perms_to_delete:
@@ -697,7 +780,9 @@ class Permissions(CommonColumns):
697
780
  revoke_download_access(grantee.email, self.trial_id, self.upload_type)
698
781
 
699
782
  # If the permission to delete is the last one, also revoke Lister access
700
- filter_ = lambda q: q.filter(Permissions.granted_to_user == grantee.id)
783
+ def filter_(q):
784
+ return q.filter(Permissions.granted_to_user == grantee.id)
785
+
701
786
  if Permissions.count(session=session, filter_=filter_) <= 1:
702
787
  # this one hasn't been deleted yet, so 1 means this is the last one
703
788
  revoke_lister_access(grantee.email)
@@ -707,9 +792,8 @@ class Permissions(CommonColumns):
707
792
  "IAM revoke failed, and permission db record not removed."
708
793
  ) from e
709
794
 
710
- logger.info(
711
- f"admin-action: {deleted_by_user.email} removed from {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
712
- )
795
+ info_message = f"admin-action: {deleted_by_user.email} removed from {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
796
+ logger.info(info_message)
713
797
  super().delete(session=session, commit=True)
714
798
 
715
799
  @staticmethod
@@ -813,15 +897,18 @@ class Permissions(CommonColumns):
813
897
 
814
898
  @staticmethod
815
899
  @with_default_session
816
- def find_for_user_trial_type(
817
- user_id: int, trial_id: str, upload_type: str, session: Session
900
+ def find_for_user_trial_file(
901
+ user_id: int,
902
+ trial_id: str,
903
+ upload_type: str,
904
+ file_groups: Iterable,
905
+ session: Session,
818
906
  ) -> Optional["Permissions"]:
819
907
  """
820
- Check if a Permissions record exists for the given user, trial, and type.
821
- The result may be a trial- or assay-level permission that encompasses the
822
- given trial id or upload type.
908
+ Check if a Permissions record exists for the given user, trial, and file.
909
+ The result may be a trial-, assay-, or file-group-level permission.
823
910
  """
824
- return (
911
+ results = (
825
912
  session.query(Permissions)
826
913
  .filter(
827
914
  Permissions.granted_to_user == user_id,
@@ -843,6 +930,27 @@ class Permissions(CommonColumns):
843
930
  .first()
844
931
  )
845
932
 
933
+ # If nothing found so far, check file-group-level permissions.
934
+ if not results and file_groups:
935
+ results = (
936
+ session.query(Permissions)
937
+ .join(FileGroups, Permissions.file_group_id == FileGroups.id)
938
+ .filter(Permissions.granted_to_user == user_id)
939
+ .filter(Permissions.trial_id == trial_id)
940
+ .filter(Permissions.file_group_id == FileGroups.id)
941
+ .all()
942
+ )
943
+ if results:
944
+ file_group_ids = {file_group.id for file_group in file_groups}
945
+ results = [
946
+ result
947
+ for result in results
948
+ if result.file_group_id in file_group_ids
949
+ ]
950
+
951
+ results = results and results or None
952
+ return results
953
+
846
954
  @staticmethod
847
955
  @with_default_session
848
956
  def grant_user_permissions(user: Users, session: Session) -> None:
@@ -855,13 +963,23 @@ class Permissions(CommonColumns):
855
963
 
856
964
  perms = Permissions.find_for_user(user.id, session=session)
857
965
  # if they have any download permissions, they need the CIDC Lister role
858
- if len(perms):
966
+ # If a Permission's FileGroup is None, that implies the Permission is a
967
+ # trial/assay type and thus lister access is required.
968
+ if len(perms) and any(perm.file_group_id is None for perm in perms):
859
969
  grant_lister_access(user.email)
860
970
 
861
971
  # separate permissions by trial, as they are strictly non-overlapping
862
972
  perms_by_trial: Dict[str, List[Permissions]] = defaultdict(list)
973
+ # Or separate permissions by filegroup
974
+ file_group_perms = []
975
+ file_group_perms_ids = []
863
976
  for perm in perms:
864
- perms_by_trial[perm.trial_id].append(perm)
977
+ if perm.upload_type == "file_group":
978
+ if perm.file_group_id not in file_group_perms_ids:
979
+ file_group_perms.append(perm)
980
+ file_group_perms_ids.append(perm.file_group_id)
981
+ else:
982
+ perms_by_trial[perm.trial_id].append(perm)
865
983
  perms_by_trial = dict(perms_by_trial)
866
984
 
867
985
  for trial_id, trial_perms in perms_by_trial.items():
@@ -871,6 +989,11 @@ class Permissions(CommonColumns):
871
989
  trial_id=trial_id,
872
990
  upload_type=[p.upload_type for p in trial_perms],
873
991
  )
992
+ for perm in file_group_perms:
993
+ file_group: FileGroups = FileGroups.find_by_id(
994
+ perm.file_group_id, session=session
995
+ )
996
+ Permissions.grant_download_access_to_file_group(user.email, file_group)
874
997
 
875
998
  # Regrant all of the user's intake bucket upload permissions, if they have any
876
999
  refresh_intake_access(user.email)
@@ -945,9 +1068,9 @@ class Permissions(CommonColumns):
945
1068
  or user.email in user_email_list
946
1069
  ):
947
1070
  continue
948
- else:
949
- user_email_list.append(user.email)
950
- grant_lister_access(user.email)
1071
+
1072
+ user_email_list.append(user.email)
1073
+ grant_lister_access(user.email)
951
1074
 
952
1075
  if upload.upload_type in prism.SUPPORTED_SHIPPING_MANIFESTS:
953
1076
  # Passed with empty user email list because they will be queried for in CFn
@@ -957,125 +1080,20 @@ class Permissions(CommonColumns):
957
1080
  grant_download_access(user_email_list, upload.trial_id, upload.upload_type)
958
1081
 
959
1082
  @staticmethod
960
- @with_default_session
961
- def grant_download_permissions(
962
- trial_id: str, upload_type: str, session: Session
963
- ) -> None:
964
- Permissions._change_download_permissions(
965
- trial_id=trial_id, upload_type=upload_type, grant=True, session=session
966
- )
967
-
968
- @staticmethod
969
- @with_default_session
970
- def revoke_download_permissions(
971
- trial_id: str, upload_type: str, session: Session
972
- ) -> None:
973
- Permissions._change_download_permissions(
974
- trial_id=trial_id, upload_type=upload_type, grant=False, session=session
1083
+ def grant_download_access_to_file_group(user_email: str, file_group: FileGroups):
1084
+ # grant to individual blobs rather than calling grant_download_access(grantee, trial, upload_type)
1085
+ blob_names: list[str] = [df.object_url for df in file_group.downloadable_files]
1086
+ logger.info(
1087
+ "Granting access to %s for the following downloadables: %s",
1088
+ user_email,
1089
+ blob_names,
975
1090
  )
976
-
977
- @staticmethod
978
- @with_default_session
979
- def _change_download_permissions(
980
- trial_id: str, upload_type: str, grant: bool, session: Session
981
- ) -> None:
982
- """
983
- Allows for widespread granting/revoking of existing download permissions in GCS ACL
984
- Optionally filtered for specific trials and upload types
985
- If granting, also adds lister IAM permission for each user
986
- If revoking, DOES NOT remove lister IAM permission from any user
987
-
988
- Parameters
989
- ----------
990
- trial_id: str
991
- only affect permissions for this trial
992
- None for all trials
993
- upload_type: str
994
- only affect permissions for this upload type
995
- None for all upload types except clinical_data
996
- grant: bool
997
- whether to grant or remove the (filtered) permissions
998
- if True, adds lister IAM permission
999
- session: Session
1000
- filled by @with_default_session if not provided
1001
- """
1002
- filters = [
1003
- # set the condition for the join
1004
- Permissions.granted_to_user == Users.id,
1005
- # admins have blanket access via IAM
1006
- Users.role != CIDCRole.ADMIN.value,
1007
- ]
1008
- if grant:
1009
- # NCI users and disable aren't granted download permissions
1010
- # but we should be able to un-grant ie revoke them
1011
- filters.extend(
1012
- [
1013
- Users.role != CIDCRole.NCI_BIOBANK_USER.value,
1014
- Users.disabled == False,
1015
- ]
1016
- )
1017
- if trial_id:
1018
- filters.append(
1019
- or_(
1020
- Permissions.trial_id == trial_id, Permissions.trial_id == None
1021
- ), # null for cross-trial
1022
- )
1023
- if upload_type == "clinical_data":
1024
- # don't get null ie cross-assay
1025
- filters.append(
1026
- Permissions.upload_type == upload_type,
1027
- )
1028
- elif upload_type:
1029
- filters.append(
1030
- or_(
1031
- Permissions.upload_type == upload_type,
1032
- Permissions.upload_type == None,
1033
- ), # null for cross-assay
1034
- )
1035
- else: # null for cross-assay
1036
- filters.append(
1037
- # don't affect clinical_data
1038
- Permissions.upload_type
1039
- != "clinical_data",
1040
- )
1041
-
1042
- # List[Tuple[Permissions, Users]]
1043
- perms_and_users = session.query(Permissions, Users).filter(*filters).all()
1044
-
1045
- # group by trial and upload type
1046
- # Dict[str, Dict[str, List[str]]] = {trial_id: {upload_type: [user_email, ...], ...}, ...}
1047
- sorted_permissions = defaultdict(lambda: defaultdict(list))
1048
- # also handle user lister IAM permission if granting
1049
- already_listed: List[str] = []
1050
- for perm, user in perms_and_users:
1051
- # make sure we put it only for the desired scope
1052
- sorted_permissions[trial_id if trial_id else perm.trial_id][
1053
- upload_type if upload_type else perm.upload_type
1054
- ].append(user.email)
1055
-
1056
- # if granting things, grant_lister_access on every user
1057
- # idempotent, amounting to "add or refresh"
1058
- if grant and user.email not in already_listed:
1059
- grant_lister_access(user.email)
1060
- already_listed.append(user.email)
1061
- # if un-granting ie revoking things, don't call revoke_lister_access
1062
- # with the filtering, we don't know if the users have any other
1063
- # ACL permissions remaining that weren't affected here
1064
-
1065
- # now that we've filtered and separated, just do them all
1066
- # new values will override passed args
1067
- for trial_id, trial_perms in sorted_permissions.items():
1068
- for upload_type, users in trial_perms.items():
1069
- (grant_download_access if grant else revoke_download_access)(
1070
- users, trial_id, upload_type
1071
- )
1091
+ grant_download_access_to_blob_names([user_email], blob_names)
1072
1092
 
1073
1093
 
1074
1094
  class ValidationMultiError(Exception):
1075
1095
  """Holds multiple jsonschema.ValidationErrors"""
1076
1096
 
1077
- pass
1078
-
1079
1097
 
1080
1098
  trial_metadata_validator: json_validation._Validator = (
1081
1099
  json_validation.load_and_validate_schema(
@@ -1209,7 +1227,7 @@ class TrialMetadata(CommonColumns):
1209
1227
  Create a new clinical trial metadata record.
1210
1228
  """
1211
1229
 
1212
- logger.info(f"Creating new trial metadata with id {trial_id}")
1230
+ logger.info("Creating new trial metadata with id %s", trial_id)
1213
1231
  trial = TrialMetadata(trial_id=trial_id, metadata_json=metadata_json)
1214
1232
  trial.insert(session=session, commit=commit)
1215
1233
 
@@ -1401,7 +1419,10 @@ class TrialMetadata(CommonColumns):
1401
1419
  session.commit()
1402
1420
 
1403
1421
  @classmethod
1404
- def build_trial_filter(cls, user: Users, trial_ids: List[str] = []):
1422
+ def build_trial_filter(cls, user: Users, trial_ids: List[str] = None):
1423
+ if trial_ids is None:
1424
+ trial_ids = []
1425
+
1405
1426
  filters = []
1406
1427
  if trial_ids:
1407
1428
  filters.append(cls.trial_id.in_(trial_ids))
@@ -1662,7 +1683,7 @@ class TrialMetadata(CommonColumns):
1662
1683
  trial_metadata,
1663
1684
  jsonb_array_elements(metadata_json->'participants') participant,
1664
1685
  jsonb_array_elements(participant->'samples') sample
1665
-
1686
+
1666
1687
  where
1667
1688
  sample->>'processed_sample_derivative' = 'Tumor DNA'
1668
1689
  or
@@ -1778,7 +1799,7 @@ class TrialMetadata(CommonColumns):
1778
1799
  trial_id,
1779
1800
  jsonb_object_agg(key, value) as value
1780
1801
  from (
1781
- select
1802
+ select
1782
1803
  trial_id,
1783
1804
  key,
1784
1805
  jsonb_agg(sample) as value
@@ -1896,7 +1917,7 @@ class TrialMetadata(CommonColumns):
1896
1917
  select
1897
1918
  trial_id,
1898
1919
  key,
1899
- count(cimac_id) as num_sample
1920
+ count(distinct cimac_id) as num_sample
1900
1921
  from (
1901
1922
  {generic_assay_subquery}
1902
1923
  union all
@@ -1996,7 +2017,7 @@ class UploadJobs(CommonColumns):
1996
2017
  __tablename__ = "upload_jobs"
1997
2018
  # An upload job must contain a gcs_file_map is it isn't a manifest upload
1998
2019
  __table_args__ = (
1999
- CheckConstraint(f"multifile = true OR gcs_file_map != null"),
2020
+ CheckConstraint("multifile = true OR gcs_file_map != null"),
2000
2021
  ForeignKeyConstraint(
2001
2022
  ["uploader_email"],
2002
2023
  ["users.email"],
@@ -2138,10 +2159,10 @@ class UploadJobs(CommonColumns):
2138
2159
  if job is None or job.status == UploadJobStatus.MERGE_COMPLETED.value:
2139
2160
  raise ValueError(f"Upload job {job_id} doesn't exist or is already merged")
2140
2161
 
2141
- logger.info(f"About to merge extra md to {job.id}/{job.status}")
2162
+ logger.info("About to merge extra md to %s/%s", job.id, job.status)
2142
2163
 
2143
2164
  for uuid, file in files.items():
2144
- logger.info(f"About to parse/merge extra md on {uuid}")
2165
+ logger.info("About to parse/merge extra md on %s", uuid)
2145
2166
  (
2146
2167
  job.metadata_patch,
2147
2168
  updated_artifact,
@@ -2149,14 +2170,14 @@ class UploadJobs(CommonColumns):
2149
2170
  ) = prism.merge_artifact_extra_metadata(
2150
2171
  job.metadata_patch, uuid, job.upload_type, file
2151
2172
  )
2152
- logger.info(f"Updated md for {uuid}: {updated_artifact.keys()}")
2173
+ logger.info("Updated md for %s: %s", uuid, updated_artifact.keys())
2153
2174
 
2154
2175
  # A workaround fix for JSON field modifications not being tracked
2155
2176
  # by SQLalchemy for some reason. Using MutableDict.as_mutable(JSON)
2156
2177
  # in the model doesn't seem to help.
2157
2178
  flag_modified(job, "metadata_patch")
2158
2179
 
2159
- logger.info(f"Updated {job.id}/{job.status} patch: {job.metadata_patch}")
2180
+ logger.info("Updated %s/%s patch: %s", job.id, job.status, job.metadata_patch)
2160
2181
  session.commit()
2161
2182
 
2162
2183
  @classmethod
@@ -2188,6 +2209,62 @@ class UploadJobs(CommonColumns):
2188
2209
  self.alert_upload_success(trial)
2189
2210
 
2190
2211
 
2212
+ class FilesToFileGroups(BaseModel):
2213
+ """
2214
+ Mapping table between files (DownloadableFiles) and FileGroups.
2215
+ """
2216
+
2217
+ __tablename__ = "files_to_file_groups"
2218
+ __table_args__ = (
2219
+ PrimaryKeyConstraint(
2220
+ "file_group_id", "file_id", name="pk_files_to_file_groups"
2221
+ ),
2222
+ )
2223
+ file_group_id = Column(ForeignKey("file_groups.id"), primary_key=True)
2224
+ file_id = Column(ForeignKey("downloadable_files.id"), primary_key=True)
2225
+ _created = Column(DateTime, default=func.now(), nullable=False)
2226
+
2227
+ # Not currently used, but may be useful in the future.
2228
+ sort_order: int = Column(Integer)
2229
+
2230
+ @with_default_session
2231
+ def insert(self, session: Session, commit: bool = True):
2232
+ """For compatibility with CommonColumns"""
2233
+ session.add(self)
2234
+ if commit:
2235
+ session.commit()
2236
+
2237
+
2238
+ class FileGroups(CommonColumns):
2239
+ """
2240
+ Entity representing an arbitrary grouping of files. With the current permissions
2241
+ scheme, file groups always represent groups of files within a single trial, although
2242
+ this may not be true in the future.
2243
+
2244
+ File groups are currently used to restrict PACT users' access to subsets of files
2245
+ within a trial that do not correspond to assay types or facets. In the future, we
2246
+ may use them to represent collections of data published in a given study.
2247
+ """
2248
+
2249
+ __tablename__ = "file_groups"
2250
+
2251
+ name: Column = Column(String, nullable=False, unique=True)
2252
+ display_name: Column = Column(String, nullable=True)
2253
+ downloadable_files = relationship(
2254
+ "DownloadableFiles",
2255
+ secondary="files_to_file_groups",
2256
+ back_populates="file_groups",
2257
+ )
2258
+
2259
+ permissions = relationship("Permissions", back_populates="file_group")
2260
+
2261
+ @classmethod
2262
+ @with_default_session
2263
+ def find_by_name(cls, name: str, session: Session):
2264
+ """Find the record with this name"""
2265
+ return session.query(cls).filter_by(name=name).first()
2266
+
2267
+
2191
2268
  class DownloadableFiles(CommonColumns):
2192
2269
  """
2193
2270
  Store required fields from:
@@ -2232,6 +2309,12 @@ class DownloadableFiles(CommonColumns):
2232
2309
  file_name = Column(String, nullable=True)
2233
2310
  data_format = Column(String, nullable=True)
2234
2311
 
2312
+ file_groups = relationship(
2313
+ "FileGroups",
2314
+ secondary="files_to_file_groups",
2315
+ back_populates="downloadable_files",
2316
+ )
2317
+
2235
2318
  FILE_EXT_REGEX = r"\.([^./]*(\.gz)?)$"
2236
2319
 
2237
2320
  @hybrid_property
@@ -2240,15 +2323,15 @@ class DownloadableFiles(CommonColumns):
2240
2323
  return match.group(1) if match else None
2241
2324
 
2242
2325
  @file_ext.expression
2243
- def file_ext(cls):
2244
- return func.substring(cls.object_url, cls.FILE_EXT_REGEX)
2326
+ def file_ext(self):
2327
+ return func.substring(self.object_url, self.FILE_EXT_REGEX)
2245
2328
 
2246
2329
  @hybrid_property
2247
2330
  def data_category(self):
2248
2331
  return facet_groups_to_categories.get(self.facet_group)
2249
2332
 
2250
2333
  @data_category.expression
2251
- def data_category(cls):
2334
+ def data_category(self):
2252
2335
  return DATA_CATEGORY_CASE_CLAUSE
2253
2336
 
2254
2337
  @hybrid_property
@@ -2262,7 +2345,7 @@ class DownloadableFiles(CommonColumns):
2262
2345
  return self.data_category.split(FACET_NAME_DELIM, 1)[0]
2263
2346
 
2264
2347
  @data_category_prefix.expression
2265
- def data_category_prefix(cls):
2348
+ def data_category_prefix(self):
2266
2349
  return func.split_part(DATA_CATEGORY_CASE_CLAUSE, FACET_NAME_DELIM, 1)
2267
2350
 
2268
2351
  @hybrid_property
@@ -2270,7 +2353,7 @@ class DownloadableFiles(CommonColumns):
2270
2353
  return details_dict.get(self.facet_group).file_purpose
2271
2354
 
2272
2355
  @file_purpose.expression
2273
- def file_purpose(cls):
2356
+ def file_purpose(self):
2274
2357
  return FILE_PURPOSE_CASE_CLAUSE
2275
2358
 
2276
2359
  @property
@@ -2298,6 +2381,473 @@ class DownloadableFiles(CommonColumns):
2298
2381
  def check_additional_metadata_default(self, key, value):
2299
2382
  return {} if value in ["null", None, {}] else value
2300
2383
 
2384
@classmethod
def _generate_query_objects(cls):
    """
    Build SQLAlchemy Core ``Table`` objects used by the ``*_with_permissions`` queries.

    The column lists are a deliberate duplicate of the ORM column definitions in this
    class; that duplication was the only way found to get the SQLAlchemy Expression
    Language API to work, so the two must be kept in sync by hand.

    Returns:
        A ``(downloadable_files, files_to_file_groups)`` tuple of ``Table`` objects
        registered on a fresh ``MetaData`` instance.
    """

    def bookkeeping_columns():
        # Both tables start with the same four columns. A Column object can only
        # belong to one Table, so a new set is created for every table.
        return [
            Column("_created", BigInteger, nullable=False),
            Column("_updated", DateTime, default=func.now(), nullable=False),
            Column("_etag", String(40), nullable=False),
            Column("id", Integer, primary_key=True, autoincrement=True, nullable=False),
        ]

    query_metadata = MetaData()

    # TODO(jcallaway): consider a reflection-based approach instead (collecting the
    # Column attributes of CommonColumns and DownloadableFiles via inspect.getmembers).
    # It doesn't currently work because of the relationship()s and foreign keys,
    # among other problems.
    files_table = Table(
        "downloadable_files",
        query_metadata,
        *bookkeeping_columns(),
        Column("file_size_bytes", BigInteger, nullable=False),
        Column("uploaded_timestamp", DateTime, nullable=False),
        Column("facet_group", String, nullable=False),
        Column("additional_metadata", JSONB, nullable=False),
        Column("upload_type", String, nullable=False),
        Column("md5_hash", String, nullable=True),
        Column("crc32c_hash", String, nullable=True),
        Column("trial_id", String, nullable=False),
        Column("object_url", String, nullable=False, index=True, unique=True),
        Column("visible", Boolean, default=True),
        Column("analysis_friendly", Boolean, default=False),
        Column("clustergrammer", JSONB, nullable=True),
        Column("ihc_combined_plot", JSONB, nullable=True),
        Column("file_name", String, nullable=True),
        Column("data_format", String, nullable=True),
    )

    file_groups_link_table = Table(
        "files_to_file_groups",
        query_metadata,
        *bookkeeping_columns(),
        Column("file_group_id", Integer, nullable=False),
        Column("file_id", ForeignKey("downloadable_files.id")),
    )

    return files_table, file_groups_link_table
2443
+
2444
@classmethod
def _convert_list_results(
    cls, downloadable_files_for_query: Table, query_files: List
):
    """
    Turn rows from a SQLAlchemy expression-language query into DownloadableFiles objects.

    The UI depends on some of the derived properties of DownloadableFiles, which plain
    result rows do not have.

    Args:
        downloadable_files_for_query: the Table the rows were selected from; its column
            names decide which attributes are copied from each row.
        query_files: the rows returned by the query.

    Returns:
        A list with one DownloadableFiles instance per row, in the same order.
    """
    column_names = [column.name for column in downloadable_files_for_query.c]
    return [
        DownloadableFiles(**{name: getattr(row, name) for name in column_names})
        for row in query_files
    ]
2460
+
2461
@classmethod
def _generate_where_clauses(
    cls,
    downloadable_files_for_query: Table,
    files_to_file_groups_for_query: Table,
    trial_ids: List[str],
    facets: List[List[str]],
    user: Users,
):
    """
    Build the where clauses shared by the ``*_with_permissions`` query methods.

    Callers combine the returned clauses with AND. The trial/facet filters are
    separate clauses, while everything the user is permitted to see is folded into
    ONE OR-ed clause. A file is therefore returned when it passes every filter and
    matches at least one of the user's permissions.

    Args:
        downloadable_files_for_query: Core table for ``downloadable_files``.
        files_to_file_groups_for_query: Core table for ``files_to_file_groups``.
        trial_ids: if non-empty, restrict results to these trial IDs.
        facets: if non-empty, restrict results to the facet groups for these paths.
        user: the user whose permissions apply. If falsy, no permission clause is added.

    Returns:
        A list of clauses. An empty list means the query should run without a where
        clause (likely because the user is an admin). ``None`` means there is nothing
        the user may view, so no query needs to be issued.
    """
    files = downloadable_files_for_query.c

    # From the perspective of viewing files, NCI Biobank users are admins.
    is_admin = user and (user.is_admin() or user.is_nci_user())

    where_clauses = []
    if trial_ids:
        where_clauses.append(files.trial_id.in_(trial_ids))
    if facets:
        facet_groups = get_facet_groups_for_paths(facets)
        where_clauses.append(files.facet_group.in_(facet_groups))

    if user and not is_admin:
        permission_clauses = []
        full_trial_perms, full_type_perms = [], []
        for permission in Permissions.find_for_user(user.id):

            # If upload_type or trial_id is null, that means to grant permissions to
            # *all* files for that upload type or trial.
            if permission.upload_type is None:
                full_trial_perms.append(permission.trial_id)
            elif permission.trial_id is None:
                full_type_perms.append(permission.upload_type)
            elif permission.file_group_id is None:
                permission_clauses.append(
                    sql_and(
                        files.trial_id == permission.trial_id,
                        files.upload_type == permission.upload_type,
                    )
                )
            else:
                permission_clauses.append(
                    sql_and(
                        files.trial_id == permission.trial_id,
                        files_to_file_groups_for_query.c.file_group_id
                        == permission.file_group_id,
                    )
                )

        if full_trial_perms:

            # don't include clinical_data in cross-trial permission
            permission_clauses.append(
                sql_and(
                    files.trial_id.in_(full_trial_perms),
                    files.upload_type != "clinical_data",
                )
            )
        if full_type_perms:
            permission_clauses.append(files.upload_type.in_(full_type_perms))

        if not permission_clauses:
            # A non-admin user without any permission must see nothing, even when
            # trial/facet filters were supplied.
            return None

        # Permissions are alternatives: matching any single one grants access.
        where_clauses.append(or_(*permission_clauses))

    # Need to be careful about return logic. Empty results could be because the user
    # is an admin, whereas None means the user has no permissions to view any files.
    if is_admin or where_clauses:
        return where_clauses

    return None
2547
+
2548
@classmethod
@with_default_session
def list_with_permissions(
    cls,
    session: Session,
    trial_ids: List[str] = None,
    facets: List[List[str]] = None,
    page_num: int = 0,
    page_size: int = PAGINATION_PAGE_SIZE,
    sort_field: Optional[str] = None,
    sort_direction: Optional[str] = None,
    user: Users = None,
):
    """
    List the files the given user may view, with filtering, sorting and pagination.

    Args:
        session: database session (supplied by ``with_default_session`` if omitted).
        trial_ids: if provided, only include files with these trial IDs.
        facets: if provided, only include files in the facet groups for these paths.
        page_num: zero-based page index; negative values are treated as 0.
        page_size: rows per page, capped at ``MAX_PAGINATION_PAGE_SIZE``.
        sort_field: name of a ``DownloadableFiles`` attribute to sort by.
        sort_direction: ``"asc"`` for ascending; anything else sorts descending.
        user: the user whose permissions restrict the results.

    Returns:
        A list of ``DownloadableFiles`` objects; empty if the user may not view any.
    """
    if trial_ids is None:
        trial_ids = []

    if facets is None:
        facets = []

    (
        downloadable_files_for_query,
        files_to_file_groups_for_query,
    ) = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        downloadable_files_for_query,
        files_to_file_groups_for_query,
        trial_ids,
        facets,
        user,
    )
    if where_clauses is None:

        # User doesn't have permissions to view any files; no need to issue a query.
        return []

    if not where_clauses:

        # No where clause (the user is likely an admin).
        statement = select([downloadable_files_for_query]).select_from(
            downloadable_files_for_query
        )

    else:
        # The outer join exposes file_group_id for file-group based permissions.
        # NOTE(review): a file linked to several file groups produces one joined row
        # per group and may therefore be listed more than once; confirm whether a
        # DISTINCT is needed.
        statement = (
            select([downloadable_files_for_query])
            .where(sql_and(*where_clauses))
            .select_from(
                downloadable_files_for_query.outerjoin(
                    files_to_file_groups_for_query
                )
            )
        )

    if sort_field:
        sort_attribute = getattr(cls, sort_field)
        field_with_dir = (
            asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
        )
        statement = statement.order_by(field_with_dir)

    # Enforce non-negative page numbers
    page_num = max(page_num, 0)

    # Enforce maximum page size
    page_size = min(page_size, MAX_PAGINATION_PAGE_SIZE)
    statement = statement.limit(page_size).offset(page_num * page_size)

    return DownloadableFiles._convert_list_results(
        downloadable_files_for_query, session.execute(statement).fetchall()
    )
2619
+
2620
@classmethod
@with_default_session
def count_with_permissions(
    cls,
    session: Session,
    trial_ids: List[str] = None,
    facets: List[List[str]] = None,
    user: Users = None,
):
    """
    Return the total number of records that would be returned by equivalent calls to
    list_with_permissions() (disregarding results paging).

    Args:
        session: database session (supplied by ``with_default_session`` if omitted).
        trial_ids: if provided, only count files with these trial IDs.
        facets: if provided, only count files in the facet groups for these paths.
        user: the user whose permissions restrict the results.

    Returns:
        The number of matching rows; 0 if the user may not view any files.
    """
    if trial_ids is None:
        trial_ids = []

    if facets is None:
        facets = []

    (
        downloadable_files_for_query,
        files_to_file_groups_for_query,
    ) = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        downloadable_files_for_query,
        files_to_file_groups_for_query,
        trial_ids,
        facets,
        user,
    )
    if where_clauses is None:

        # User doesn't have permissions to view any files; no need to issue a query.
        return 0

    file_count = func.count(downloadable_files_for_query.c.id)
    if not where_clauses:

        # No where clause (the user is likely an admin).
        statement = select([file_count]).select_from(downloadable_files_for_query)

    else:
        # The outer join exposes file_group_id for file-group based permissions.
        # NOTE(review): a file linked to several file groups produces one joined row
        # per group and may therefore be counted more than once; confirm whether a
        # DISTINCT count is needed.
        statement = (
            select([file_count])
            .where(sql_and(*where_clauses))
            .select_from(
                downloadable_files_for_query.outerjoin(
                    files_to_file_groups_for_query
                )
            )
        )

    return session.execute(statement).fetchone()[0]
2679
+
2680
@classmethod
@with_default_session
def count_by_facet_with_permissions(
    cls, session: Session, trial_ids: List[str] = None, user: Users = None
):
    """
    Count, per facet_group, the files that the given user has permissions to view.

    Args:
        session: database session (supplied by ``with_default_session`` if omitted).
        trial_ids: if provided, only count files with these trial IDs.
        user: the user whose permissions restrict the results.

    Returns:
        A dict mapping facet_group to file count; empty if the user may not view
        any files.
    """
    trial_ids = [] if trial_ids is None else trial_ids

    files_table, file_groups_link_table = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        files_table,
        file_groups_link_table,
        trial_ids,
        None,
        user,
    )

    # User doesn't have permissions to view any files; no need to issue a query.
    if where_clauses is None:
        return {}

    selected = [files_table.c.facet_group, func.count(files_table.c.id)]
    if where_clauses:
        # Join in the file-group links so file_group_id can be referenced.
        joined_source = files_table.outerjoin(file_groups_link_table)
        statement = (
            select(selected)
            .where(sql_and(*where_clauses))
            .select_from(joined_source)
        )
    else:
        # No where clause (the user is likely an admin).
        statement = select(selected).select_from(files_table)

    grouped = statement.group_by(files_table.c.facet_group)
    rows = session.execute(grouped).fetchall()
    return {facet_group: file_count for facet_group, file_count in rows}
2738
+
2739
@classmethod
@with_default_session
def list_object_urls_with_permissions(
    cls, session: Session, user: Users = None, ids: Iterable[int] = None
) -> Iterable[str]:
    """
    Return the object_urls of the requested files that the given user may view.

    Files among ``ids`` that the user has no permission for are silently left out.

    Args:
        session: database session (supplied by ``with_default_session`` if omitted).
        user: the user whose permissions restrict the results.
        ids: IDs of the downloadable files to look up.

    Returns:
        A list of object_url strings; empty when no ids are given or the user may
        not view any files.
    """
    # Materialize once: ids may be any iterable, and the default of None cannot be
    # passed to in_().
    requested_ids = list(ids) if ids is not None else []
    if not requested_ids:
        return []

    (
        downloadable_files_for_query,
        files_to_file_groups_for_query,
    ) = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        downloadable_files_for_query,
        files_to_file_groups_for_query,
        None,
        None,
        user,
    )
    if where_clauses is None:

        # User doesn't have permissions to view any files; no need to issue a query.
        return []

    id_filter = downloadable_files_for_query.c.id.in_(requested_ids)
    if not where_clauses:

        # No where clause (the user is likely an admin).
        statement = (
            select([downloadable_files_for_query.c.object_url])
            .where(id_filter)
            .select_from(downloadable_files_for_query)
        )

    else:
        # The outer join exposes file_group_id for file-group based permissions.
        statement = (
            select([downloadable_files_for_query.c.object_url])
            .where(sql_and(*where_clauses, id_filter))
            .select_from(
                downloadable_files_for_query.outerjoin(
                    files_to_file_groups_for_query
                )
            )
        )

    return [row[0] for row in session.execute(statement).fetchall()]
2787
+
2788
+ @classmethod
2789
+ def _generate_trial_file_counts(
2790
+ cls, downloadable_files: Iterable
2791
+ ) -> Dict[str, int]:
2792
+ results = defaultdict(lambda: 0)
2793
+ for downloadable_file in downloadable_files:
2794
+ if downloadable_file.data_category:
2795
+ results[downloadable_file.trial_id] = (
2796
+ results[downloadable_file.trial_id] + 1
2797
+ )
2798
+ return results
2799
+
2800
@classmethod
@with_default_session
def get_trial_facets_with_permissions(
    cls, session: Session, facets: Iterable[Iterable], user: Users = None
) -> Dict[str, int]:
    """
    Build the trial facets for the files that the user has permissions to view.

    The viewable files are fetched, counted per trial ID (files with a non-true
    data_category property are not included in these counts) and the counts are
    handed to build_trial_facets().

    Args:
        session: database session (supplied by ``with_default_session`` if omitted).
        facets: if provided, only include files in the facet groups for these paths.
        user: the user whose permissions restrict the results.

    Returns:
        The result of build_trial_facets() for the per-trial counts, or an empty
        dict if the user may not view any files.
    """
    files_table, file_groups_link_table = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        files_table,
        file_groups_link_table,
        None,
        facets,
        user,
    )

    # User doesn't have permissions to view any files; no need to issue a query.
    if where_clauses is None:
        return {}

    if where_clauses:
        # Join in the file-group links so file_group_id can be referenced.
        joined_source = files_table.outerjoin(file_groups_link_table)
        statement = (
            select([files_table])
            .where(sql_and(*where_clauses))
            .select_from(joined_source)
        )
    else:
        # No where clause (the user is likely an admin).
        statement = select([files_table]).select_from(files_table)

    rows = session.execute(statement).fetchall()
    viewable_files = DownloadableFiles._convert_list_results(files_table, rows)
    counts_by_trial = DownloadableFiles._generate_trial_file_counts(viewable_files)
    return build_trial_facets(counts_by_trial)
2850
+
2301
2851
  @with_default_session
2302
2852
  def get_related_files(self, session: Session) -> list:
2303
2853
  """
@@ -2340,15 +2890,16 @@ class DownloadableFiles(CommonColumns):
2340
2890
 
2341
2891
  return related_files
2342
2892
 
2893
+ # this is the old file filter code which we are temporarily putting back in to make facet search work again
2894
+ # TODO use old search code again
2343
2895
  @staticmethod
2344
2896
  def build_file_filter(
2345
- trial_ids: List[str] = [], facets: List[List[str]] = [], user: Users = None
2897
+ trial_ids: List[str] = None, facets: List[List[str]] = None, user: Users = None
2346
2898
  ) -> Callable[[Query], Query]:
2347
2899
  """
2348
2900
  Build a file filter function based on the provided parameters. The resultant
2349
2901
  filter can then be passed as the `filter_` argument of `DownloadableFiles.list`
2350
2902
  or `DownloadableFiles.count`.
2351
-
2352
2903
  Args:
2353
2904
  trial_ids: if provided, the filter will include only files with these trial IDs.
2354
2905
  upload_types: if provided, the filter will include only files with these upload types.
@@ -2358,6 +2909,12 @@ class DownloadableFiles(CommonColumns):
2358
2909
  Returns:
2359
2910
  A function that adds filters to a query against the DownloadableFiles table.
2360
2911
  """
2912
+ if trial_ids is None:
2913
+ trial_ids = []
2914
+
2915
+ if facets is None:
2916
+ facets = []
2917
+
2361
2918
  file_filters = []
2362
2919
  if trial_ids:
2363
2920
  file_filters.append(DownloadableFiles.trial_id.in_(trial_ids))
@@ -2418,10 +2975,10 @@ class DownloadableFiles(CommonColumns):
2418
2975
  "additional_metadata": additional_metadata,
2419
2976
  }
2420
2977
 
2978
+ # TODO maybe put non supported stuff from file_metadata to some misc jsonb column?
2421
2979
  for key, value in file_metadata.items():
2422
2980
  if key in supported_columns:
2423
2981
  filtered_metadata[key] = value
2424
- # TODO maybe put non supported stuff from file_metadata to some misc jsonb column?
2425
2982
 
2426
2983
  etag = make_etag(filtered_metadata.values())
2427
2984
 
@@ -2584,6 +3141,7 @@ class DownloadableFiles(CommonColumns):
2584
3141
  """Get the total number of bytes of data stored across all files."""
2585
3142
  filtered_query = filter_(session.query(func.sum(cls.file_size_bytes)))
2586
3143
  total_bytes = filtered_query.one()[0]
3144
+ # return int(total_bytes)
2587
3145
  return int(total_bytes or 0)
2588
3146
 
2589
3147
  @classmethod
@@ -2600,6 +3158,8 @@ class DownloadableFiles(CommonColumns):
2600
3158
  trial_facets = build_trial_facets(trial_file_counts)
2601
3159
  return trial_facets
2602
3160
 
3161
+ # old code to make filter search work
3162
+ # TODO fix this
2603
3163
  @classmethod
2604
3164
  @with_default_session
2605
3165
  def get_data_category_facets(
@@ -2611,6 +3171,14 @@ class DownloadableFiles(CommonColumns):
2611
3171
  data_category_facets = build_data_category_facets(facet_group_file_counts)
2612
3172
  return data_category_facets
2613
3173
 
3174
+ # new code that has been commented out to make filter search work
3175
+ # def get_data_category_facets(cls, trial_ids, user):
3176
+ # facet_group_file_counts = DownloadableFiles.count_by_facet_with_permissions(
3177
+ # trial_ids=trial_ids, user=user
3178
+ # )
3179
+ # data_category_facets = build_data_category_facets(facet_group_file_counts)
3180
+ # return data_category_facets
3181
+
2614
3182
 
2615
3183
  # Query clause for computing a downloadable file's data category.
2616
3184
  # Used above in the DownloadableFiles.data_category computed property.