nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/config/db.py +1 -1
- cidc_api/config/secrets.py +2 -2
- cidc_api/config/settings.py +1 -2
- cidc_api/csms/auth.py +14 -7
- cidc_api/models/csms_api.py +101 -83
- cidc_api/models/files/details.py +28 -38
- cidc_api/models/files/facets.py +41 -24
- cidc_api/models/migrations.py +16 -9
- cidc_api/models/models.py +763 -195
- cidc_api/shared/auth.py +18 -13
- cidc_api/shared/gcloud_client.py +106 -61
- cidc_api/shared/rest_utils.py +6 -5
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/METADATA +38 -10
- nci_cidc_api_modules-1.0.2.dist-info/RECORD +25 -0
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/WHEEL +1 -1
- nci_cidc_api_modules-1.0.0rc0.dist-info/RECORD +0 -25
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/LICENSE +0 -0
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.2.dist-info}/top_level.txt +0 -0
cidc_api/models/models.py
CHANGED
@@ -4,6 +4,8 @@ __all__ = [
|
|
4
4
|
"Column",
|
5
5
|
"CommonColumns",
|
6
6
|
"DownloadableFiles",
|
7
|
+
"FileGroups",
|
8
|
+
"FilesToFileGroups",
|
7
9
|
"EXTRA_DATA_TYPES",
|
8
10
|
"IntegrityError",
|
9
11
|
"IAMException",
|
@@ -26,8 +28,6 @@ from collections import defaultdict
|
|
26
28
|
import re
|
27
29
|
import hashlib
|
28
30
|
import os
|
29
|
-
|
30
|
-
os.environ["TZ"] = "UTC"
|
31
31
|
from datetime import datetime, timedelta
|
32
32
|
from enum import Enum as EnumBaseClass
|
33
33
|
from functools import wraps
|
@@ -42,6 +42,7 @@ from typing import (
|
|
42
42
|
Union,
|
43
43
|
Callable,
|
44
44
|
Tuple,
|
45
|
+
Iterable,
|
45
46
|
)
|
46
47
|
|
47
48
|
import pandas as pd
|
@@ -59,8 +60,9 @@ from sqlalchemy import (
|
|
59
60
|
Index,
|
60
61
|
func,
|
61
62
|
CheckConstraint,
|
63
|
+
ForeignKey,
|
62
64
|
ForeignKeyConstraint,
|
63
|
-
|
65
|
+
PrimaryKeyConstraint,
|
64
66
|
tuple_,
|
65
67
|
asc,
|
66
68
|
desc,
|
@@ -71,16 +73,26 @@ from sqlalchemy import (
|
|
71
73
|
not_,
|
72
74
|
literal,
|
73
75
|
or_,
|
76
|
+
Table,
|
77
|
+
MetaData,
|
74
78
|
)
|
75
|
-
from sqlalchemy.
|
76
|
-
from sqlalchemy.ext.hybrid import hybrid_property
|
77
|
-
from sqlalchemy.orm import validates
|
79
|
+
from sqlalchemy.orm import relationship, validates
|
78
80
|
from sqlalchemy.orm.attributes import flag_modified
|
79
81
|
from sqlalchemy.orm.exc import NoResultFound
|
80
82
|
from sqlalchemy.orm.session import Session
|
81
83
|
from sqlalchemy.orm.query import Query
|
82
|
-
from sqlalchemy.sql import
|
84
|
+
from sqlalchemy.sql import (
|
85
|
+
# This is unfortunate but other code in this file relies on sqlalchemy.and_, or_, etc
|
86
|
+
# instead of the sqlalchemy.sql versions we are importing here. The solution is to
|
87
|
+
# break up this giant file.
|
88
|
+
and_ as sql_and,
|
89
|
+
# or_ as sql_or, # NOT USED
|
90
|
+
# select, # ALREADY IMPORTED
|
91
|
+
text,
|
92
|
+
)
|
83
93
|
from sqlalchemy.sql.functions import coalesce
|
94
|
+
from sqlalchemy.exc import IntegrityError
|
95
|
+
from sqlalchemy.ext.hybrid import hybrid_property
|
84
96
|
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
85
97
|
from sqlalchemy.engine import ResultProxy
|
86
98
|
|
@@ -105,6 +117,7 @@ from ..config.settings import (
|
|
105
117
|
)
|
106
118
|
from ..shared import emails
|
107
119
|
from ..shared.gcloud_client import (
|
120
|
+
grant_download_access_to_blob_names,
|
108
121
|
grant_lister_access,
|
109
122
|
grant_download_access,
|
110
123
|
publish_artifact_upload,
|
@@ -116,9 +129,15 @@ from ..shared.gcloud_client import (
|
|
116
129
|
)
|
117
130
|
from ..config.logging import get_logger
|
118
131
|
|
132
|
+
|
133
|
+
os.environ["TZ"] = "UTC"
|
119
134
|
logger = get_logger(__name__)
|
120
135
|
|
121
136
|
|
137
|
+
class FileGroups:
|
138
|
+
pass
|
139
|
+
|
140
|
+
|
122
141
|
def with_default_session(f):
|
123
142
|
"""
|
124
143
|
For some `f` expecting a database session instance as a keyword argument,
|
@@ -160,7 +179,7 @@ class CommonColumns(BaseModel): # type: ignore
|
|
160
179
|
[s for c in cls.__bases__ for s in _all_bases(c)]
|
161
180
|
)
|
162
181
|
|
163
|
-
columns_to_check =
|
182
|
+
columns_to_check = list(type(self).__table__.columns)
|
164
183
|
for b in _all_bases(type(self)):
|
165
184
|
if hasattr(b, "__table__"):
|
166
185
|
columns_to_check.extend(b.__table__.columns)
|
@@ -312,7 +331,6 @@ class CommonColumns(BaseModel): # type: ignore
|
|
312
331
|
|
313
332
|
def validate(self):
|
314
333
|
"""Run custom validations on attributes set on this instance."""
|
315
|
-
pass
|
316
334
|
|
317
335
|
@classmethod
|
318
336
|
def get_unique_columns(cls):
|
@@ -408,7 +426,7 @@ class Users(CommonColumns):
|
|
408
426
|
|
409
427
|
user = Users.find_by_email(email)
|
410
428
|
if not user:
|
411
|
-
logger.info(
|
429
|
+
logger.info("Creating new user with email %s", email)
|
412
430
|
user = Users(
|
413
431
|
email=email, contact_email=email, first_n=first_n, last_n=last_n
|
414
432
|
)
|
@@ -431,7 +449,7 @@ class Users(CommonColumns):
|
|
431
449
|
.values(disabled=True)
|
432
450
|
.returning(Users.id)
|
433
451
|
)
|
434
|
-
disabled_user_ids: List[int] =
|
452
|
+
disabled_user_ids: List[int] = list(session.execute(update_query))
|
435
453
|
if commit:
|
436
454
|
session.commit()
|
437
455
|
|
@@ -503,7 +521,11 @@ class IAMException(Exception):
|
|
503
521
|
pass
|
504
522
|
|
505
523
|
|
506
|
-
EXTRA_DATA_TYPES = [
|
524
|
+
EXTRA_DATA_TYPES = [
|
525
|
+
"participants info",
|
526
|
+
"samples info",
|
527
|
+
"file_group",
|
528
|
+
]
|
507
529
|
ALL_UPLOAD_TYPES = set(
|
508
530
|
[
|
509
531
|
*prism.SUPPORTED_MANIFESTS,
|
@@ -535,10 +557,14 @@ class Permissions(CommonColumns):
|
|
535
557
|
name="ix_permissions_trial_id",
|
536
558
|
ondelete="CASCADE",
|
537
559
|
),
|
538
|
-
|
539
|
-
"
|
560
|
+
ForeignKeyConstraint(
|
561
|
+
["file_group_id"],
|
562
|
+
["file_groups.id"],
|
563
|
+
name="ix_permissions_file_group_id",
|
564
|
+
ondelete="CASCADE",
|
540
565
|
),
|
541
566
|
CheckConstraint("trial_id is not null or upload_type is not null"),
|
567
|
+
CheckConstraint("upload_type <> 'file_group' OR file_group_id is NOT NULL"),
|
542
568
|
)
|
543
569
|
__mapper_args__ = {"confirm_deleted_rows": False}
|
544
570
|
|
@@ -548,6 +574,26 @@ class Permissions(CommonColumns):
|
|
548
574
|
granted_to_user = Column(Integer, nullable=False, index=True)
|
549
575
|
trial_id = Column(String, index=True)
|
550
576
|
upload_type = Column(String)
|
577
|
+
file_group_id = Column(Integer, index=True)
|
578
|
+
file_group = relationship("FileGroups", back_populates="permissions")
|
579
|
+
|
580
|
+
__table_args__ = __table_args__ + (
|
581
|
+
Index(
|
582
|
+
"uix_upload_type_perms",
|
583
|
+
"granted_to_user",
|
584
|
+
"trial_id",
|
585
|
+
"upload_type",
|
586
|
+
unique=True,
|
587
|
+
postgresql_where=file_group_id.is_(None),
|
588
|
+
),
|
589
|
+
Index(
|
590
|
+
"uix_file_group_perms",
|
591
|
+
"granted_to_user",
|
592
|
+
"file_group_id",
|
593
|
+
unique=True,
|
594
|
+
postgresql_where=file_group_id.isnot(None),
|
595
|
+
),
|
596
|
+
)
|
551
597
|
|
552
598
|
# Shorthand to make code related to trial- and upload-type-level permissions
|
553
599
|
# easier to interpret.
|
@@ -577,8 +623,19 @@ class Permissions(CommonColumns):
|
|
577
623
|
|
578
624
|
NOTE: values provided to the `commit` argument will be ignored. This method always commits.
|
579
625
|
"""
|
580
|
-
if
|
581
|
-
|
626
|
+
if (
|
627
|
+
self.upload_type == self.EVERY
|
628
|
+
and self.trial_id == self.EVERY
|
629
|
+
and not self.file_group_id
|
630
|
+
):
|
631
|
+
raise ValueError(
|
632
|
+
"A permission must have a trial id, upload type, or file group."
|
633
|
+
)
|
634
|
+
|
635
|
+
if self.file_group_id and self.upload_type != "file_group":
|
636
|
+
raise ValueError(
|
637
|
+
"If a permission has a file group, its upload_type must be set to file_group"
|
638
|
+
)
|
582
639
|
|
583
640
|
grantee = Users.find_by_id(self.granted_to_user, session=session)
|
584
641
|
if grantee is None:
|
@@ -595,7 +652,7 @@ class Permissions(CommonColumns):
|
|
595
652
|
raise IntegrityError(
|
596
653
|
params=None,
|
597
654
|
statement=None,
|
598
|
-
orig=
|
655
|
+
orig="`granted_by_user` user must be given",
|
599
656
|
)
|
600
657
|
if grantor is None:
|
601
658
|
raise IntegrityError(
|
@@ -603,36 +660,52 @@ class Permissions(CommonColumns):
|
|
603
660
|
statement=None,
|
604
661
|
orig=f"`granted_by_user` user must exist, but no user found with id {self.granted_by_user}",
|
605
662
|
)
|
663
|
+
file_group: FileGroups = None
|
664
|
+
if self.upload_type == "file_group":
|
665
|
+
file_group = FileGroups.find_by_id(self.file_group_id, session=session)
|
666
|
+
if not file_group:
|
667
|
+
raise IntegrityError(
|
668
|
+
params=None,
|
669
|
+
statement=None,
|
670
|
+
orig=f"`file_group` must exist, but none found with id {self.file_group_id}",
|
671
|
+
)
|
606
672
|
|
607
|
-
|
608
|
-
|
609
|
-
)
|
673
|
+
info_message = f"admin-action: {grantor.email} gave {grantee.email} the permission {self.upload_type or 'all assays'}{'(' + file_group.name + ')' if file_group else ''} on {self.trial_id or 'all trials'}"
|
674
|
+
logger.info(info_message)
|
610
675
|
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
676
|
+
if self.upload_type == "file_group":
|
677
|
+
# Do not delete past-assigned file_group permissions for this user.
|
678
|
+
perms_to_delete = []
|
679
|
+
else:
|
680
|
+
# If this is a permission granting the user access to all trials for
|
681
|
+
# a given upload type or all upload types for a given trial, delete
|
682
|
+
# any related trial-upload type specific permissions to avoid
|
683
|
+
# redundancy in the database and in conditional IAM bindings.
|
684
|
+
perms_to_delete = (
|
685
|
+
session.query(Permissions)
|
686
|
+
.filter(
|
687
|
+
Permissions.granted_to_user == self.granted_to_user,
|
688
|
+
# If inserting a cross-trial perm, then select relevant
|
689
|
+
# trial-specific perms for deletion.
|
690
|
+
(
|
691
|
+
Permissions.trial_id != self.EVERY
|
692
|
+
if self.trial_id == self.EVERY
|
693
|
+
else Permissions.trial_id == self.trial_id
|
694
|
+
),
|
695
|
+
# If inserting a cross-upload type perm, then select relevant
|
696
|
+
# upload type-specific perms for deletion. This does NOT
|
697
|
+
# include clinical_data, just manifests/assays/analysis.
|
698
|
+
(
|
699
|
+
and_(
|
700
|
+
Permissions.upload_type != self.EVERY,
|
701
|
+
Permissions.upload_type != "clinical_data",
|
702
|
+
)
|
703
|
+
if self.upload_type == self.EVERY
|
704
|
+
else Permissions.upload_type == self.upload_type
|
705
|
+
),
|
630
706
|
)
|
631
|
-
|
632
|
-
else Permissions.upload_type == self.upload_type,
|
707
|
+
.all()
|
633
708
|
)
|
634
|
-
.all()
|
635
|
-
)
|
636
709
|
|
637
710
|
# Add any related permission deletions to the insertion transaction.
|
638
711
|
# If a delete operation fails, all other deletes and the insertion will
|
@@ -649,16 +722,26 @@ class Permissions(CommonColumns):
|
|
649
722
|
or grantee.disabled
|
650
723
|
or grantee.approval_date is None
|
651
724
|
):
|
725
|
+
# TODO: pact users do not have download permissions currently
|
652
726
|
return
|
653
727
|
|
654
728
|
try:
|
655
729
|
# Grant ACL download permissions in GCS
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
730
|
+
|
731
|
+
if self.upload_type == "file_group":
|
732
|
+
Permissions.grant_download_access_to_file_group(
|
733
|
+
grantee.email, file_group
|
734
|
+
)
|
735
|
+
else:
|
736
|
+
|
737
|
+
# if they have any download permissions, they need the CIDC Lister role
|
738
|
+
grant_lister_access(grantee.email)
|
739
|
+
grant_download_access(grantee.email, self.trial_id, self.upload_type)
|
740
|
+
# Remove permissions staged for deletion, if any
|
741
|
+
for perm in perms_to_delete:
|
742
|
+
revoke_download_access(
|
743
|
+
grantee.email, perm.trial_id, perm.upload_type
|
744
|
+
)
|
662
745
|
except Exception as e:
|
663
746
|
# Add back deleted permissions, if any
|
664
747
|
for perm in perms_to_delete:
|
@@ -697,7 +780,9 @@ class Permissions(CommonColumns):
|
|
697
780
|
revoke_download_access(grantee.email, self.trial_id, self.upload_type)
|
698
781
|
|
699
782
|
# If the permission to delete is the last one, also revoke Lister access
|
700
|
-
filter_
|
783
|
+
def filter_(q):
|
784
|
+
return q.filter(Permissions.granted_to_user == grantee.id)
|
785
|
+
|
701
786
|
if Permissions.count(session=session, filter_=filter_) <= 1:
|
702
787
|
# this one hasn't been deleted yet, so 1 means this is the last one
|
703
788
|
revoke_lister_access(grantee.email)
|
@@ -707,9 +792,8 @@ class Permissions(CommonColumns):
|
|
707
792
|
"IAM revoke failed, and permission db record not removed."
|
708
793
|
) from e
|
709
794
|
|
710
|
-
|
711
|
-
|
712
|
-
)
|
795
|
+
info_message = f"admin-action: {deleted_by_user.email} removed from {grantee.email} the permission {self.upload_type or 'all assays'} on {self.trial_id or 'all trials'}"
|
796
|
+
logger.info(info_message)
|
713
797
|
super().delete(session=session, commit=True)
|
714
798
|
|
715
799
|
@staticmethod
|
@@ -813,15 +897,18 @@ class Permissions(CommonColumns):
|
|
813
897
|
|
814
898
|
@staticmethod
|
815
899
|
@with_default_session
|
816
|
-
def
|
817
|
-
user_id: int,
|
900
|
+
def find_for_user_trial_file(
|
901
|
+
user_id: int,
|
902
|
+
trial_id: str,
|
903
|
+
upload_type: str,
|
904
|
+
file_groups: Iterable,
|
905
|
+
session: Session,
|
818
906
|
) -> Optional["Permissions"]:
|
819
907
|
"""
|
820
|
-
Check if a Permissions record exists for the given user, trial, and
|
821
|
-
The result may be a trial
|
822
|
-
given trial id or upload type.
|
908
|
+
Check if a Permissions record exists for the given user, trial, and file.
|
909
|
+
The result may be a trial-, assay-, or file-group-level permission.
|
823
910
|
"""
|
824
|
-
|
911
|
+
results = (
|
825
912
|
session.query(Permissions)
|
826
913
|
.filter(
|
827
914
|
Permissions.granted_to_user == user_id,
|
@@ -843,6 +930,27 @@ class Permissions(CommonColumns):
|
|
843
930
|
.first()
|
844
931
|
)
|
845
932
|
|
933
|
+
# If nothing found so far, check file-group-level permissions.
|
934
|
+
if not results and file_groups:
|
935
|
+
results = (
|
936
|
+
session.query(Permissions)
|
937
|
+
.join(FileGroups, Permissions.file_group_id == FileGroups.id)
|
938
|
+
.filter(Permissions.granted_to_user == user_id)
|
939
|
+
.filter(Permissions.trial_id == trial_id)
|
940
|
+
.filter(Permissions.file_group_id == FileGroups.id)
|
941
|
+
.all()
|
942
|
+
)
|
943
|
+
if results:
|
944
|
+
file_group_ids = {file_group.id for file_group in file_groups}
|
945
|
+
results = [
|
946
|
+
result
|
947
|
+
for result in results
|
948
|
+
if result.file_group_id in file_group_ids
|
949
|
+
]
|
950
|
+
|
951
|
+
results = results and results or None
|
952
|
+
return results
|
953
|
+
|
846
954
|
@staticmethod
|
847
955
|
@with_default_session
|
848
956
|
def grant_user_permissions(user: Users, session: Session) -> None:
|
@@ -855,13 +963,23 @@ class Permissions(CommonColumns):
|
|
855
963
|
|
856
964
|
perms = Permissions.find_for_user(user.id, session=session)
|
857
965
|
# if they have any download permissions, they need the CIDC Lister role
|
858
|
-
|
966
|
+
# If a Permission's FileGroup is None, that implies the Permission is a
|
967
|
+
# trial/assay type and thus lister access is required.
|
968
|
+
if len(perms) and any(perm.file_group_id is None for perm in perms):
|
859
969
|
grant_lister_access(user.email)
|
860
970
|
|
861
971
|
# separate permissions by trial, as they are strictly non-overlapping
|
862
972
|
perms_by_trial: Dict[str, List[Permissions]] = defaultdict(list)
|
973
|
+
# Or separate permissions by filegroup
|
974
|
+
file_group_perms = []
|
975
|
+
file_group_perms_ids = []
|
863
976
|
for perm in perms:
|
864
|
-
|
977
|
+
if perm.upload_type == "file_group":
|
978
|
+
if perm.file_group_id not in file_group_perms_ids:
|
979
|
+
file_group_perms.append(perm)
|
980
|
+
file_group_perms_ids.append(perm.file_group_id)
|
981
|
+
else:
|
982
|
+
perms_by_trial[perm.trial_id].append(perm)
|
865
983
|
perms_by_trial = dict(perms_by_trial)
|
866
984
|
|
867
985
|
for trial_id, trial_perms in perms_by_trial.items():
|
@@ -871,6 +989,11 @@ class Permissions(CommonColumns):
|
|
871
989
|
trial_id=trial_id,
|
872
990
|
upload_type=[p.upload_type for p in trial_perms],
|
873
991
|
)
|
992
|
+
for perm in file_group_perms:
|
993
|
+
file_group: FileGroups = FileGroups.find_by_id(
|
994
|
+
perm.file_group_id, session=session
|
995
|
+
)
|
996
|
+
Permissions.grant_download_access_to_file_group(user.email, file_group)
|
874
997
|
|
875
998
|
# Regrant all of the user's intake bucket upload permissions, if they have any
|
876
999
|
refresh_intake_access(user.email)
|
@@ -945,9 +1068,9 @@ class Permissions(CommonColumns):
|
|
945
1068
|
or user.email in user_email_list
|
946
1069
|
):
|
947
1070
|
continue
|
948
|
-
|
949
|
-
|
950
|
-
|
1071
|
+
|
1072
|
+
user_email_list.append(user.email)
|
1073
|
+
grant_lister_access(user.email)
|
951
1074
|
|
952
1075
|
if upload.upload_type in prism.SUPPORTED_SHIPPING_MANIFESTS:
|
953
1076
|
# Passed with empty user email list because they will be queried for in CFn
|
@@ -957,125 +1080,20 @@ class Permissions(CommonColumns):
|
|
957
1080
|
grant_download_access(user_email_list, upload.trial_id, upload.upload_type)
|
958
1081
|
|
959
1082
|
@staticmethod
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
@staticmethod
|
969
|
-
@with_default_session
|
970
|
-
def revoke_download_permissions(
|
971
|
-
trial_id: str, upload_type: str, session: Session
|
972
|
-
) -> None:
|
973
|
-
Permissions._change_download_permissions(
|
974
|
-
trial_id=trial_id, upload_type=upload_type, grant=False, session=session
|
1083
|
+
def grant_download_access_to_file_group(user_email: str, file_group: FileGroups):
|
1084
|
+
# grant to individual blobs rather than calling grant_download_access(grantee, trial, upload_type)
|
1085
|
+
blob_names: list[str] = [df.object_url for df in file_group.downloadable_files]
|
1086
|
+
logger.info(
|
1087
|
+
"Granting access to %s for the following downloadables: %s",
|
1088
|
+
user_email,
|
1089
|
+
blob_names,
|
975
1090
|
)
|
976
|
-
|
977
|
-
@staticmethod
|
978
|
-
@with_default_session
|
979
|
-
def _change_download_permissions(
|
980
|
-
trial_id: str, upload_type: str, grant: bool, session: Session
|
981
|
-
) -> None:
|
982
|
-
"""
|
983
|
-
Allows for widespread granting/revoking of existing download permissions in GCS ACL
|
984
|
-
Optionally filtered for specific trials and upload types
|
985
|
-
If granting, also adds lister IAM permission for each user
|
986
|
-
If revoking, DOES NOT remove lister IAM permission from any user
|
987
|
-
|
988
|
-
Parameters
|
989
|
-
----------
|
990
|
-
trial_id: str
|
991
|
-
only affect permissions for this trial
|
992
|
-
None for all trials
|
993
|
-
upload_type: str
|
994
|
-
only affect permissions for this upload type
|
995
|
-
None for all upload types except clinical_data
|
996
|
-
grant: bool
|
997
|
-
whether to grant or remove the (filtered) permissions
|
998
|
-
if True, adds lister IAM permission
|
999
|
-
session: Session
|
1000
|
-
filled by @with_default_session if not provided
|
1001
|
-
"""
|
1002
|
-
filters = [
|
1003
|
-
# set the condition for the join
|
1004
|
-
Permissions.granted_to_user == Users.id,
|
1005
|
-
# admins have blanket access via IAM
|
1006
|
-
Users.role != CIDCRole.ADMIN.value,
|
1007
|
-
]
|
1008
|
-
if grant:
|
1009
|
-
# NCI users and disable aren't granted download permissions
|
1010
|
-
# but we should be able to un-grant ie revoke them
|
1011
|
-
filters.extend(
|
1012
|
-
[
|
1013
|
-
Users.role != CIDCRole.NCI_BIOBANK_USER.value,
|
1014
|
-
Users.disabled == False,
|
1015
|
-
]
|
1016
|
-
)
|
1017
|
-
if trial_id:
|
1018
|
-
filters.append(
|
1019
|
-
or_(
|
1020
|
-
Permissions.trial_id == trial_id, Permissions.trial_id == None
|
1021
|
-
), # null for cross-trial
|
1022
|
-
)
|
1023
|
-
if upload_type == "clinical_data":
|
1024
|
-
# don't get null ie cross-assay
|
1025
|
-
filters.append(
|
1026
|
-
Permissions.upload_type == upload_type,
|
1027
|
-
)
|
1028
|
-
elif upload_type:
|
1029
|
-
filters.append(
|
1030
|
-
or_(
|
1031
|
-
Permissions.upload_type == upload_type,
|
1032
|
-
Permissions.upload_type == None,
|
1033
|
-
), # null for cross-assay
|
1034
|
-
)
|
1035
|
-
else: # null for cross-assay
|
1036
|
-
filters.append(
|
1037
|
-
# don't affect clinical_data
|
1038
|
-
Permissions.upload_type
|
1039
|
-
!= "clinical_data",
|
1040
|
-
)
|
1041
|
-
|
1042
|
-
# List[Tuple[Permissions, Users]]
|
1043
|
-
perms_and_users = session.query(Permissions, Users).filter(*filters).all()
|
1044
|
-
|
1045
|
-
# group by trial and upload type
|
1046
|
-
# Dict[str, Dict[str, List[str]]] = {trial_id: {upload_type: [user_email, ...], ...}, ...}
|
1047
|
-
sorted_permissions = defaultdict(lambda: defaultdict(list))
|
1048
|
-
# also handle user lister IAM permission if granting
|
1049
|
-
already_listed: List[str] = []
|
1050
|
-
for perm, user in perms_and_users:
|
1051
|
-
# make sure we put it only for the desired scope
|
1052
|
-
sorted_permissions[trial_id if trial_id else perm.trial_id][
|
1053
|
-
upload_type if upload_type else perm.upload_type
|
1054
|
-
].append(user.email)
|
1055
|
-
|
1056
|
-
# if granting things, grant_lister_access on every user
|
1057
|
-
# idempotent, amounting to "add or refresh"
|
1058
|
-
if grant and user.email not in already_listed:
|
1059
|
-
grant_lister_access(user.email)
|
1060
|
-
already_listed.append(user.email)
|
1061
|
-
# if un-granting ie revoking things, don't call revoke_lister_access
|
1062
|
-
# with the filtering, we don't know if the users have any other
|
1063
|
-
# ACL permissions remaining that weren't affected here
|
1064
|
-
|
1065
|
-
# now that we've filtered and separated, just do them all
|
1066
|
-
# new values will override passed args
|
1067
|
-
for trial_id, trial_perms in sorted_permissions.items():
|
1068
|
-
for upload_type, users in trial_perms.items():
|
1069
|
-
(grant_download_access if grant else revoke_download_access)(
|
1070
|
-
users, trial_id, upload_type
|
1071
|
-
)
|
1091
|
+
grant_download_access_to_blob_names([user_email], blob_names)
|
1072
1092
|
|
1073
1093
|
|
1074
1094
|
class ValidationMultiError(Exception):
|
1075
1095
|
"""Holds multiple jsonschema.ValidationErrors"""
|
1076
1096
|
|
1077
|
-
pass
|
1078
|
-
|
1079
1097
|
|
1080
1098
|
trial_metadata_validator: json_validation._Validator = (
|
1081
1099
|
json_validation.load_and_validate_schema(
|
@@ -1209,7 +1227,7 @@ class TrialMetadata(CommonColumns):
|
|
1209
1227
|
Create a new clinical trial metadata record.
|
1210
1228
|
"""
|
1211
1229
|
|
1212
|
-
logger.info(
|
1230
|
+
logger.info("Creating new trial metadata with id %s", trial_id)
|
1213
1231
|
trial = TrialMetadata(trial_id=trial_id, metadata_json=metadata_json)
|
1214
1232
|
trial.insert(session=session, commit=commit)
|
1215
1233
|
|
@@ -1401,7 +1419,10 @@ class TrialMetadata(CommonColumns):
|
|
1401
1419
|
session.commit()
|
1402
1420
|
|
1403
1421
|
@classmethod
|
1404
|
-
def build_trial_filter(cls, user: Users, trial_ids: List[str] =
|
1422
|
+
def build_trial_filter(cls, user: Users, trial_ids: List[str] = None):
|
1423
|
+
if trial_ids is None:
|
1424
|
+
trial_ids = []
|
1425
|
+
|
1405
1426
|
filters = []
|
1406
1427
|
if trial_ids:
|
1407
1428
|
filters.append(cls.trial_id.in_(trial_ids))
|
@@ -1662,7 +1683,7 @@ class TrialMetadata(CommonColumns):
|
|
1662
1683
|
trial_metadata,
|
1663
1684
|
jsonb_array_elements(metadata_json->'participants') participant,
|
1664
1685
|
jsonb_array_elements(participant->'samples') sample
|
1665
|
-
|
1686
|
+
|
1666
1687
|
where
|
1667
1688
|
sample->>'processed_sample_derivative' = 'Tumor DNA'
|
1668
1689
|
or
|
@@ -1778,7 +1799,7 @@ class TrialMetadata(CommonColumns):
|
|
1778
1799
|
trial_id,
|
1779
1800
|
jsonb_object_agg(key, value) as value
|
1780
1801
|
from (
|
1781
|
-
select
|
1802
|
+
select
|
1782
1803
|
trial_id,
|
1783
1804
|
key,
|
1784
1805
|
jsonb_agg(sample) as value
|
@@ -1896,7 +1917,7 @@ class TrialMetadata(CommonColumns):
|
|
1896
1917
|
select
|
1897
1918
|
trial_id,
|
1898
1919
|
key,
|
1899
|
-
count(cimac_id) as num_sample
|
1920
|
+
count(distinct cimac_id) as num_sample
|
1900
1921
|
from (
|
1901
1922
|
{generic_assay_subquery}
|
1902
1923
|
union all
|
@@ -1996,7 +2017,7 @@ class UploadJobs(CommonColumns):
|
|
1996
2017
|
__tablename__ = "upload_jobs"
|
1997
2018
|
# An upload job must contain a gcs_file_map is it isn't a manifest upload
|
1998
2019
|
__table_args__ = (
|
1999
|
-
CheckConstraint(
|
2020
|
+
CheckConstraint("multifile = true OR gcs_file_map != null"),
|
2000
2021
|
ForeignKeyConstraint(
|
2001
2022
|
["uploader_email"],
|
2002
2023
|
["users.email"],
|
@@ -2138,10 +2159,10 @@ class UploadJobs(CommonColumns):
|
|
2138
2159
|
if job is None or job.status == UploadJobStatus.MERGE_COMPLETED.value:
|
2139
2160
|
raise ValueError(f"Upload job {job_id} doesn't exist or is already merged")
|
2140
2161
|
|
2141
|
-
logger.info(
|
2162
|
+
logger.info("About to merge extra md to %s/%s", job.id, job.status)
|
2142
2163
|
|
2143
2164
|
for uuid, file in files.items():
|
2144
|
-
logger.info(
|
2165
|
+
logger.info("About to parse/merge extra md on %s", uuid)
|
2145
2166
|
(
|
2146
2167
|
job.metadata_patch,
|
2147
2168
|
updated_artifact,
|
@@ -2149,14 +2170,14 @@ class UploadJobs(CommonColumns):
|
|
2149
2170
|
) = prism.merge_artifact_extra_metadata(
|
2150
2171
|
job.metadata_patch, uuid, job.upload_type, file
|
2151
2172
|
)
|
2152
|
-
logger.info(
|
2173
|
+
logger.info("Updated md for %s: %s", uuid, updated_artifact.keys())
|
2153
2174
|
|
2154
2175
|
# A workaround fix for JSON field modifications not being tracked
|
2155
2176
|
# by SQLalchemy for some reason. Using MutableDict.as_mutable(JSON)
|
2156
2177
|
# in the model doesn't seem to help.
|
2157
2178
|
flag_modified(job, "metadata_patch")
|
2158
2179
|
|
2159
|
-
logger.info(
|
2180
|
+
logger.info("Updated %s/%s patch: %s", job.id, job.status, job.metadata_patch)
|
2160
2181
|
session.commit()
|
2161
2182
|
|
2162
2183
|
@classmethod
|
@@ -2188,6 +2209,62 @@ class UploadJobs(CommonColumns):
|
|
2188
2209
|
self.alert_upload_success(trial)
|
2189
2210
|
|
2190
2211
|
|
2212
|
+
class FilesToFileGroups(BaseModel):
|
2213
|
+
"""
|
2214
|
+
Mapping table between files (DownloadableFiles) and FileGroups.
|
2215
|
+
"""
|
2216
|
+
|
2217
|
+
__tablename__ = "files_to_file_groups"
|
2218
|
+
__table_args__ = (
|
2219
|
+
PrimaryKeyConstraint(
|
2220
|
+
"file_group_id", "file_id", name="pk_files_to_file_groups"
|
2221
|
+
),
|
2222
|
+
)
|
2223
|
+
file_group_id = Column(ForeignKey("file_groups.id"), primary_key=True)
|
2224
|
+
file_id = Column(ForeignKey("downloadable_files.id"), primary_key=True)
|
2225
|
+
_created = Column(DateTime, default=func.now(), nullable=False)
|
2226
|
+
|
2227
|
+
# Not currently used, but may be useful in the future.
|
2228
|
+
sort_order: int = Column(Integer)
|
2229
|
+
|
2230
|
+
@with_default_session
|
2231
|
+
def insert(self, session: Session, commit: bool = True):
|
2232
|
+
"""For compatibility with CommonColumns"""
|
2233
|
+
session.add(self)
|
2234
|
+
if commit:
|
2235
|
+
session.commit()
|
2236
|
+
|
2237
|
+
|
2238
|
+
class FileGroups(CommonColumns):
|
2239
|
+
"""
|
2240
|
+
Entity representing an arbitrary grouping of files. With the current permissions
|
2241
|
+
scheme, file groups always represent groups of files within a single trial, although
|
2242
|
+
this may not be true in the future.
|
2243
|
+
|
2244
|
+
File groups are currently used to restrict PACT users' access to subsets of files
|
2245
|
+
within a trial that do not correspond to assay types or facets. In the future, we
|
2246
|
+
may use them to represent collections of data published in a given study.
|
2247
|
+
"""
|
2248
|
+
|
2249
|
+
__tablename__ = "file_groups"
|
2250
|
+
|
2251
|
+
name: Column = Column(String, nullable=False, unique=True)
|
2252
|
+
display_name: Column = Column(String, nullable=True)
|
2253
|
+
downloadable_files = relationship(
|
2254
|
+
"DownloadableFiles",
|
2255
|
+
secondary="files_to_file_groups",
|
2256
|
+
back_populates="file_groups",
|
2257
|
+
)
|
2258
|
+
|
2259
|
+
permissions = relationship("Permissions", back_populates="file_group")
|
2260
|
+
|
2261
|
+
@classmethod
|
2262
|
+
@with_default_session
|
2263
|
+
def find_by_name(cls, name: str, session: Session):
|
2264
|
+
"""Find the record with this name"""
|
2265
|
+
return session.query(cls).filter_by(name=name).first()
|
2266
|
+
|
2267
|
+
|
2191
2268
|
class DownloadableFiles(CommonColumns):
|
2192
2269
|
"""
|
2193
2270
|
Store required fields from:
|
@@ -2232,6 +2309,12 @@ class DownloadableFiles(CommonColumns):
|
|
2232
2309
|
file_name = Column(String, nullable=True)
|
2233
2310
|
data_format = Column(String, nullable=True)
|
2234
2311
|
|
2312
|
+
file_groups = relationship(
|
2313
|
+
"FileGroups",
|
2314
|
+
secondary="files_to_file_groups",
|
2315
|
+
back_populates="downloadable_files",
|
2316
|
+
)
|
2317
|
+
|
2235
2318
|
FILE_EXT_REGEX = r"\.([^./]*(\.gz)?)$"
|
2236
2319
|
|
2237
2320
|
@hybrid_property
|
@@ -2240,15 +2323,15 @@ class DownloadableFiles(CommonColumns):
|
|
2240
2323
|
return match.group(1) if match else None
|
2241
2324
|
|
2242
2325
|
@file_ext.expression
|
2243
|
-
def file_ext(
|
2244
|
-
return func.substring(
|
2326
|
+
def file_ext(self):
|
2327
|
+
return func.substring(self.object_url, self.FILE_EXT_REGEX)
|
2245
2328
|
|
2246
2329
|
@hybrid_property
|
2247
2330
|
def data_category(self):
|
2248
2331
|
return facet_groups_to_categories.get(self.facet_group)
|
2249
2332
|
|
2250
2333
|
@data_category.expression
|
2251
|
-
def data_category(
|
2334
|
+
def data_category(self):
|
2252
2335
|
return DATA_CATEGORY_CASE_CLAUSE
|
2253
2336
|
|
2254
2337
|
@hybrid_property
|
@@ -2262,7 +2345,7 @@ class DownloadableFiles(CommonColumns):
|
|
2262
2345
|
return self.data_category.split(FACET_NAME_DELIM, 1)[0]
|
2263
2346
|
|
2264
2347
|
@data_category_prefix.expression
|
2265
|
-
def data_category_prefix(
|
2348
|
+
def data_category_prefix(self):
|
2266
2349
|
return func.split_part(DATA_CATEGORY_CASE_CLAUSE, FACET_NAME_DELIM, 1)
|
2267
2350
|
|
2268
2351
|
@hybrid_property
|
@@ -2270,7 +2353,7 @@ class DownloadableFiles(CommonColumns):
|
|
2270
2353
|
return details_dict.get(self.facet_group).file_purpose
|
2271
2354
|
|
2272
2355
|
@file_purpose.expression
|
2273
|
-
def file_purpose(
|
2356
|
+
def file_purpose(self):
|
2274
2357
|
return FILE_PURPOSE_CASE_CLAUSE
|
2275
2358
|
|
2276
2359
|
@property
|
@@ -2298,6 +2381,473 @@ class DownloadableFiles(CommonColumns):
|
|
2298
2381
|
def check_additional_metadata_default(self, key, value):
|
2299
2382
|
return {} if value in ["null", None, {}] else value
|
2300
2383
|
|
2384
|
+
@classmethod
def _generate_query_objects(cls):
    """Build the Core ``Table`` objects used by the *_with_permissions() queries.

    Returns:
        A ``(downloadable_files, files_to_file_groups)`` tuple of ``Table``
        objects registered on a fresh ``MetaData`` instance.
    """
    # This unfortunately duplicates the column definitions declared on this
    # class, but it seemed to be the only way to get the SQLAlchemy Expression
    # Language API to work.
    #
    # TODO(jcallaway): consider a reflection-based approach instead, i.e.
    # collecting the Column attributes of CommonColumns and DownloadableFiles
    # via inspect.getmembers() and passing them to Table(). It doesn't
    # currently work because of the relationship()s and foreign keys, among
    # other problems.

    def bookkeeping_columns():
        # Both tables start with the same four columns; each Table gets its
        # own freshly constructed Column objects.
        return [
            Column("_created", BigInteger, nullable=False),
            Column("_updated", DateTime, default=func.now(), nullable=False),
            Column("_etag", String(40), nullable=False),
            Column("id", Integer, primary_key=True, autoincrement=True, nullable=False),
        ]

    registry = MetaData()

    files_table = Table(
        "downloadable_files",
        registry,
        *bookkeeping_columns(),
        Column("file_size_bytes", BigInteger, nullable=False),
        Column("uploaded_timestamp", DateTime, nullable=False),
        Column("facet_group", String, nullable=False),
        Column("additional_metadata", JSONB, nullable=False),
        Column("upload_type", String, nullable=False),
        Column("md5_hash", String, nullable=True),
        Column("crc32c_hash", String, nullable=True),
        Column("trial_id", String, nullable=False),
        Column("object_url", String, nullable=False, index=True, unique=True),
        Column("visible", Boolean, default=True),
        Column("analysis_friendly", Boolean, default=False),
        Column("clustergrammer", JSONB, nullable=True),
        Column("ihc_combined_plot", JSONB, nullable=True),
        Column("file_name", String, nullable=True),
        Column("data_format", String, nullable=True),
    )

    file_group_links_table = Table(
        "files_to_file_groups",
        registry,
        *bookkeeping_columns(),
        Column("file_group_id", Integer, nullable=False),
        Column("file_id", ForeignKey("downloadable_files.id")),
    )

    return files_table, file_group_links_table
|
2443
|
+
|
2444
|
+
@classmethod
def _convert_list_results(
    cls, downloadable_files_for_query: Table, query_files: List
):
    """Turn expression-language result rows into DownloadableFiles objects.

    The UI depends on some of the derived properties of DownloadableFiles,
    so each row is re-wrapped in a model instance.

    Args:
        downloadable_files_for_query: the Table whose column names are read
            from every row.
        query_files: the rows returned by the query.

    Returns:
        A list with one DownloadableFiles instance per row, in row order.
    """
    column_names = [column.name for column in downloadable_files_for_query.c]
    return [
        DownloadableFiles(**{name: getattr(row, name) for name in column_names})
        for row in query_files
    ]
|
2460
|
+
|
2461
|
+
@classmethod
def _generate_where_clauses(
    cls,
    downloadable_files_for_query: Table,
    files_to_file_groups_for_query: Table,
    trial_ids: List[str],
    facets: List[List[str]],
    user: Users,
):
    """
    Returns a list of where clauses for use by list_with_permissions(),
    count_with_permissions() and the other *_with_permissions() queries.

    Callers AND the returned clauses together. The list holds the optional
    trial and facet filters plus, for a non-admin user, ONE clause that ORs
    together everything the user's permissions grant. (Appending each
    permission as its own clause would require a file to satisfy every
    permission at once.)

    If the list is empty, this means to execute the query without a where
    clause (likely because the user is an admin). If None is returned instead
    of a list, this means that the user has no permissions to view any files
    specified.
    """
    # Local import: only sql_and is known to be in scope in this module.
    from sqlalchemy import or_ as sql_or

    files = downloadable_files_for_query.c

    # From the perspective of viewing files, NCI Biobank users are admins.
    is_admin = user and (user.is_admin() or user.is_nci_user())

    where_clauses = []
    if trial_ids:
        where_clauses.append(files.trial_id.in_(trial_ids))
    if facets:
        facet_groups = get_facet_groups_for_paths(facets)
        where_clauses.append(files.facet_group.in_(facet_groups))

    if user and not is_admin:
        permission_clauses = []
        full_trial_perms, full_type_perms = [], []
        for permission in Permissions.find_for_user(user.id):

            # If upload_type or trial_id is null, that means to grant permissions to
            # *all* files for that upload type or trial.
            if permission.upload_type is None:
                full_trial_perms.append(permission.trial_id)
            elif permission.trial_id is None:
                full_type_perms.append(permission.upload_type)
            elif permission.file_group_id is None:
                permission_clauses.append(
                    sql_and(
                        files.trial_id == permission.trial_id,
                        files.upload_type == permission.upload_type,
                    )
                )
            else:
                permission_clauses.append(
                    sql_and(
                        files.trial_id == permission.trial_id,
                        (
                            files_to_file_groups_for_query.c.file_group_id
                            == permission.file_group_id
                        ),
                    )
                )

        if full_trial_perms:
            # don't include clinical_data in cross-trial permission
            # NOTE(review): this exclusion is attached to the per-trial grants,
            # not to full_type_perms -- confirm that is the intended target.
            permission_clauses.append(
                sql_and(
                    files.trial_id.in_(full_trial_perms),
                    files.upload_type != "clinical_data",
                )
            )
        if full_type_perms:
            permission_clauses.append(files.upload_type.in_(full_type_perms))

        if not permission_clauses:
            # A non-admin without any permission may not see anything, even
            # when trial/facet filters were supplied.
            return None

        where_clauses.append(sql_or(*permission_clauses))

    # Need to be careful about return logic. Empty results could be because the user
    # is an admin, whereas None means the user has no permissions to view any files.
    if is_admin or where_clauses:
        return where_clauses

    return None
|
2547
|
+
|
2548
|
+
@classmethod
@with_default_session
def list_with_permissions(
    cls,
    session: Session,
    trial_ids: List[str] = None,
    facets: List[List[str]] = None,
    page_num: int = 0,
    page_size: int = PAGINATION_PAGE_SIZE,
    sort_field: Optional[str] = None,
    sort_direction: Optional[str] = None,
    user: Users = None,
):
    """List the files the user may view, with pagination support.

    Args:
        session: database session used to execute the query.
        trial_ids: if non-empty, only files from these trials are listed.
        facets: if non-empty, only files in the matching facet groups are listed.
        page_num: zero-based page index; negative values are treated as 0.
        page_size: rows per page, capped at MAX_PAGINATION_PAGE_SIZE.
        sort_field: name of the DownloadableFiles attribute to order by.
        sort_direction: "asc" for ascending; anything else sorts descending.
        user: the user whose permissions restrict the result.

    Returns:
        A list of DownloadableFiles objects (empty when the user has no
        permission to view any files).
    """
    if trial_ids is None:
        trial_ids = []

    if facets is None:
        facets = []

    (
        downloadable_files_for_query,
        files_to_file_groups_for_query,
    ) = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        downloadable_files_for_query,
        files_to_file_groups_for_query,
        trial_ids,
        facets,
        user,
    )
    if where_clauses is None:

        # User doesn't have permissions to view any files; no need to issue a query.
        # An empty list matches the type returned on every other path.
        return []

    if not where_clauses:

        # No where clause (the user is likely an admin).
        statement = select([downloadable_files_for_query]).select_from(
            downloadable_files_for_query
        )

    else:
        # The outer join exposes file_group_id to the permission clauses.
        statement = (
            select([downloadable_files_for_query])
            .where(sql_and(*where_clauses))
            .select_from(
                downloadable_files_for_query.outerjoin(
                    files_to_file_groups_for_query
                )
            )
        )

    if sort_field:
        sort_attribute = getattr(cls, sort_field)
        field_with_dir = (
            asc(sort_attribute) if sort_direction == "asc" else desc(sort_attribute)
        )
        statement = statement.order_by(field_with_dir)

    # Enforce positive page numbers
    page_num = max(page_num, 0)

    # Enforce maximum page size
    page_size = min(page_size, MAX_PAGINATION_PAGE_SIZE)
    statement = statement.limit(page_size).offset(page_num * page_size)

    return DownloadableFiles._convert_list_results(
        downloadable_files_for_query, session.execute(statement).fetchall()
    )
|
2619
|
+
|
2620
|
+
@classmethod
@with_default_session
def count_with_permissions(
    cls,
    session: Session,
    trial_ids: List[str] = None,
    facets: List[List[str]] = None,
    user: Users = None,
):
    """
    Return the total number of records that would be returned by equivalent calls to
    list_with_permissions() (disregarding results paging).

    Args:
        session: database session used to execute the query.
        trial_ids: if non-empty, only files from these trials are counted.
        facets: if non-empty, only files in the matching facet groups are counted.
        user: the user whose permissions restrict the count.

    Returns:
        The row count, or 0 when the user has no permission to view any files.
    """

    if trial_ids is None:
        trial_ids = []

    if facets is None:
        facets = []

    (
        downloadable_files_for_query,
        files_to_file_groups_for_query,
    ) = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        downloadable_files_for_query,
        files_to_file_groups_for_query,
        trial_ids,
        facets,
        user,
    )
    if where_clauses is None:

        # User doesn't have permissions to view any files; no need to issue a query.
        return 0

    if not where_clauses:

        # No where clause (the user is likely an admin).
        statement = select(
            [func.count(downloadable_files_for_query.c.id)]
        ).select_from(downloadable_files_for_query)

    else:
        # The outer join exposes file_group_id to the permission clauses.
        statement = (
            select([func.count(downloadable_files_for_query.c.id)])
            .where(sql_and(*where_clauses))
            .select_from(
                downloadable_files_for_query.outerjoin(
                    files_to_file_groups_for_query
                )
            )
        )

    return session.execute(statement).fetchone()[0]
|
2679
|
+
|
2680
|
+
@classmethod
@with_default_session
def count_by_facet_with_permissions(
    cls, session: Session, trial_ids: List[str] = None, user: Users = None
):
    """
    Map each facet_group to the number of files in it that the given user
    has permissions to view.

    Returns an empty dict, without querying, when the user may not view any files.
    """
    trial_ids = [] if trial_ids is None else trial_ids

    files, file_group_links = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        files, file_group_links, trial_ids, None, user
    )

    # User doesn't have permissions to view any files; no need to issue a query.
    if where_clauses is None:
        return {}

    counts_per_facet = select([files.c.facet_group, func.count(files.c.id)])
    if where_clauses:
        query = counts_per_facet.where(sql_and(*where_clauses)).select_from(
            files.outerjoin(file_group_links)
        )
    else:
        # No where clause (the user is likely an admin).
        query = counts_per_facet.select_from(files)

    query = query.group_by(files.c.facet_group)
    rows = session.execute(query).fetchall()
    return dict(rows)
|
2738
|
+
|
2739
|
+
@classmethod
@with_default_session
def list_object_urls_with_permissions(
    cls, session: Session, user: Users = None, ids: Iterable[int] = None
) -> Iterable[str]:
    """
    Return the object_urls of the files identified by ids that the given user
    has permission to view. Files the user may not view are silently omitted.

    Args:
        session: database session used to execute the query.
        user: the user whose permissions restrict the result.
        ids: ids of the downloadable files to look up.

    Returns:
        A list of object_url strings; empty when no ids are given or the user
        has no permission to view any files.
    """
    # Materialize once so a generator or the None default is handled safely.
    requested_ids = list(ids) if ids is not None else []
    if not requested_ids:
        return []

    (
        downloadable_files_for_query,
        files_to_file_groups_for_query,
    ) = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        downloadable_files_for_query,
        files_to_file_groups_for_query,
        None,
        None,
        user,
    )
    if where_clauses is None:

        # User doesn't have permissions to view any files; no need to issue a query.
        return []

    id_filter = downloadable_files_for_query.c.id.in_(requested_ids)

    if not where_clauses:

        # No where clause (the user is likely an admin).
        statement = (
            select([downloadable_files_for_query.c.object_url])
            .where(id_filter)
            .select_from(downloadable_files_for_query)
        )

    else:
        statement = (
            select([downloadable_files_for_query.c.object_url])
            .where(sql_and(*where_clauses, id_filter))
            .select_from(
                downloadable_files_for_query.outerjoin(
                    files_to_file_groups_for_query
                )
            )
        )

    return [row[0] for row in session.execute(statement).fetchall()]
|
2787
|
+
|
2788
|
+
@classmethod
def _generate_trial_file_counts(
    cls, downloadable_files: Iterable
) -> Dict[str, int]:
    """Count files per trial_id, skipping files whose data_category is falsy.

    Args:
        downloadable_files: objects exposing ``trial_id`` and ``data_category``.

    Returns:
        A defaultdict mapping trial_id to file count; absent trials read as 0.
    """
    counts = defaultdict(int)
    for downloadable_file in downloadable_files:
        if downloadable_file.data_category:
            counts[downloadable_file.trial_id] += 1
    return counts
|
2799
|
+
|
2800
|
+
@classmethod
@with_default_session
def get_trial_facets_with_permissions(
    cls, session: Session, facets: Iterable[Iterable], user: Users = None
) -> Dict[str, int]:
    """
    Build the trial facets from per-trial counts of the files that the user
    has permissions to view. Files with a non-true data_category property
    are not included in these counts.

    Returns an empty dict, without querying, when the user may not view any files.
    """
    files, file_group_links = DownloadableFiles._generate_query_objects()
    where_clauses = DownloadableFiles._generate_where_clauses(
        files, file_group_links, None, facets, user
    )

    # User doesn't have permissions to view any files; no need to issue a query.
    if where_clauses is None:
        return {}

    all_columns = select([files])
    if where_clauses:
        query = all_columns.where(sql_and(*where_clauses)).select_from(
            files.outerjoin(file_group_links)
        )
    else:
        # No where clause (the user is likely an admin).
        query = all_columns.select_from(files)

    rows = session.execute(query).fetchall()
    visible_files = DownloadableFiles._convert_list_results(files, rows)
    per_trial_counts = DownloadableFiles._generate_trial_file_counts(visible_files)
    return build_trial_facets(per_trial_counts)
|
2850
|
+
|
2301
2851
|
@with_default_session
|
2302
2852
|
def get_related_files(self, session: Session) -> list:
|
2303
2853
|
"""
|
@@ -2340,15 +2890,16 @@ class DownloadableFiles(CommonColumns):
|
|
2340
2890
|
|
2341
2891
|
return related_files
|
2342
2892
|
|
2893
|
+
# this is the old file filter code which we are temporarily putting back in to make facet search work again
|
2894
|
+
# TODO use old search code again
|
2343
2895
|
@staticmethod
|
2344
2896
|
def build_file_filter(
|
2345
|
-
trial_ids: List[str] =
|
2897
|
+
trial_ids: List[str] = None, facets: List[List[str]] = None, user: Users = None
|
2346
2898
|
) -> Callable[[Query], Query]:
|
2347
2899
|
"""
|
2348
2900
|
Build a file filter function based on the provided parameters. The resultant
|
2349
2901
|
filter can then be passed as the `filter_` argument of `DownloadableFiles.list`
|
2350
2902
|
or `DownloadableFiles.count`.
|
2351
|
-
|
2352
2903
|
Args:
|
2353
2904
|
trial_ids: if provided, the filter will include only files with these trial IDs.
|
2354
2905
|
upload_types: if provided, the filter will include only files with these upload types.
|
@@ -2358,6 +2909,12 @@ class DownloadableFiles(CommonColumns):
|
|
2358
2909
|
Returns:
|
2359
2910
|
A function that adds filters to a query against the DownloadableFiles table.
|
2360
2911
|
"""
|
2912
|
+
if trial_ids is None:
|
2913
|
+
trial_ids = []
|
2914
|
+
|
2915
|
+
if facets is None:
|
2916
|
+
facets = []
|
2917
|
+
|
2361
2918
|
file_filters = []
|
2362
2919
|
if trial_ids:
|
2363
2920
|
file_filters.append(DownloadableFiles.trial_id.in_(trial_ids))
|
@@ -2418,10 +2975,10 @@ class DownloadableFiles(CommonColumns):
|
|
2418
2975
|
"additional_metadata": additional_metadata,
|
2419
2976
|
}
|
2420
2977
|
|
2978
|
+
# TODO maybe put non supported stuff from file_metadata to some misc jsonb column?
|
2421
2979
|
for key, value in file_metadata.items():
|
2422
2980
|
if key in supported_columns:
|
2423
2981
|
filtered_metadata[key] = value
|
2424
|
-
# TODO maybe put non supported stuff from file_metadata to some misc jsonb column?
|
2425
2982
|
|
2426
2983
|
etag = make_etag(filtered_metadata.values())
|
2427
2984
|
|
@@ -2584,6 +3141,7 @@ class DownloadableFiles(CommonColumns):
|
|
2584
3141
|
"""Get the total number of bytes of data stored across all files."""
|
2585
3142
|
filtered_query = filter_(session.query(func.sum(cls.file_size_bytes)))
|
2586
3143
|
total_bytes = filtered_query.one()[0]
|
3144
|
+
# return int(total_bytes)
|
2587
3145
|
return int(total_bytes or 0)
|
2588
3146
|
|
2589
3147
|
@classmethod
|
@@ -2600,6 +3158,8 @@ class DownloadableFiles(CommonColumns):
|
|
2600
3158
|
trial_facets = build_trial_facets(trial_file_counts)
|
2601
3159
|
return trial_facets
|
2602
3160
|
|
3161
|
+
# old code to make filter search work
|
3162
|
+
# TODO fix this
|
2603
3163
|
@classmethod
|
2604
3164
|
@with_default_session
|
2605
3165
|
def get_data_category_facets(
|
@@ -2611,6 +3171,14 @@ class DownloadableFiles(CommonColumns):
|
|
2611
3171
|
data_category_facets = build_data_category_facets(facet_group_file_counts)
|
2612
3172
|
return data_category_facets
|
2613
3173
|
|
3174
|
+
# new code that has been commented out to make filter search work
|
3175
|
+
# def get_data_category_facets(cls, trial_ids, user):
|
3176
|
+
# facet_group_file_counts = DownloadableFiles.count_by_facet_with_permissions(
|
3177
|
+
# trial_ids=trial_ids, user=user
|
3178
|
+
# )
|
3179
|
+
# data_category_facets = build_data_category_facets(facet_group_file_counts)
|
3180
|
+
# return data_category_facets
|
3181
|
+
|
2614
3182
|
|
2615
3183
|
# Query clause for computing a downloadable file's data category.
|
2616
3184
|
# Used above in the DownloadableFiles.data_category computed property.
|