nci-cidc-api-modules 1.0.0rc0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/config/db.py +1 -1
- cidc_api/config/secrets.py +2 -2
- cidc_api/config/settings.py +1 -2
- cidc_api/csms/auth.py +14 -7
- cidc_api/models/csms_api.py +101 -83
- cidc_api/models/files/details.py +28 -38
- cidc_api/models/files/facets.py +41 -24
- cidc_api/models/migrations.py +16 -9
- cidc_api/models/models.py +763 -195
- cidc_api/shared/auth.py +18 -13
- cidc_api/shared/gcloud_client.py +106 -61
- cidc_api/shared/rest_utils.py +6 -5
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/METADATA +33 -5
- nci_cidc_api_modules-1.0.1.dist-info/RECORD +25 -0
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/WHEEL +1 -1
- nci_cidc_api_modules-1.0.0rc0.dist-info/RECORD +0 -25
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/LICENSE +0 -0
- {nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/top_level.txt +0 -0
cidc_api/shared/auth.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
from functools import wraps
|
2
|
-
from packaging import version
|
3
2
|
from typing import List
|
4
3
|
|
5
4
|
import requests
|
5
|
+
from packaging import version
|
6
6
|
from jose import jwt
|
7
7
|
from flask import g, request, current_app as app, Flask
|
8
8
|
from werkzeug.exceptions import Unauthorized, BadRequest, PreconditionFailed
|
@@ -13,6 +13,8 @@ from ..config.logging import get_logger
|
|
13
13
|
|
14
14
|
logger = get_logger(__name__)
|
15
15
|
|
16
|
+
TIMEOUT_IN_SECONDS = 20
|
17
|
+
|
16
18
|
|
17
19
|
### Main auth utility functions ###
|
18
20
|
def validate_api_auth(app: Flask):
|
@@ -32,7 +34,7 @@ def validate_api_auth(app: Flask):
|
|
32
34
|
)
|
33
35
|
|
34
36
|
|
35
|
-
def requires_auth(resource: str, allowed_roles: list =
|
37
|
+
def requires_auth(resource: str, allowed_roles: list = None):
|
36
38
|
"""
|
37
39
|
A decorator that adds authentication and basic access to an endpoint.
|
38
40
|
|
@@ -43,6 +45,9 @@ def requires_auth(resource: str, allowed_roles: list = []):
|
|
43
45
|
Unauthorized if unauthorized
|
44
46
|
"""
|
45
47
|
|
48
|
+
if allowed_roles is None:
|
49
|
+
allowed_roles = []
|
50
|
+
|
46
51
|
def decorator(endpoint):
|
47
52
|
# Store metadata on this function stating that it is protected by authentication
|
48
53
|
endpoint.is_protected = True
|
@@ -159,11 +164,11 @@ def _extract_token() -> str:
|
|
159
164
|
assert bearer.lower() == "bearer"
|
160
165
|
else:
|
161
166
|
id_token = request.json["id_token"]
|
162
|
-
except (AssertionError, AttributeError, KeyError, TypeError, ValueError):
|
167
|
+
except (AssertionError, AttributeError, KeyError, TypeError, ValueError) as exc:
|
163
168
|
raise Unauthorized(
|
164
169
|
"Either the 'Authorization' header must be set with structure 'Authorization: Bearer <id token>' "
|
165
170
|
'or "id_token" must be present in the JSON body of the request.'
|
166
|
-
)
|
171
|
+
) from exc
|
167
172
|
|
168
173
|
return id_token
|
169
174
|
|
@@ -184,11 +189,11 @@ def _get_issuer_public_key(token: str) -> dict:
|
|
184
189
|
try:
|
185
190
|
header = jwt.get_unverified_header(token)
|
186
191
|
except jwt.JWTError as e:
|
187
|
-
raise Unauthorized(str(e))
|
192
|
+
raise Unauthorized(str(e)) from e
|
188
193
|
|
189
194
|
# Get public keys from our Auth0 domain
|
190
195
|
jwks_url = f"https://{AUTH0_DOMAIN}/.well-known/jwks.json"
|
191
|
-
jwks = requests.get(jwks_url).json()
|
196
|
+
jwks = requests.get(jwks_url, timeout=TIMEOUT_IN_SECONDS).json()
|
192
197
|
|
193
198
|
# Obtain the public key used to sign this token
|
194
199
|
public_key = None
|
@@ -198,7 +203,7 @@ def _get_issuer_public_key(token: str) -> dict:
|
|
198
203
|
|
199
204
|
# If no matching public key was found, we can't validate the token
|
200
205
|
if not public_key:
|
201
|
-
raise Unauthorized("Found no public key with id
|
206
|
+
raise Unauthorized(f"Found no public key with id {header['kid']}")
|
202
207
|
|
203
208
|
return public_key
|
204
209
|
|
@@ -233,11 +238,11 @@ def _decode_id_token(token: str, public_key: dict) -> dict:
|
|
233
238
|
except jwt.ExpiredSignatureError as e:
|
234
239
|
raise Unauthorized(
|
235
240
|
f"{e} Token expired. Obtain a new login token from the CIDC Portal, then try logging in again."
|
236
|
-
)
|
241
|
+
) from e
|
237
242
|
except jwt.JWTClaimsError as e:
|
238
|
-
raise Unauthorized(str(e))
|
243
|
+
raise Unauthorized(str(e)) from e
|
239
244
|
except jwt.JWTError as e:
|
240
|
-
raise Unauthorized(str(e))
|
245
|
+
raise Unauthorized(str(e)) from e
|
241
246
|
|
242
247
|
# Currently, only id_tokens are accepted for authentication.
|
243
248
|
# Going forward, we could also accept access tokens that we
|
@@ -340,9 +345,9 @@ def _enforce_cli_version():
|
|
340
345
|
|
341
346
|
try:
|
342
347
|
client, client_version = user_agent.split("/", 1)
|
343
|
-
except ValueError:
|
344
|
-
logger.error(
|
345
|
-
raise BadRequest("could not parse User-Agent string")
|
348
|
+
except ValueError as exc:
|
349
|
+
logger.error("Unrecognized user-agent string format: %s", user_agent)
|
350
|
+
raise BadRequest("could not parse User-Agent string") from exc
|
346
351
|
|
347
352
|
# The CLI sets the User-Agent header to `cidc-cli/{version}`,
|
348
353
|
# so we can assess whether the requester needs to update their CLI.
|
cidc_api/shared/gcloud_client.py
CHANGED
@@ -1,23 +1,21 @@
|
|
1
1
|
"""Utilities for interacting with the Google Cloud Platform APIs."""
|
2
|
+
|
3
|
+
# pylint: disable=logging-fstring-interpolation
|
4
|
+
|
2
5
|
import json
|
3
6
|
import os
|
4
7
|
from os import environ
|
5
|
-
|
6
|
-
from cidc_api.config.secrets import get_secrets_manager
|
7
|
-
|
8
|
-
os.environ["TZ"] = "UTC"
|
8
|
+
import base64
|
9
9
|
import datetime
|
10
10
|
import warnings
|
11
11
|
import hashlib
|
12
12
|
from collections import namedtuple
|
13
13
|
from concurrent.futures import Future
|
14
|
-
from sqlalchemy.orm.session import Session
|
15
14
|
from typing import (
|
16
15
|
Any,
|
17
16
|
BinaryIO,
|
18
17
|
Callable,
|
19
18
|
Dict,
|
20
|
-
Iterable,
|
21
19
|
List,
|
22
20
|
Optional,
|
23
21
|
Set,
|
@@ -25,13 +23,18 @@ from typing import (
|
|
25
23
|
Union,
|
26
24
|
)
|
27
25
|
|
28
|
-
import
|
26
|
+
from werkzeug.datastructures import FileStorage
|
27
|
+
from sqlalchemy.orm.session import Session
|
29
28
|
from google.cloud import storage, pubsub, bigquery
|
30
29
|
from google.cloud.bigquery.enums import EntityTypes
|
31
30
|
from google.oauth2.service_account import Credentials
|
32
|
-
from werkzeug.datastructures import FileStorage
|
33
|
-
import googleapiclient.discovery
|
34
31
|
from google.api_core.iam import Policy
|
32
|
+
from google.api_core.client_options import ClientOptions
|
33
|
+
import googleapiclient.discovery
|
34
|
+
import requests
|
35
|
+
|
36
|
+
from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
|
37
|
+
from cidc_api.config.secrets import get_secrets_manager
|
35
38
|
|
36
39
|
from ..config.settings import (
|
37
40
|
DEV_USE_GCS,
|
@@ -55,57 +58,82 @@ from ..config.settings import (
|
|
55
58
|
)
|
56
59
|
from ..config.logging import get_logger
|
57
60
|
|
58
|
-
|
59
|
-
|
61
|
+
os.environ["TZ"] = "UTC"
|
60
62
|
logger = get_logger(__name__)
|
61
63
|
|
62
|
-
|
63
|
-
|
64
|
-
|
64
|
+
TIMEOUT_IN_SECONDS = 20
|
65
|
+
|
66
|
+
# these should be initialized here or used as cached values
|
67
|
+
STORAGE_CLIENT = None
|
68
|
+
BIGQUERY_CLIENT = None
|
69
|
+
CRM_SERVICE = None
|
65
70
|
|
66
71
|
# The Secret Manager object should only be initiated once and reused.
|
67
72
|
# This is due to the fact that every time this object is initiated, a
|
68
73
|
# Google Cloud client is also initiated, which is an expensive handshake
|
69
74
|
# that significantly adds to the latency of the script.
|
70
|
-
|
71
|
-
secret_manager = get_secrets_manager()
|
75
|
+
SECRET_MANAGER = get_secrets_manager(TESTING)
|
72
76
|
|
73
77
|
|
74
78
|
def _get_storage_client() -> storage.Client:
|
79
|
+
global STORAGE_CLIENT
|
80
|
+
if STORAGE_CLIENT is None:
|
81
|
+
logger.debug("Getting local client")
|
82
|
+
if os.environ.get("DEV_GOOGLE_STORAGE", None):
|
83
|
+
client_options = ClientOptions(
|
84
|
+
api_endpoint=os.environ.get("DEV_GOOGLE_STORAGE")
|
85
|
+
)
|
86
|
+
credentials = Credentials.from_service_account_info(
|
87
|
+
json.loads(SECRET_MANAGER.get("APP_ENGINE_CREDENTIALS"))
|
88
|
+
)
|
89
|
+
STORAGE_CLIENT = storage.Client(
|
90
|
+
client_options=client_options, credentials=credentials
|
91
|
+
)
|
92
|
+
logger.debug(f"Local client set to {STORAGE_CLIENT}")
|
93
|
+
return STORAGE_CLIENT
|
94
|
+
|
95
|
+
return _get_storage_client2()
|
96
|
+
|
97
|
+
return STORAGE_CLIENT
|
98
|
+
|
99
|
+
|
100
|
+
def _get_storage_client2() -> storage.Client:
|
75
101
|
"""
|
76
102
|
the project which the client acts on behalf of falls back to the default inferred from the environment
|
77
103
|
see: https://googleapis.dev/python/storage/latest/client.html#google.cloud.storage.client.Client
|
78
104
|
|
79
105
|
directly providing service account credentials for signing in get_signed_url() below
|
80
106
|
"""
|
81
|
-
global
|
82
|
-
if
|
107
|
+
global STORAGE_CLIENT
|
108
|
+
if STORAGE_CLIENT is None:
|
83
109
|
credentials = Credentials.from_service_account_info(
|
84
|
-
json.loads(
|
110
|
+
json.loads(SECRET_MANAGER.get(environ.get("APP_ENGINE_CREDENTIALS_ID")))
|
85
111
|
)
|
86
|
-
|
87
|
-
|
112
|
+
# client_options = ClientOptions(api_endpoint=os.environ.get("DEV_GOOGLE_STORAGE"))
|
113
|
+
# STORAGE_CLIENT = storage.Client(client_options=client_options, credentials=credentials)
|
114
|
+
STORAGE_CLIENT = storage.Client(credentials=credentials)
|
115
|
+
return STORAGE_CLIENT
|
88
116
|
|
89
117
|
|
90
118
|
def _get_crm_service() -> googleapiclient.discovery.Resource:
|
91
119
|
"""
|
92
120
|
Initializes a Cloud Resource Manager service.
|
93
121
|
"""
|
94
|
-
global
|
95
|
-
if
|
122
|
+
global CRM_SERVICE
|
123
|
+
if CRM_SERVICE is None:
|
96
124
|
credentials = Credentials.from_service_account_info(
|
97
|
-
json.loads(
|
125
|
+
json.loads(SECRET_MANAGER.get(environ.get("APP_ENGINE_CREDENTIALS_ID")))
|
98
126
|
)
|
99
|
-
|
127
|
+
CRM_SERVICE = googleapiclient.discovery.build(
|
100
128
|
"cloudresourcemanager", "v1", credentials=credentials
|
101
129
|
)
|
102
|
-
return
|
130
|
+
return CRM_SERVICE
|
103
131
|
|
104
132
|
|
105
133
|
def _get_bucket(bucket_name: str) -> storage.Bucket:
|
106
134
|
"""
|
107
135
|
Get the bucket with name `bucket_name` from GCS.
|
108
|
-
This does not make an HTTP request; it simply instantiates a bucket object owned by
|
136
|
+
This does not make an HTTP request; it simply instantiates a bucket object owned by STORAGE_CLIENT.
|
109
137
|
see: https://googleapis.dev/python/storage/latest/client.html#google.cloud.storage.client.Client.bucket
|
110
138
|
"""
|
111
139
|
storage_client = _get_storage_client()
|
@@ -134,24 +162,26 @@ def _get_bigquery_dataset(dataset_id: str) -> bigquery.Dataset:
|
|
134
162
|
Get the bigquery dataset with the id 'dataset_id'.
|
135
163
|
makes an API request to pull this with the bigquery client
|
136
164
|
"""
|
137
|
-
global
|
138
|
-
if
|
165
|
+
global BIGQUERY_CLIENT
|
166
|
+
if BIGQUERY_CLIENT is None:
|
139
167
|
credentials = Credentials.from_service_account_info(
|
140
|
-
json.loads(
|
168
|
+
json.loads(SECRET_MANAGER.get(environ.get("APP_ENGINE_CREDENTIALS_ID")))
|
141
169
|
)
|
142
|
-
|
170
|
+
# client_options = ClientOptions(api_endpoint=os.environ.get("DEV_GOOGLE_BIGQUERY"))
|
171
|
+
# BIGQUERY_CLIENT = bigquery.Client(client_options=client_options, credentials=credentials)
|
172
|
+
BIGQUERY_CLIENT = bigquery.Client(credentials=credentials)
|
143
173
|
|
144
|
-
dataset =
|
174
|
+
dataset = BIGQUERY_CLIENT.get_dataset(dataset_id) # Make an API request.
|
145
175
|
|
146
176
|
return dataset
|
147
177
|
|
148
178
|
|
149
|
-
|
179
|
+
XLSX_GCS_URI_FORMAT = (
|
150
180
|
"{trial_id}/xlsx/{template_category}/{template_type}/{upload_moment}.xlsx"
|
151
181
|
)
|
152
182
|
|
153
183
|
|
154
|
-
|
184
|
+
PseudoBblob = namedtuple(
|
155
185
|
"_pseudo_blob", ["name", "size", "md5_hash", "crc32c", "time_created"]
|
156
186
|
)
|
157
187
|
|
@@ -172,7 +202,7 @@ def upload_xlsx_to_gcs(
|
|
172
202
|
Returns:
|
173
203
|
arg1: GCS blob object
|
174
204
|
"""
|
175
|
-
blob_name =
|
205
|
+
blob_name = XLSX_GCS_URI_FORMAT.format(
|
176
206
|
trial_id=trial_id,
|
177
207
|
template_category=template_category,
|
178
208
|
template_type=template_type,
|
@@ -183,7 +213,7 @@ def upload_xlsx_to_gcs(
|
|
183
213
|
logger.info(
|
184
214
|
f"Would've saved {blob_name} to {GOOGLE_UPLOAD_BUCKET} and {GOOGLE_ACL_DATA_BUCKET}"
|
185
215
|
)
|
186
|
-
return
|
216
|
+
return PseudoBblob(
|
187
217
|
blob_name, 0, "_pseudo_md5_hash", "_pseudo_crc32c", upload_moment
|
188
218
|
)
|
189
219
|
|
@@ -382,7 +412,7 @@ def get_blob_names(
|
|
382
412
|
blob_list.extend(
|
383
413
|
storage_client.list_blobs(GOOGLE_ACL_DATA_BUCKET, prefix=prefix)
|
384
414
|
)
|
385
|
-
return
|
415
|
+
return {blob.name for blob in blob_list}
|
386
416
|
|
387
417
|
|
388
418
|
def grant_download_access_to_blob_names(
|
@@ -524,14 +554,11 @@ def _build_trial_upload_prefixes(
|
|
524
554
|
if not trial_id:
|
525
555
|
from ..models.models import TrialMetadata
|
526
556
|
|
527
|
-
trial_set =
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
)
|
533
|
-
]
|
534
|
-
)
|
557
|
+
trial_set = {
|
558
|
+
str(t.trial_id)
|
559
|
+
for t in session.query(TrialMetadata).add_columns(TrialMetadata.trial_id)
|
560
|
+
}
|
561
|
+
|
535
562
|
else:
|
536
563
|
trial_set = set([trial_id])
|
537
564
|
|
@@ -626,7 +653,7 @@ def grant_bigquery_iam_access(policy: Policy, user_emails: List[str]) -> None:
|
|
626
653
|
|
627
654
|
# try to set the new policy with edits
|
628
655
|
try:
|
629
|
-
|
656
|
+
CRM_SERVICE.projects().setIamPolicy(
|
630
657
|
resource=GOOGLE_CLOUD_PROJECT,
|
631
658
|
body={
|
632
659
|
"policy": policy,
|
@@ -649,7 +676,7 @@ def grant_bigquery_iam_access(policy: Policy, user_emails: List[str]) -> None:
|
|
649
676
|
)
|
650
677
|
)
|
651
678
|
dataset.access_entries = entries
|
652
|
-
|
679
|
+
BIGQUERY_CLIENT.update_dataset(dataset, ["access_entries"]) # Make an API request.
|
653
680
|
|
654
681
|
|
655
682
|
# Arbitrary upper bound on the number of GCS IAM bindings we expect a user to have for uploads
|
@@ -689,15 +716,21 @@ def revoke_bigquery_iam_access(policy: Policy, user_email: str) -> None:
|
|
689
716
|
"""
|
690
717
|
# find and remove user on binding
|
691
718
|
binding = next(
|
692
|
-
b for b in policy["bindings"] if b["role"] == GOOGLE_BIGQUERY_USER_ROLE
|
719
|
+
(b for b in policy["bindings"] if b["role"] == GOOGLE_BIGQUERY_USER_ROLE), None
|
693
720
|
)
|
721
|
+
if not binding:
|
722
|
+
logger.warning(
|
723
|
+
"Expected at least 1 user to have a bigquery jobUser role, but 0 found."
|
724
|
+
)
|
725
|
+
return
|
726
|
+
|
694
727
|
if "members" in binding and user_member(user_email) in binding["members"]:
|
695
728
|
binding["members"].remove(user_member(user_email))
|
696
729
|
|
697
730
|
# try update of the policy
|
698
731
|
try:
|
699
732
|
policy = (
|
700
|
-
|
733
|
+
CRM_SERVICE.projects()
|
701
734
|
.setIamPolicy(
|
702
735
|
resource=GOOGLE_CLOUD_PROJECT,
|
703
736
|
body={
|
@@ -719,14 +752,15 @@ def revoke_bigquery_iam_access(policy: Policy, user_email: str) -> None:
|
|
719
752
|
entry for entry in entries if entry.entity_id != user_email
|
720
753
|
]
|
721
754
|
|
722
|
-
dataset =
|
755
|
+
dataset = BIGQUERY_CLIENT.update_dataset(
|
723
756
|
dataset,
|
724
757
|
# Update just the `access_entries` property of the dataset.
|
725
758
|
["access_entries"],
|
726
759
|
) # Make an API request.
|
727
760
|
|
728
761
|
|
729
|
-
user_member
|
762
|
+
def user_member(email):
|
763
|
+
return f"user:{email}"
|
730
764
|
|
731
765
|
|
732
766
|
def _build_storage_iam_binding(
|
@@ -812,6 +846,8 @@ def _find_and_pop_storage_iam_binding(
|
|
812
846
|
return binding
|
813
847
|
|
814
848
|
|
849
|
+
# object_url | essex_test/xlsx/assays/wes_bam/2023-04-18T16:33:03.735217.xlsx
|
850
|
+
# http://localhost:4443/storage/v1/b/essex-data-staging-acl/o/essex_test%2Fxlsx%2Fassays%2Fwes_bam%2F2023-04-18T16%3A33%3A03.735217.xlsx?Expires=1683748504&GoogleAccessId=commanding-hawk-348012%40appspot.gserviceaccount.com&Signature=nBKeq%2BlOCYrCKqxXgiKP2hnLqOerrl5lTdGYfaFQPgkyJeRzHOk42R25L31X%2FKgR8t%2FHzqfpzQJzsW65kXDK59ZEhDs1TAS23gCUXHQMZImScU7yXWr%2FXTM4iVXNfDi%2Fq592v%2BTpnDowjnG21ixWRLt3oBep39trkAXL%2FOK%2Fe21fJHQvxNo%2F%2BMPGYUcU5oWJqdh1pS55IAZbLfhvcvUJQBSn0B0tWOSahncC9iLtaipBAMGA%2F3vjNUzUTuL2i0ED%2F7rkWrWPPaFaF6c0bTvpfF23hjNXzaH3CEq2a5ozvXAR2ltaDf7zgxxpwtC5XLKnjnc%2F%2BIIVsFnRdFzZGFTToA%3D%3D&response-content-disposition=attachment%3B+filename%3D%22_storage_v1_b_essex_test_xlsx_assays_wes_bam_2023-04-18T16%3A33%3A03.735217.xlsx%22
|
815
851
|
def get_signed_url(
|
816
852
|
object_name: str,
|
817
853
|
bucket_name: str = GOOGLE_ACL_DATA_BUCKET,
|
@@ -825,17 +861,24 @@ def get_signed_url(
|
|
825
861
|
https://cloud.google.com/storage/docs/access-control/signing-urls-with-helpers
|
826
862
|
"""
|
827
863
|
storage_client = _get_storage_client()
|
864
|
+
logger.info(storage_client)
|
828
865
|
bucket = storage_client.get_bucket(bucket_name)
|
829
866
|
blob = bucket.blob(object_name)
|
830
867
|
|
831
868
|
# Generate the signed URL, allowing a client to use `method` for `expiry_mins` minutes
|
832
869
|
expiration = datetime.timedelta(minutes=expiry_mins)
|
833
870
|
full_filename = object_name.replace("/", "_").replace('"', "_").replace(" ", "_")
|
871
|
+
other_kwargs = {}
|
872
|
+
if os.environ.get("DEV_GOOGLE_STORAGE", None):
|
873
|
+
other_kwargs["api_access_endpoint"] = (
|
874
|
+
os.environ.get("DEV_GOOGLE_STORAGE") or ""
|
875
|
+
) + (os.environ.get("DEV_GOOGLE_STORAGE_PATH") or "")
|
834
876
|
url = blob.generate_signed_url(
|
835
877
|
version="v2",
|
836
878
|
expiration=expiration,
|
837
879
|
method=method,
|
838
880
|
response_disposition=f'attachment; filename="{full_filename}"',
|
881
|
+
**other_kwargs,
|
839
882
|
)
|
840
883
|
logger.info(f"generated signed URL for {object_name}: {url}")
|
841
884
|
|
@@ -854,26 +897,28 @@ def _encode_and_publish(content: str, topic: str) -> Future:
|
|
854
897
|
logger.info(
|
855
898
|
f"Publishing message {content!r} to topic {DEV_CFUNCTIONS_SERVER}/{topic}"
|
856
899
|
)
|
857
|
-
import base64
|
858
900
|
|
859
901
|
bdata = base64.b64encode(content.encode("utf-8"))
|
860
902
|
try:
|
861
903
|
res = requests.post(
|
862
|
-
f"{DEV_CFUNCTIONS_SERVER}/{topic}",
|
904
|
+
f"{DEV_CFUNCTIONS_SERVER}/{topic}",
|
905
|
+
data={"data": bdata},
|
906
|
+
timeout=TIMEOUT_IN_SECONDS,
|
863
907
|
)
|
864
908
|
except Exception as e:
|
865
909
|
raise Exception(
|
866
910
|
f"Couldn't publish message {content!r} to topic {DEV_CFUNCTIONS_SERVER}/{topic}"
|
867
911
|
) from e
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
912
|
+
|
913
|
+
logger.info(f"Got {res}")
|
914
|
+
if res.status_code != 200:
|
915
|
+
raise Exception(
|
916
|
+
f"Couldn't publish message {content!r} to {DEV_CFUNCTIONS_SERVER}/{topic}: {res!r}"
|
917
|
+
)
|
918
|
+
|
874
919
|
else:
|
875
920
|
logger.info(f"Would've published message {content} to topic {topic}")
|
876
|
-
return
|
921
|
+
return None
|
877
922
|
|
878
923
|
# The Pub/Sub publisher client returns a concurrent.futures.Future
|
879
924
|
# containing info about whether the publishing was successful.
|
@@ -922,7 +967,7 @@ def send_email(to_emails: List[str], subject: str, html_content: str, **kw) -> N
|
|
922
967
|
|
923
968
|
logger.info(f"({ENV}) Sending email to {to_emails} with subject {subject}")
|
924
969
|
email_json = json.dumps(
|
925
|
-
|
970
|
+
{"to_emails": to_emails, "subject": subject, "html_content": html_content, **kw}
|
926
971
|
)
|
927
972
|
|
928
973
|
report = _encode_and_publish(email_json, GOOGLE_EMAILS_TOPIC)
|
cidc_api/shared/rest_utils.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
"""Shared utility functions for building CIDC API resource endpoints."""
|
2
|
+
|
2
3
|
from functools import wraps
|
3
4
|
from typing import Optional, Callable, Union
|
4
5
|
|
@@ -6,6 +7,7 @@ from flask import request, jsonify
|
|
6
7
|
from webargs import fields
|
7
8
|
from webargs.flaskparser import use_args
|
8
9
|
from marshmallow import validate
|
10
|
+
from marshmallow.exceptions import ValidationError
|
9
11
|
from werkzeug.exceptions import (
|
10
12
|
PreconditionRequired,
|
11
13
|
PreconditionFailed,
|
@@ -13,7 +15,6 @@ from werkzeug.exceptions import (
|
|
13
15
|
BadRequest,
|
14
16
|
UnprocessableEntity,
|
15
17
|
)
|
16
|
-
from marshmallow.exceptions import ValidationError
|
17
18
|
|
18
19
|
from ..models import BaseModel, BaseSchema, ValidationMultiError
|
19
20
|
|
@@ -48,11 +49,11 @@ def unmarshal_request(schema: BaseSchema, kwarg_name: str, load_sqla: bool = Tru
|
|
48
49
|
loaded_instance.validate()
|
49
50
|
# The many ways that validation errors might get raised...
|
50
51
|
except ValueError as e:
|
51
|
-
raise UnprocessableEntity(str(e))
|
52
|
+
raise UnprocessableEntity(str(e)) from e
|
52
53
|
except ValidationError as e:
|
53
|
-
raise UnprocessableEntity(e.messages)
|
54
|
+
raise UnprocessableEntity(e.messages) from e
|
54
55
|
except ValidationMultiError as e:
|
55
|
-
raise UnprocessableEntity({"errors": e.args[0]})
|
56
|
+
raise UnprocessableEntity({"errors": e.args[0]}) from e
|
56
57
|
|
57
58
|
kwargs[kwarg_name] = body
|
58
59
|
|
@@ -186,7 +187,7 @@ def use_args_with_pagination(argmap: dict, model_schema: BaseSchema):
|
|
186
187
|
return {k: v for k, v in args.items() if k in argmap.keys()}
|
187
188
|
|
188
189
|
def get_pagination_args(args: dict):
|
189
|
-
return {k: v for k, v in args.items() if k in pagination_argmap
|
190
|
+
return {k: v for k, v in args.items() if k in pagination_argmap}
|
190
191
|
|
191
192
|
def decorator(endpoint):
|
192
193
|
@wraps(endpoint)
|
@@ -1,10 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
|
-
Name:
|
3
|
-
Version: 1.0.
|
2
|
+
Name: nci_cidc_api_modules
|
3
|
+
Version: 1.0.1
|
4
4
|
Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
|
5
5
|
Home-page: https://github.com/NCI-CIDC/cidc-api-gae
|
6
6
|
License: MIT license
|
7
|
-
Requires-Python: >=3.
|
7
|
+
Requires-Python: >=3.9
|
8
8
|
Description-Content-Type: text/markdown
|
9
9
|
License-File: LICENSE
|
10
10
|
Requires-Dist: werkzeug ==2.0.3
|
@@ -23,7 +23,7 @@ Requires-Dist: pandas <2,>=1
|
|
23
23
|
Requires-Dist: python-dotenv ==0.10.3
|
24
24
|
Requires-Dist: requests ==2.22.0
|
25
25
|
Requires-Dist: jinja2 ~=3.0.3
|
26
|
-
Requires-Dist: nci-cidc-schemas ~=0.26.
|
26
|
+
Requires-Dist: nci-cidc-schemas ~=0.26.28
|
27
27
|
|
28
28
|
# NCI CIDC API <!-- omit in TOC -->
|
29
29
|
|
@@ -44,10 +44,11 @@ The next generation of the CIDC API, reworked to use Google Cloud-managed servic
|
|
44
44
|
- [Deploying by hand](#deploying-by-hand)
|
45
45
|
- [Connecting to the API](#connecting-to-the-api)
|
46
46
|
- [Provisioning the system from scratch](#provisioning-the-system-from-scratch)
|
47
|
+
- [Docker Compose](#setting-up-docker-compose)
|
47
48
|
|
48
49
|
## Install Python dependencies
|
49
50
|
|
50
|
-
Python versions tested include 3.
|
51
|
+
Python versions tested include 3.9 and 3.10. The current App Engine is using version 3.9 (see [app.prod.yaml](./app.prod.yaml)). You can use https://github.com/pyenv/pyenv to manage your python versions. Homebrew will also work, but you will have to be specific when you install packages with pip outside of virtual environments. On that note, it is recommended that you install your python dependencies in an isolated environment. For example,
|
51
52
|
|
52
53
|
```bash
|
53
54
|
# make a virtual environment in the current directory called "venv"
|
@@ -301,6 +302,33 @@ To connect to the production API locally, follow the same procedure, but instead
|
|
301
302
|
|
302
303
|
For an overview of how to set up the CIDC API service from scratch, see the step-by-step guide in `PROVISION.md`.
|
303
304
|
|
305
|
+
## Setting up Docker Compose
|
306
|
+
|
307
|
+
If you would like to run this project as a docker container, we have dockerized the cidc-api-gae and cidc-ui so that you don't have to install all the requirements above. Included in the docker-compose file are postgres:14 with data and test user login, bigquery-emulator, fake-gcs-server with buckets and data to match postgres, and gcs-oauth2-emulator to generate faked presigned urls.
|
308
|
+
|
309
|
+
**_NOTE:_** You must have docker installed and have this repository and cidc-ui in the same directory (~/git/cidc/cidc-ui and ~/git/cidc/cidc-api-gae), or you can download each and build the image with the command `docker build .`
|
310
|
+
|
311
|
+
**_NOTE:_** There are currently issues with the cidc-ui docker container, so you'll have to start it manually using the instructions in its repo.
|
312
|
+
|
313
|
+
**_NOTE:_** You can't use Docker while simultaneously running your NIH VPN. This is due to a quirk with self-hosting a google secrets bucket. More work is required to make the docker containers work while the VPN is on.
|
314
|
+
|
315
|
+
This repo has hot code reloading. However, you will need to build the image again if there is an update to python libraries. Make sure you don't use a cached image when rebuilding.
|
316
|
+
|
317
|
+
Make sure you add this line to your /etc/hosts file: ```127.0.0.1 host.docker.internal```
|
318
|
+
|
319
|
+
To run everything simply run the following commands:
|
320
|
+
```bash
|
321
|
+
vim .env # uncomment the docker section in the .env file. Comment out any overlapping variable definitions (POSTGRES)
|
322
|
+
cp ~/.config/gcloud/application_default_credentials.json .
|
323
|
+
cd docker
|
324
|
+
docker compose up
|
325
|
+
```
|
326
|
+
**_NOTE:_** You still need to install and sign in to the gcloud CLI. The application_default_credentials.json should be under the cidc-api-gae directory next to the Dockerfile. We have mocked most of the connection points to GCP, but at startup it still checks for a valid user account. This is very similar to the behavior of AWS's localstack, which requires a realistic token at the start even though it doesn't make connections to AWS.
|
327
|
+
|
328
|
+
**_TODO:_** The application_default_credentials.json could probably be faked and pointed to the gcs-oauth2-emulator for startup. In that case the gcloud CLI wouldn't be needed at all, and a faked application_default_credentials.json could be added under the docker folder.
|
329
|
+
|
330
|
+
|
331
|
+
|
304
332
|
## JIRA Integration
|
305
333
|
|
306
334
|
To set-up the git hook for JIRA integration, run:
|
@@ -0,0 +1,25 @@
|
|
1
|
+
cidc_api/config/__init__.py,sha256=5kcMGor07TrBm6e_UW6CCVnJu5wJ-8QX8X9ZRmeuPKA,147
|
2
|
+
cidc_api/config/db.py,sha256=ayeeNV-sV20hGoFyMMTMncI2V-FI9lVN3JV-Lmpr3xI,1981
|
3
|
+
cidc_api/config/logging.py,sha256=gJ2TGgQVREng4Hv0phlCCkQai7HhumKYjJxubpxS6Q0,1090
|
4
|
+
cidc_api/config/secrets.py,sha256=2DXeew1Pm0lnf2SLuo8wW5c5kOJp2WrhjflxZGsY_Ng,1505
|
5
|
+
cidc_api/config/settings.py,sha256=Ua6UpiQu9l1ZD-YlmpaWuQOv9tPyYLxWFLC2DEJdAyQ,4044
|
6
|
+
cidc_api/csms/__init__.py,sha256=eJkY6rWNOAUBmSd4G1_U6h7i472druKEtBdVmgFZVPg,20
|
7
|
+
cidc_api/csms/auth.py,sha256=25Yma2Kz3KLENAPSeBYacFuSZXng-EDgmgInKBsRyP0,3191
|
8
|
+
cidc_api/models/__init__.py,sha256=bl445G8Zic9YbhZ8ZBni07wtBMhLJRMBA-JqjLxx2bw,66
|
9
|
+
cidc_api/models/csms_api.py,sha256=Wp4b53vwOqSlOIaoAYGlI1p8ZfXRXmVJ6MLcsvzq0LA,31664
|
10
|
+
cidc_api/models/migrations.py,sha256=gp9vtkYbA9FFy2s-7woelAmsvQbJ41LO2_DY-YkFIrQ,11464
|
11
|
+
cidc_api/models/models.py,sha256=AYt0rIzaeQ0HHlTSeerbTpYwMJwqt93aadOuFLLEqBA,120820
|
12
|
+
cidc_api/models/schemas.py,sha256=7tDYtmULuzTt2kg7RorWhte06ffalgpQKrFiDRGcPEQ,2711
|
13
|
+
cidc_api/models/files/__init__.py,sha256=8BMTnUSHzUbz0lBeEQY6NvApxDD3GMWMduoVMos2g4Y,213
|
14
|
+
cidc_api/models/files/details.py,sha256=eg1u8uZwtxb0m9mFobcTL_mnPBMq1MPZv3NN3KWMGOI,62309
|
15
|
+
cidc_api/models/files/facets.py,sha256=5xI5eM1J0Uc97W0-MvAPSRRN-2Hs2xb_AIupliJRMJU,29172
|
16
|
+
cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
+
cidc_api/shared/auth.py,sha256=VMd_3QJE2iG16QxuGzHBV9MzJJItOZNn9gcw0_iUBLI,11647
|
18
|
+
cidc_api/shared/emails.py,sha256=AhSp2hxWyuMpe21pERuQwrAEy0yCfy3rvSygDNKtgdc,4820
|
19
|
+
cidc_api/shared/gcloud_client.py,sha256=7dDs0crLMJKdIp4IDSfrZBMB3h-zvWNieB81azoeLO4,33746
|
20
|
+
cidc_api/shared/rest_utils.py,sha256=LMfBpvJRjkfQjCzVXuhTTe4Foz4wlvaKg6QntyR-Hkc,6648
|
21
|
+
nci_cidc_api_modules-1.0.1.dist-info/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
|
22
|
+
nci_cidc_api_modules-1.0.1.dist-info/METADATA,sha256=rhlYaotiiwG6djDCZaSq5GT9CcE_2YVlHs_gnSd9tgY,40275
|
23
|
+
nci_cidc_api_modules-1.0.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
24
|
+
nci_cidc_api_modules-1.0.1.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
|
25
|
+
nci_cidc_api_modules-1.0.1.dist-info/RECORD,,
|
@@ -1,25 +0,0 @@
|
|
1
|
-
cidc_api/config/__init__.py,sha256=5kcMGor07TrBm6e_UW6CCVnJu5wJ-8QX8X9ZRmeuPKA,147
|
2
|
-
cidc_api/config/db.py,sha256=cw0wTfcak_FyAKZE8oKVYLVkph644Lv-_MAbYutGV44,1978
|
3
|
-
cidc_api/config/logging.py,sha256=gJ2TGgQVREng4Hv0phlCCkQai7HhumKYjJxubpxS6Q0,1090
|
4
|
-
cidc_api/config/secrets.py,sha256=DlloiKiy420WJO250SfKs_-T0wKYPbgdwyPncP5MGLY,1518
|
5
|
-
cidc_api/config/settings.py,sha256=GyeGDCj1tt61u4fbhomPIDq_fiamRMIT6dQPY1ufl-w,4076
|
6
|
-
cidc_api/csms/__init__.py,sha256=eJkY6rWNOAUBmSd4G1_U6h7i472druKEtBdVmgFZVPg,20
|
7
|
-
cidc_api/csms/auth.py,sha256=upJ52PX-u1KD4I0-Hy4JptCp_cp6qgs_ZMXPxqfeVI0,3071
|
8
|
-
cidc_api/models/__init__.py,sha256=bl445G8Zic9YbhZ8ZBni07wtBMhLJRMBA-JqjLxx2bw,66
|
9
|
-
cidc_api/models/csms_api.py,sha256=ubT-1Z5ajh5slAZ1jUvY8_0xKKl-7AVMwpQy0QT08eU,31379
|
10
|
-
cidc_api/models/migrations.py,sha256=Z2ycc-0XWdOnfDmCPFNXdg0lHLL8sDLIlhJHiEeYrv8,11292
|
11
|
-
cidc_api/models/models.py,sha256=hs6zrrEcLID_pI-Ue78B5HPFbWa3w680JO9O8BrPTYI,100057
|
12
|
-
cidc_api/models/schemas.py,sha256=7tDYtmULuzTt2kg7RorWhte06ffalgpQKrFiDRGcPEQ,2711
|
13
|
-
cidc_api/models/files/__init__.py,sha256=8BMTnUSHzUbz0lBeEQY6NvApxDD3GMWMduoVMos2g4Y,213
|
14
|
-
cidc_api/models/files/details.py,sha256=AP0EeMj9E9dzlafPToNCIeM4Ia851lv4bz9fUioTr8s,63004
|
15
|
-
cidc_api/models/files/facets.py,sha256=FVVWDVR0bMX7TvZ9sTelWTeKaLhBP1oYcim_2iVx9e8,28653
|
16
|
-
cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
cidc_api/shared/auth.py,sha256=KOmakviByttH4b3yFw0zmcjciA-245AWabz8qatIcRQ,11475
|
18
|
-
cidc_api/shared/emails.py,sha256=AhSp2hxWyuMpe21pERuQwrAEy0yCfy3rvSygDNKtgdc,4820
|
19
|
-
cidc_api/shared/gcloud_client.py,sha256=v1QKFym1uX1FM5wuM2AyQZKZ0mA7CUnszm06pX-XHDo,31194
|
20
|
-
cidc_api/shared/rest_utils.py,sha256=MCUypPmOOHKQR-vThkSMj8Fe8m_lZmv85jViCxZgGPE,6633
|
21
|
-
nci_cidc_api_modules-1.0.0rc0.dist-info/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
|
22
|
-
nci_cidc_api_modules-1.0.0rc0.dist-info/METADATA,sha256=JHO1daZNVK3D6UgZll7UHu8ROVVPUCeTWQxVad-Bp-s,37983
|
23
|
-
nci_cidc_api_modules-1.0.0rc0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
24
|
-
nci_cidc_api_modules-1.0.0rc0.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
|
25
|
-
nci_cidc_api_modules-1.0.0rc0.dist-info/RECORD,,
|
File without changes
|
{nci_cidc_api_modules-1.0.0rc0.dist-info → nci_cidc_api_modules-1.0.1.dist-info}/top_level.txt
RENAMED
File without changes
|