nci-cidc-api-modules 1.1.30__py3-none-any.whl → 1.1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/config/settings.py +1 -0
- cidc_api/models/models.py +68 -0
- cidc_api/shared/gcloud_client.py +76 -13
- {nci_cidc_api_modules-1.1.30.dist-info → nci_cidc_api_modules-1.1.31.dist-info}/METADATA +2 -2
- {nci_cidc_api_modules-1.1.30.dist-info → nci_cidc_api_modules-1.1.31.dist-info}/RECORD +8 -8
- {nci_cidc_api_modules-1.1.30.dist-info → nci_cidc_api_modules-1.1.31.dist-info}/WHEEL +0 -0
- {nci_cidc_api_modules-1.1.30.dist-info → nci_cidc_api_modules-1.1.31.dist-info}/licenses/LICENSE +0 -0
- {nci_cidc_api_modules-1.1.30.dist-info → nci_cidc_api_modules-1.1.31.dist-info}/top_level.txt +0 -0
cidc_api/config/settings.py
CHANGED
@@ -70,6 +70,7 @@ GOOGLE_INTAKE_BUCKET = environ["GOOGLE_INTAKE_BUCKET"]
|
|
70
70
|
GOOGLE_UPLOAD_BUCKET = environ["GOOGLE_UPLOAD_BUCKET"]
|
71
71
|
GOOGLE_UPLOAD_TOPIC = environ["GOOGLE_UPLOAD_TOPIC"]
|
72
72
|
GOOGLE_ACL_DATA_BUCKET = environ["GOOGLE_ACL_DATA_BUCKET"]
|
73
|
+
GOOGLE_CLINICAL_DATA_BUCKET = environ["GOOGLE_CLINICAL_DATA_BUCKET"]
|
73
74
|
GOOGLE_EPHEMERAL_BUCKET = environ["GOOGLE_EPHEMERAL_BUCKET"]
|
74
75
|
GOOGLE_UPLOAD_ROLE = environ["GOOGLE_UPLOAD_ROLE"]
|
75
76
|
GOOGLE_LISTER_ROLE = environ["GOOGLE_LISTER_ROLE"]
|
cidc_api/models/models.py
CHANGED
@@ -22,6 +22,7 @@ __all__ = [
|
|
22
22
|
"Users",
|
23
23
|
"ValidationMultiError",
|
24
24
|
"with_default_session",
|
25
|
+
"PreprocessedFiles",
|
25
26
|
]
|
26
27
|
|
27
28
|
import hashlib
|
@@ -3184,3 +3185,70 @@ def upload_manifest_json(
|
|
3184
3185
|
# Publish that a manifest upload has been received
|
3185
3186
|
publish_patient_sample_update(manifest_upload.id)
|
3186
3187
|
return manifest_upload.id
|
3188
|
+
|
3189
|
+
|
3190
|
+
class PreprocessedFiles(CommonColumns):
    """ORM model for rows of the ``preprocessed_files`` table.

    Each row stores a file name, its object URL, an optional trial id, a
    file category, the uploader's email, a status string and two version
    fields. The status values used by the methods of this class are
    ``"pending"``, ``"current"`` and ``"archived"``.

    NOTE(review): every method is wrapped in ``with_default_session``, which
    presumably supplies ``session`` when the caller omits it -- confirm
    against the decorator's definition.
    """

    __tablename__ = "preprocessed_files"

    file_name = Column(String)
    object_url = Column(String)
    trial_id = Column(String)
    file_category = Column(String)
    uploader_email = Column(String)
    status = Column(String)
    version = Column(Integer)
    released_version = Column(String)

    @staticmethod
    @with_default_session
    def create(
        file_name: str,
        object_url: str,
        file_category: str,
        uploader_email: str,
        status: str = "pending",
        trial_id: str = None,
        version: int = None,
        released_version: str = None,
        session: Session = None,
    ):
        """Build a PreprocessedFiles record, insert it and return it.

        All arguments except ``session`` are copied onto the column of the
        same name; ``status`` defaults to ``"pending"``. The record is
        persisted through its own ``insert`` method using ``session``.
        """
        column_values = {
            "file_name": file_name,
            "object_url": object_url,
            "file_category": file_category,
            "uploader_email": uploader_email,
            "status": status,
            "trial_id": trial_id,
            "version": version,
            "released_version": released_version,
        }
        record = PreprocessedFiles(**column_values)
        record.insert(session=session)
        return record

    @classmethod
    @with_default_session
    def archive_current_files(cls, file_category: str, session: Session = None):
        """Mark every 'current' file of ``file_category`` as 'archived'.

        Each affected row also gets its ``_updated`` attribute set to the
        time at which it was processed; all changes are committed together.
        """
        to_archive = cls.get_files_by_category_and_status(file_category, "current", session=session)
        for record in to_archive:
            record.status = "archived"
            record._updated = datetime.now()
        session.commit()

    @classmethod
    @with_default_session
    def delete_pending_files_by_category(cls, file_category: str, session: Session = None):
        """Remove every 'pending' file of ``file_category`` and commit."""
        pending = cls.get_files_by_category_and_status(file_category, "pending", session=session)
        for stale in pending:
            session.delete(stale)
        session.commit()

    @classmethod
    @with_default_session
    def get_files_by_category_and_status(
        cls, file_category: str, status: str, session: Session = None
    ) -> list["PreprocessedFiles"]:
        """Fetch all rows whose category and status equal the given values."""
        matching = session.query(cls).filter_by(file_category=file_category, status=status)
        return matching.all()
|
cidc_api/shared/gcloud_client.py
CHANGED
@@ -2,15 +2,16 @@
|
|
2
2
|
|
3
3
|
# pylint: disable=logging-fstring-interpolation
|
4
4
|
|
5
|
-
import json
|
6
|
-
import os
|
7
|
-
from os import environ
|
8
5
|
import base64
|
9
6
|
import datetime
|
10
|
-
import warnings
|
11
7
|
import hashlib
|
8
|
+
import io
|
9
|
+
import json
|
10
|
+
import os
|
11
|
+
import warnings
|
12
12
|
from collections import namedtuple
|
13
13
|
from concurrent.futures import Future
|
14
|
+
from os import environ
|
14
15
|
from typing import (
|
15
16
|
Any,
|
16
17
|
BinaryIO,
|
@@ -23,19 +24,20 @@ from typing import (
|
|
23
24
|
Union,
|
24
25
|
)
|
25
26
|
|
26
|
-
|
27
|
-
|
27
|
+
import googleapiclient.discovery
|
28
|
+
import requests
|
29
|
+
from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
|
30
|
+
from google.api_core.client_options import ClientOptions
|
31
|
+
from google.api_core.iam import Policy
|
28
32
|
from google.cloud import storage, pubsub, bigquery
|
29
33
|
from google.cloud.bigquery.enums import EntityTypes
|
30
34
|
from google.oauth2.service_account import Credentials
|
31
|
-
from
|
32
|
-
from
|
33
|
-
import
|
34
|
-
import requests
|
35
|
+
from sqlalchemy.orm.session import Session
|
36
|
+
from werkzeug.datastructures import FileStorage
|
37
|
+
from werkzeug.utils import secure_filename
|
35
38
|
|
36
|
-
from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
|
37
39
|
from cidc_api.config.secrets import get_secrets_manager
|
38
|
-
|
40
|
+
from ..config.logging import get_logger
|
39
41
|
from ..config.settings import (
|
40
42
|
DEV_USE_GCS,
|
41
43
|
GOOGLE_INTAKE_ROLE,
|
@@ -57,7 +59,6 @@ from ..config.settings import (
|
|
57
59
|
DEV_CFUNCTIONS_SERVER,
|
58
60
|
INACTIVE_USER_DAYS,
|
59
61
|
)
|
60
|
-
from ..config.logging import get_logger
|
61
62
|
|
62
63
|
os.environ["TZ"] = "UTC"
|
63
64
|
logger = get_logger(__name__)
|
@@ -216,6 +217,68 @@ def upload_xlsx_to_gcs(
|
|
216
217
|
return final_object
|
217
218
|
|
218
219
|
|
220
|
+
def upload_file_to_gcs(file: FileStorage, bucket_name: str, gcs_folder: str) -> str:
    """Upload a file to the specified GCS folder and return the GCS path from the bucket.

    Args:
        file: the uploaded file; its ``filename``, ``content_type`` and
            contents are read.
        bucket_name: name of the destination bucket.
        gcs_folder: folder (object-name prefix) inside the bucket.

    Returns:
        The object path ``<gcs_folder>/<sanitized filename>``.

    Raises:
        ValueError: if the filename is missing or sanitizes to an empty string.

    When ``ENV == "dev"`` and ``DEV_USE_GCS`` is falsy nothing is uploaded;
    the would-be path is logged and returned.
    """
    # Function-scope import so the block does not rely on a file-level import.
    import posixpath

    # Secure the filename. secure_filename may legitimately return "" (e.g.
    # for names made only of unsafe characters); uploading then would write
    # the file contents to the bare folder key, so refuse instead.
    filename = secure_filename(file.filename or "")
    if not filename:
        raise ValueError("Uploaded file has no usable filename")

    # GCS object names always use "/" as the delimiter, whatever the host OS,
    # so join with posixpath rather than the platform-dependent os.path.
    gcs_file_path = posixpath.join(gcs_folder, filename)
    binary_file = io.BytesIO(file.read())

    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've saved {gcs_file_path} to {bucket_name}")
        return gcs_file_path

    # Upload to GCS
    blob = _get_bucket(bucket_name).blob(gcs_file_path)
    blob.upload_from_file(binary_file, content_type=file.content_type)

    return gcs_file_path
|
236
|
+
|
237
|
+
|
238
|
+
def move_gcs_file(bucket_name: str, existing_path: str, to_folder: str, append_timestamp: bool = True) -> str:
    """Move a file within a GCS bucket to a new folder, optionally appending a timestamp to the filename.

    Args:
        bucket_name: name of the bucket holding the object.
        existing_path: current object path inside the bucket.
        to_folder: destination folder; a trailing "/" is added if missing.
        append_timestamp: when true, the basename is passed through
            ``_append_iso_timestamp_to_filename`` before the move.

    Returns:
        The new object path inside the bucket.

    Raises:
        Exception: if no object exists at ``existing_path``.

    When ``ENV == "dev"`` and ``DEV_USE_GCS`` is falsy nothing is moved;
    the would-be destination is logged and returned.
    """
    filename = os.path.basename(existing_path)
    if append_timestamp:
        filename = _append_iso_timestamp_to_filename(filename)
    # Ensure trailing slash on folder
    if not to_folder.endswith("/"):
        to_folder += "/"
    new_gcs_file_path = f"{to_folder}{filename}"

    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've moved {existing_path} to {new_gcs_file_path} in {bucket_name}")
        return new_gcs_file_path

    # Only touch the bucket once we know we are really talking to GCS; this
    # mirrors upload_file_to_gcs, which also resolves the bucket after the
    # dev-mode guard.
    bucket = _get_bucket(bucket_name)
    source_blob = bucket.blob(existing_path)
    if not source_blob.exists():
        raise Exception("Expected file not found in GCS")

    # Moving an object onto itself would rewrite it and then delete the only
    # copy, so treat it as a no-op.
    if new_gcs_file_path == existing_path:
        return new_gcs_file_path

    new_blob = bucket.blob(new_gcs_file_path)
    # GCS move = rewrite + delete. rewrite() may copy a large object in
    # several chunks: it returns a continuation token until the copy is
    # complete, so keep calling it and only delete the source afterwards.
    token = None
    while True:
        token, _bytes_rewritten, _total_bytes = new_blob.rewrite(source_blob, token=token)
        if token is None:
            break
    source_blob.delete()

    return new_gcs_file_path
|
262
|
+
|
263
|
+
|
264
|
+
def delete_items_from_folder(bucket_name: str, folder: str):
    """Deletes all blobs from the specified folder in the specified bucket.

    Args:
        bucket_name: name of the bucket to delete from.
        folder: folder (object-name prefix) whose blobs are removed; a
            trailing "/" is added if missing.

    When ``ENV == "dev"`` and ``DEV_USE_GCS`` is falsy nothing is deleted;
    the intended action is only logged.
    """
    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've deleted file(s) from {folder} in {bucket_name}")
        return

    # list_blobs matches on a plain string prefix, so "pending" would also
    # match "pending_old/...". Anchor the prefix at the folder boundary, the
    # same way move_gcs_file normalizes its destination folder.
    # NOTE(review): an empty folder is passed through unchanged, which (as
    # before) selects every blob in the bucket -- confirm callers never pass "".
    prefix = folder if not folder or folder.endswith("/") else f"{folder}/"

    bucket = _get_bucket(bucket_name)
    for blob in bucket.list_blobs(prefix=prefix):
        blob.delete()
|
273
|
+
|
274
|
+
|
275
|
+
def _append_iso_timestamp_to_filename(filename: str) -> str:
    """Return ``filename`` with ``_<timestamp>`` inserted before its extension.

    The timestamp is the current ``datetime.datetime.now()`` value in ISO 8601
    form with millisecond precision, with every ":" replaced by "-".
    """
    stem, suffix = os.path.splitext(filename)
    iso_now = datetime.datetime.now().isoformat(timespec="milliseconds")
    # Swap the ":" separators of the time part for "-".
    stamp = "-".join(iso_now.split(":"))
    return stem + "_" + stamp + suffix
|
280
|
+
|
281
|
+
|
219
282
|
def grant_lister_access(user_email: str) -> None:
|
220
283
|
"""
|
221
284
|
Grant a user list access to the GOOGLE_ACL_DATA_BUCKET. List access is
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nci_cidc_api_modules
|
3
|
-
Version: 1.1.30
|
3
|
+
Version: 1.1.31
|
4
4
|
Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
|
5
5
|
Home-page: https://github.com/NCI-CIDC/cidc-api-gae
|
6
6
|
License: MIT license
|
@@ -28,7 +28,7 @@ Requires-Dist: python-dotenv==0.10.3
|
|
28
28
|
Requires-Dist: requests==2.32.3
|
29
29
|
Requires-Dist: jinja2==3.1.6
|
30
30
|
Requires-Dist: certifi==2024.7.4
|
31
|
-
Requires-Dist: nci-cidc-schemas==0.27.
|
31
|
+
Requires-Dist: nci-cidc-schemas==0.27.21
|
32
32
|
Dynamic: description
|
33
33
|
Dynamic: description-content-type
|
34
34
|
Dynamic: home-page
|
@@ -2,10 +2,10 @@ cidc_api/config/__init__.py,sha256=5mX8GAPxUKV84iS-aGOoE-4m68LsOCGCDptXNdlgvj0,1
|
|
2
2
|
cidc_api/config/db.py,sha256=cyWhWtmXha4OsrwUf6ez8aKSfm7tPSmPDE9JVSBx3Fk,1935
|
3
3
|
cidc_api/config/logging.py,sha256=abhVYtn8lfhIt0tyV2WHFgSmp_s2eeJh7kodB6LH4J0,1149
|
4
4
|
cidc_api/config/secrets.py,sha256=jRFj7W43pWuPf9DZQLCKF7WPXf5cUv-BAaS3ASqhV_Q,1481
|
5
|
-
cidc_api/config/settings.py,sha256=
|
5
|
+
cidc_api/config/settings.py,sha256=mA-4r7oB60uFepYtl5abbPigjwX8aBz__qCJXdcWWbs,4272
|
6
6
|
cidc_api/models/__init__.py,sha256=bl445G8Zic9YbhZ8ZBni07wtBMhLJRMBA-JqjLxx2bw,66
|
7
7
|
cidc_api/models/migrations.py,sha256=gp9vtkYbA9FFy2s-7woelAmsvQbJ41LO2_DY-YkFIrQ,11464
|
8
|
-
cidc_api/models/models.py,sha256=
|
8
|
+
cidc_api/models/models.py,sha256=JAvKhX2VnbhavfPkGelBIa3M8Qi6JaFKvydJmmvqZ1U,131795
|
9
9
|
cidc_api/models/schemas.py,sha256=7tDYtmULuzTt2kg7RorWhte06ffalgpQKrFiDRGcPEQ,2711
|
10
10
|
cidc_api/models/files/__init__.py,sha256=8BMTnUSHzUbz0lBeEQY6NvApxDD3GMWMduoVMos2g4Y,213
|
11
11
|
cidc_api/models/files/details.py,sha256=WrWPxJqlsteinoNbGTaQ3fcxgvChqLGJ9vY7H829jtk,62842
|
@@ -13,11 +13,11 @@ cidc_api/models/files/facets.py,sha256=JqCmwcjYYSz7XK4bAokSE9i71C8t9EQ4Jtbv7npth
|
|
13
13
|
cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
cidc_api/shared/auth.py,sha256=PHqmVGkqDjbmUofytVFwD_9ssgUomESl3fFtFHPwZYQ,9062
|
15
15
|
cidc_api/shared/emails.py,sha256=GY-l0EkoVU_3hjV0g-xo7N9d1iyCdluyq_arftEPPe0,4989
|
16
|
-
cidc_api/shared/gcloud_client.py,sha256=
|
16
|
+
cidc_api/shared/gcloud_client.py,sha256=ko-3kGRyHI0RcIgoA0r_qVZUcDUGjd9t6-5pXJc3A7s,35634
|
17
17
|
cidc_api/shared/jose.py,sha256=-qzGzEDAlokEp9E7WtBtQkXyyfPWTYXlwYpCqVJWmqM,1830
|
18
18
|
cidc_api/shared/rest_utils.py,sha256=RwR30WOUAYCxL7V-i2totEyeriG30GbBDvBcpLXhM9w,6594
|
19
|
-
nci_cidc_api_modules-1.1.
|
20
|
-
nci_cidc_api_modules-1.1.
|
21
|
-
nci_cidc_api_modules-1.1.
|
22
|
-
nci_cidc_api_modules-1.1.
|
23
|
-
nci_cidc_api_modules-1.1.
|
19
|
+
nci_cidc_api_modules-1.1.31.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
|
20
|
+
nci_cidc_api_modules-1.1.31.dist-info/METADATA,sha256=92LzhqUCRQiUvKbuj0Gs_lZ3OrmPBzBHfrNZQ_EjrwQ,41285
|
21
|
+
nci_cidc_api_modules-1.1.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
22
|
+
nci_cidc_api_modules-1.1.31.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
|
23
|
+
nci_cidc_api_modules-1.1.31.dist-info/RECORD,,
|
File without changes
|
{nci_cidc_api_modules-1.1.30.dist-info → nci_cidc_api_modules-1.1.31.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
{nci_cidc_api_modules-1.1.30.dist-info → nci_cidc_api_modules-1.1.31.dist-info}/top_level.txt
RENAMED
File without changes
|