nci-cidc-api-modules 1.1.30__py3-none-any.whl → 1.1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,6 +70,7 @@ GOOGLE_INTAKE_BUCKET = environ["GOOGLE_INTAKE_BUCKET"]
70
70
  GOOGLE_UPLOAD_BUCKET = environ["GOOGLE_UPLOAD_BUCKET"]
71
71
  GOOGLE_UPLOAD_TOPIC = environ["GOOGLE_UPLOAD_TOPIC"]
72
72
  GOOGLE_ACL_DATA_BUCKET = environ["GOOGLE_ACL_DATA_BUCKET"]
73
+ GOOGLE_CLINICAL_DATA_BUCKET = environ["GOOGLE_CLINICAL_DATA_BUCKET"]
73
74
  GOOGLE_EPHEMERAL_BUCKET = environ["GOOGLE_EPHEMERAL_BUCKET"]
74
75
  GOOGLE_UPLOAD_ROLE = environ["GOOGLE_UPLOAD_ROLE"]
75
76
  GOOGLE_LISTER_ROLE = environ["GOOGLE_LISTER_ROLE"]
cidc_api/models/models.py CHANGED
@@ -22,6 +22,7 @@ __all__ = [
22
22
  "Users",
23
23
  "ValidationMultiError",
24
24
  "with_default_session",
25
+ "PreprocessedFiles",
25
26
  ]
26
27
 
27
28
  import hashlib
@@ -3184,3 +3185,73 @@ def upload_manifest_json(
3184
3185
  # Publish that a manifest upload has been received
3185
3186
  publish_patient_sample_update(manifest_upload.id)
3186
3187
  return manifest_upload.id
3188
+
3189
+
3190
class PreprocessedFiles(CommonColumns):
    """ORM model for rows of the ``preprocessed_files`` table.

    Each row describes one uploaded file: its name, the URL of the stored
    object, the category it belongs to, who uploaded it, and a lifecycle
    ``status``. The status values used in this class are "pending" (default
    on creation), "current" and "archived".

    NOTE(review): ``insert`` and ``_updated`` are used below but not defined
    here; presumably they come from ``CommonColumns`` - confirm.
    NOTE(review): ``with_default_session`` presumably supplies ``session``
    when the caller does not pass one - confirm against its definition.
    """

    __tablename__ = "preprocessed_files"

    file_name = Column(String)
    object_url = Column(String)
    trial_id = Column(String)
    file_category = Column(String)
    uploader_email = Column(String)
    status = Column(String)
    # Nullable: create() leaves this as None unless the caller sets it.
    version = Column(Integer)
    released_version = Column(String)

    @staticmethod
    @with_default_session
    def create(
        file_name: str,
        object_url: str,
        file_category: str,
        uploader_email: str,
        status: str = "pending",
        trial_id: str = None,
        version: int = None,
        released_version: str = None,
        session: Session = None,
    ):
        """Create and insert a new PreprocessedFiles record.

        Returns:
            The newly built ``PreprocessedFiles`` instance, after ``insert``
            has been called on it with the given session.
        """
        new_file = PreprocessedFiles(
            file_name=file_name,
            object_url=object_url,
            file_category=file_category,
            uploader_email=uploader_email,
            status=status,
            trial_id=trial_id,
            version=version,
            released_version=released_version,
        )
        new_file.insert(session=session)
        return new_file

    @classmethod
    @with_default_session
    def archive_current_files(cls, file_category: str, session: Session = None):
        """Update any 'current' files in the given category to 'archived'.

        Returns:
            The highest ``version`` found among the files that were archived,
            or 0 when there were none or none of them had a version set.
        """
        current_version = 0
        current_files = cls.get_files_by_category_and_status(file_category, "current", session=session)
        for file in current_files:
            file.status = "archived"
            file._updated = datetime.now()
            # `version` may be NULL; comparing None with an int would raise
            # TypeError and abort the whole archive before the commit.
            if file.version is not None and file.version > current_version:
                current_version = file.version
        session.commit()
        return current_version

    @classmethod
    @with_default_session
    def delete_pending_files_by_category(cls, file_category: str, session: Session = None):
        """Delete all pending files matching given file_category, then commit."""
        records = cls.get_files_by_category_and_status(file_category, "pending", session=session)
        for record in records:
            session.delete(record)
        session.commit()

    @classmethod
    @with_default_session
    def get_files_by_category_and_status(
        cls, file_category: str, status: str, session: Session = None
    ) -> list["PreprocessedFiles"]:
        """Return all files matching given file_category and status."""
        return session.query(cls).filter_by(file_category=file_category, status=status).all()
@@ -2,15 +2,16 @@
2
2
 
3
3
  # pylint: disable=logging-fstring-interpolation
4
4
 
5
- import json
6
- import os
7
- from os import environ
8
5
  import base64
9
6
  import datetime
10
- import warnings
11
7
  import hashlib
8
+ import io
9
+ import json
10
+ import os
11
+ import warnings
12
12
  from collections import namedtuple
13
13
  from concurrent.futures import Future
14
+ from os import environ
14
15
  from typing import (
15
16
  Any,
16
17
  BinaryIO,
@@ -23,19 +24,20 @@ from typing import (
23
24
  Union,
24
25
  )
25
26
 
26
- from werkzeug.datastructures import FileStorage
27
- from sqlalchemy.orm.session import Session
27
+ import googleapiclient.discovery
28
+ import requests
29
+ from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
30
+ from google.api_core.client_options import ClientOptions
31
+ from google.api_core.iam import Policy
28
32
  from google.cloud import storage, pubsub, bigquery
29
33
  from google.cloud.bigquery.enums import EntityTypes
30
34
  from google.oauth2.service_account import Credentials
31
- from google.api_core.iam import Policy
32
- from google.api_core.client_options import ClientOptions
33
- import googleapiclient.discovery
34
- import requests
35
+ from sqlalchemy.orm.session import Session
36
+ from werkzeug.datastructures import FileStorage
37
+ from werkzeug.utils import secure_filename
35
38
 
36
- from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
37
39
  from cidc_api.config.secrets import get_secrets_manager
38
-
40
+ from ..config.logging import get_logger
39
41
  from ..config.settings import (
40
42
  DEV_USE_GCS,
41
43
  GOOGLE_INTAKE_ROLE,
@@ -57,7 +59,6 @@ from ..config.settings import (
57
59
  DEV_CFUNCTIONS_SERVER,
58
60
  INACTIVE_USER_DAYS,
59
61
  )
60
- from ..config.logging import get_logger
61
62
 
62
63
  os.environ["TZ"] = "UTC"
63
64
  logger = get_logger(__name__)
@@ -216,6 +217,68 @@ def upload_xlsx_to_gcs(
216
217
  return final_object
217
218
 
218
219
 
220
def upload_file_to_gcs(file: FileStorage, bucket_name: str, gcs_folder: str) -> str:
    """Upload a file to the specified GCS folder and return the GCS path from the bucket.

    Args:
        file: the uploaded file; its ``filename`` is sanitized with
            ``secure_filename`` before being used as the object name.
        bucket_name: name of the destination bucket.
        gcs_folder: object-name prefix ("folder") to place the file under.

    Returns:
        The object path ``<gcs_folder>/<sanitized filename>`` within the bucket.
        When ``ENV == "dev"`` and ``DEV_USE_GCS`` is falsy, nothing is uploaded
        and the path that would have been used is returned.

    Raises:
        ValueError: if the filename is missing or sanitizes to an empty string.
    """
    # Function-scope import so this block does not depend on the file's import list.
    import posixpath

    # secure_filename may legitimately return "" (e.g. a name made only of
    # unsafe characters); joining that would target the folder prefix itself.
    filename = secure_filename(file.filename or "")
    if not filename:
        raise ValueError("Uploaded file has no usable filename")

    # GCS object names always use "/" regardless of the host OS separator.
    gcs_file_path = posixpath.join(gcs_folder, filename)
    binary_file = io.BytesIO(file.read())

    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've saved {gcs_file_path} to {bucket_name}")
        return gcs_file_path

    # Upload to GCS
    blob = _get_bucket(bucket_name).blob(gcs_file_path)
    blob.upload_from_file(binary_file, content_type=file.content_type)

    return gcs_file_path
236
+
237
+
238
def move_gcs_file(bucket_name: str, existing_path: str, to_folder: str, append_timestamp: bool = True) -> str:
    """Move a file within a GCS bucket to a new folder, optionally appending a timestamp to the filename.

    Args:
        bucket_name: bucket that holds both the source and destination objects.
        existing_path: object path of the file to move.
        to_folder: destination folder prefix; a trailing "/" is added if missing.
        append_timestamp: when true, the destination filename gets a timestamp
            suffix from ``_append_iso_timestamp_to_filename``.

    Returns:
        The destination object path. When ``ENV == "dev"`` and ``DEV_USE_GCS``
        is falsy, nothing is moved and the would-be path is returned.

    Raises:
        Exception: if the source object does not exist in the bucket.
    """
    filename = os.path.basename(existing_path)
    if append_timestamp:
        filename = _append_iso_timestamp_to_filename(filename)
    # Ensure trailing slash on folder
    if not to_folder.endswith("/"):
        to_folder += "/"
    new_gcs_file_path = f"{to_folder}{filename}"

    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've moved {existing_path} to {new_gcs_file_path} in {bucket_name}")
        return new_gcs_file_path

    bucket = _get_bucket(bucket_name)
    source_blob = bucket.blob(existing_path)
    if not source_blob.exists():
        raise Exception("Expected file not found in GCS")
    new_blob = bucket.blob(new_gcs_file_path)

    # GCS move = rewrite + delete. A single rewrite call may copy only part of
    # a large object and hand back a continuation token, so keep calling until
    # the token is None; deleting the source earlier would lose data.
    token = None
    while True:
        token, _bytes_rewritten, _total_bytes = new_blob.rewrite(source_blob, token=token)
        if token is None:
            break
    source_blob.delete()

    return new_gcs_file_path
262
+
263
+
264
def delete_items_from_folder(bucket_name: str, folder: str):
    """Deletes all blobs from the specified folder in the specified bucket.

    Args:
        bucket_name: bucket to delete from.
        folder: folder prefix whose blobs are deleted. A non-empty value is
            normalized to end with "/" so that only that folder is matched.

    When ``ENV == "dev"`` and ``DEV_USE_GCS`` is falsy, nothing is deleted and
    the intended action is only logged.
    """
    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've deleted file(s) from {folder} in {bucket_name}")
        return

    # A bare prefix such as "staging" would also match "staging_old/..."; the
    # trailing slash restricts the listing to the folder itself.
    # NOTE(review): an empty `folder` is passed through unchanged and therefore
    # still matches every blob in the bucket - confirm callers never pass "".
    prefix = folder
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    bucket = _get_bucket(bucket_name)
    for blob in bucket.list_blobs(prefix=prefix):
        blob.delete()
273
+
274
+
275
+ def _append_iso_timestamp_to_filename(filename: str) -> str:
276
+ """Append an ISO 8601 timestamp to a filename, preserving its extension."""
277
+ base, ext = os.path.splitext(filename)
278
+ timestamp = datetime.datetime.now().isoformat(timespec="milliseconds").replace(":", "-")
279
+ return f"{base}_{timestamp}{ext}"
280
+
281
+
219
282
  def grant_lister_access(user_email: str) -> None:
220
283
  """
221
284
  Grant a user list access to the GOOGLE_ACL_DATA_BUCKET. List access is
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.30
3
+ Version: 1.1.32
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -25,10 +25,10 @@ Requires-Dist: pyarrow==14.0.1
25
25
  Requires-Dist: numpy<2,>=1.16.5
26
26
  Requires-Dist: pandas==1.5.3
27
27
  Requires-Dist: python-dotenv==0.10.3
28
- Requires-Dist: requests==2.32.3
28
+ Requires-Dist: requests==2.32.4
29
29
  Requires-Dist: jinja2==3.1.6
30
30
  Requires-Dist: certifi==2024.7.4
31
- Requires-Dist: nci-cidc-schemas==0.27.20
31
+ Requires-Dist: nci-cidc-schemas==0.27.22
32
32
  Dynamic: description
33
33
  Dynamic: description-content-type
34
34
  Dynamic: home-page
@@ -2,10 +2,10 @@ cidc_api/config/__init__.py,sha256=5mX8GAPxUKV84iS-aGOoE-4m68LsOCGCDptXNdlgvj0,1
2
2
  cidc_api/config/db.py,sha256=cyWhWtmXha4OsrwUf6ez8aKSfm7tPSmPDE9JVSBx3Fk,1935
3
3
  cidc_api/config/logging.py,sha256=abhVYtn8lfhIt0tyV2WHFgSmp_s2eeJh7kodB6LH4J0,1149
4
4
  cidc_api/config/secrets.py,sha256=jRFj7W43pWuPf9DZQLCKF7WPXf5cUv-BAaS3ASqhV_Q,1481
5
- cidc_api/config/settings.py,sha256=fJQIaCfxsuooEi1pAO8FhHurN0BjP6FZKX8jl7uHGZM,4203
5
+ cidc_api/config/settings.py,sha256=mA-4r7oB60uFepYtl5abbPigjwX8aBz__qCJXdcWWbs,4272
6
6
  cidc_api/models/__init__.py,sha256=bl445G8Zic9YbhZ8ZBni07wtBMhLJRMBA-JqjLxx2bw,66
7
7
  cidc_api/models/migrations.py,sha256=gp9vtkYbA9FFy2s-7woelAmsvQbJ41LO2_DY-YkFIrQ,11464
8
- cidc_api/models/models.py,sha256=HBXb5228CeUInaaKOXYBcPz-T9pfwULz_7BaSyJmNDI,129427
8
+ cidc_api/models/models.py,sha256=WU9nCAyYJEi1_TWyjSCrJ8XX1btPNa2xO7tpDWGccdU,131992
9
9
  cidc_api/models/schemas.py,sha256=7tDYtmULuzTt2kg7RorWhte06ffalgpQKrFiDRGcPEQ,2711
10
10
  cidc_api/models/files/__init__.py,sha256=8BMTnUSHzUbz0lBeEQY6NvApxDD3GMWMduoVMos2g4Y,213
11
11
  cidc_api/models/files/details.py,sha256=WrWPxJqlsteinoNbGTaQ3fcxgvChqLGJ9vY7H829jtk,62842
@@ -13,11 +13,11 @@ cidc_api/models/files/facets.py,sha256=JqCmwcjYYSz7XK4bAokSE9i71C8t9EQ4Jtbv7npth
13
13
  cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  cidc_api/shared/auth.py,sha256=PHqmVGkqDjbmUofytVFwD_9ssgUomESl3fFtFHPwZYQ,9062
15
15
  cidc_api/shared/emails.py,sha256=GY-l0EkoVU_3hjV0g-xo7N9d1iyCdluyq_arftEPPe0,4989
16
- cidc_api/shared/gcloud_client.py,sha256=i4ZZLoDC_pEwKaMS8218uUJ0fsIi0DKwd-hzGHGQw7g,33139
16
+ cidc_api/shared/gcloud_client.py,sha256=oD7Y3Glp1ZrPYkonv7DvX1koGtF30lgm3ENXYQ7G5cI,35634
17
17
  cidc_api/shared/jose.py,sha256=-qzGzEDAlokEp9E7WtBtQkXyyfPWTYXlwYpCqVJWmqM,1830
18
18
  cidc_api/shared/rest_utils.py,sha256=RwR30WOUAYCxL7V-i2totEyeriG30GbBDvBcpLXhM9w,6594
19
- nci_cidc_api_modules-1.1.30.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
20
- nci_cidc_api_modules-1.1.30.dist-info/METADATA,sha256=s378K4KxeTTwVZuypJg2yiWUSO3jBN9zqeMwsuQ06sU,41285
21
- nci_cidc_api_modules-1.1.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
- nci_cidc_api_modules-1.1.30.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
23
- nci_cidc_api_modules-1.1.30.dist-info/RECORD,,
19
+ nci_cidc_api_modules-1.1.32.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
20
+ nci_cidc_api_modules-1.1.32.dist-info/METADATA,sha256=K5SKeS-yZim2JWbhfsbrFo7G2vSegE4uYk5xy3lgtIA,41285
21
+ nci_cidc_api_modules-1.1.32.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
22
+ nci_cidc_api_modules-1.1.32.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
23
+ nci_cidc_api_modules-1.1.32.dist-info/RECORD,,