nci-cidc-api-modules 1.1.29__tar.gz → 1.1.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {nci_cidc_api_modules-1.1.29/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.1.31}/PKG-INFO +2 -2
  2. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/config/settings.py +1 -0
  3. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/models/models.py +68 -0
  4. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/shared/gcloud_client.py +76 -13
  5. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31/nci_cidc_api_modules.egg-info}/PKG-INFO +2 -2
  6. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/nci_cidc_api_modules.egg-info/SOURCES.txt +0 -3
  7. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/nci_cidc_api_modules.egg-info/requires.txt +1 -1
  8. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/requirements.modules.txt +1 -1
  9. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/setup.py +0 -1
  10. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/tests/test_api.py +2 -2
  11. nci_cidc_api_modules-1.1.29/cidc_api/csms/__init__.py +0 -1
  12. nci_cidc_api_modules-1.1.29/cidc_api/csms/auth.py +0 -105
  13. nci_cidc_api_modules-1.1.29/cidc_api/models/csms_api.py +0 -872
  14. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/LICENSE +0 -0
  15. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/MANIFEST.in +0 -0
  16. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/README.md +0 -0
  17. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/config/__init__.py +0 -0
  18. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/config/db.py +0 -0
  19. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/config/logging.py +0 -0
  20. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/config/secrets.py +0 -0
  21. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/models/__init__.py +0 -0
  22. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/models/files/__init__.py +0 -0
  23. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/models/files/details.py +0 -0
  24. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/models/files/facets.py +0 -0
  25. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/models/migrations.py +0 -0
  26. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/models/schemas.py +0 -0
  27. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/shared/__init__.py +0 -0
  28. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/shared/auth.py +0 -0
  29. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/shared/emails.py +0 -0
  30. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/shared/jose.py +0 -0
  31. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/cidc_api/shared/rest_utils.py +0 -0
  32. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
  33. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
  34. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
  35. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/pyproject.toml +0 -0
  36. {nci_cidc_api_modules-1.1.29 → nci_cidc_api_modules-1.1.31}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.29
3
+ Version: 1.1.31
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -28,7 +28,7 @@ Requires-Dist: python-dotenv==0.10.3
28
28
  Requires-Dist: requests==2.32.3
29
29
  Requires-Dist: jinja2==3.1.6
30
30
  Requires-Dist: certifi==2024.7.4
31
- Requires-Dist: nci-cidc-schemas==0.27.19
31
+ Requires-Dist: nci-cidc-schemas==0.27.21
32
32
  Dynamic: description
33
33
  Dynamic: description-content-type
34
34
  Dynamic: home-page
@@ -70,6 +70,7 @@ GOOGLE_INTAKE_BUCKET = environ["GOOGLE_INTAKE_BUCKET"]
70
70
  GOOGLE_UPLOAD_BUCKET = environ["GOOGLE_UPLOAD_BUCKET"]
71
71
  GOOGLE_UPLOAD_TOPIC = environ["GOOGLE_UPLOAD_TOPIC"]
72
72
  GOOGLE_ACL_DATA_BUCKET = environ["GOOGLE_ACL_DATA_BUCKET"]
73
+ GOOGLE_CLINICAL_DATA_BUCKET = environ["GOOGLE_CLINICAL_DATA_BUCKET"]
73
74
  GOOGLE_EPHEMERAL_BUCKET = environ["GOOGLE_EPHEMERAL_BUCKET"]
74
75
  GOOGLE_UPLOAD_ROLE = environ["GOOGLE_UPLOAD_ROLE"]
75
76
  GOOGLE_LISTER_ROLE = environ["GOOGLE_LISTER_ROLE"]
@@ -22,6 +22,7 @@ __all__ = [
22
22
  "Users",
23
23
  "ValidationMultiError",
24
24
  "with_default_session",
25
+ "PreprocessedFiles",
25
26
  ]
26
27
 
27
28
  import hashlib
@@ -3184,3 +3185,70 @@ def upload_manifest_json(
3184
3185
  # Publish that a manifest upload has been received
3185
3186
  publish_patient_sample_update(manifest_upload.id)
3186
3187
  return manifest_upload.id
3188
+
3189
+
3190
class PreprocessedFiles(CommonColumns):
    """ORM model backing the ``preprocessed_files`` table.

    Each row describes one file by name, object URL, category, uploader and
    a ``status`` string. The statuses used by the helpers below are
    "pending", "current" and "archived".
    """

    __tablename__ = "preprocessed_files"

    file_name = Column(String)
    object_url = Column(String)
    trial_id = Column(String)
    file_category = Column(String)
    uploader_email = Column(String)
    status = Column(String)
    version = Column(Integer)
    released_version = Column(String)

    @staticmethod
    @with_default_session
    def create(
        file_name: str,
        object_url: str,
        file_category: str,
        uploader_email: str,
        status: str = "pending",
        trial_id: str = None,
        version: int = None,
        released_version: str = None,
        session: Session = None,
    ):
        """Build a PreprocessedFiles row from the given values, insert it, and return it."""
        column_values = {
            "file_name": file_name,
            "object_url": object_url,
            "file_category": file_category,
            "uploader_email": uploader_email,
            "status": status,
            "trial_id": trial_id,
            "version": version,
            "released_version": released_version,
        }
        record = PreprocessedFiles(**column_values)
        record.insert(session=session)
        return record

    @classmethod
    @with_default_session
    def archive_current_files(cls, file_category: str, session: Session = None):
        """Flip every 'current' file of the category to 'archived' and commit."""
        to_archive = cls.get_files_by_category_and_status(file_category, "current", session=session)
        for entry in to_archive:
            entry.status = "archived"
            # Stamp the modification time by hand alongside the status change.
            entry._updated = datetime.now()
        session.commit()

    @classmethod
    @with_default_session
    def delete_pending_files_by_category(cls, file_category: str, session: Session = None):
        """Remove every 'pending' file of the category and commit."""
        pending = cls.get_files_by_category_and_status(file_category, "pending", session=session)
        for entry in pending:
            session.delete(entry)
        session.commit()

    @classmethod
    @with_default_session
    def get_files_by_category_and_status(
        cls, file_category: str, status: str, session: Session = None
    ) -> list["PreprocessedFiles"]:
        """Fetch all rows whose file_category and status both match."""
        query = session.query(cls).filter_by(file_category=file_category, status=status)
        return query.all()
@@ -2,15 +2,16 @@
2
2
 
3
3
  # pylint: disable=logging-fstring-interpolation
4
4
 
5
- import json
6
- import os
7
- from os import environ
8
5
  import base64
9
6
  import datetime
10
- import warnings
11
7
  import hashlib
8
+ import io
9
+ import json
10
+ import os
11
+ import warnings
12
12
  from collections import namedtuple
13
13
  from concurrent.futures import Future
14
+ from os import environ
14
15
  from typing import (
15
16
  Any,
16
17
  BinaryIO,
@@ -23,19 +24,20 @@ from typing import (
23
24
  Union,
24
25
  )
25
26
 
26
- from werkzeug.datastructures import FileStorage
27
- from sqlalchemy.orm.session import Session
27
+ import googleapiclient.discovery
28
+ import requests
29
+ from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
30
+ from google.api_core.client_options import ClientOptions
31
+ from google.api_core.iam import Policy
28
32
  from google.cloud import storage, pubsub, bigquery
29
33
  from google.cloud.bigquery.enums import EntityTypes
30
34
  from google.oauth2.service_account import Credentials
31
- from google.api_core.iam import Policy
32
- from google.api_core.client_options import ClientOptions
33
- import googleapiclient.discovery
34
- import requests
35
+ from sqlalchemy.orm.session import Session
36
+ from werkzeug.datastructures import FileStorage
37
+ from werkzeug.utils import secure_filename
35
38
 
36
- from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
37
39
  from cidc_api.config.secrets import get_secrets_manager
38
-
40
+ from ..config.logging import get_logger
39
41
  from ..config.settings import (
40
42
  DEV_USE_GCS,
41
43
  GOOGLE_INTAKE_ROLE,
@@ -57,7 +59,6 @@ from ..config.settings import (
57
59
  DEV_CFUNCTIONS_SERVER,
58
60
  INACTIVE_USER_DAYS,
59
61
  )
60
- from ..config.logging import get_logger
61
62
 
62
63
  os.environ["TZ"] = "UTC"
63
64
  logger = get_logger(__name__)
@@ -216,6 +217,68 @@ def upload_xlsx_to_gcs(
216
217
  return final_object
217
218
 
218
219
 
220
def upload_file_to_gcs(file: FileStorage, bucket_name: str, gcs_folder: str) -> str:
    """Upload a file to the specified GCS folder and return its path within the bucket.

    Args:
        file: the uploaded file; its ``filename`` is sanitized with
            ``secure_filename`` before being used as the object name.
        bucket_name: name of the destination bucket.
        gcs_folder: folder (object-name prefix) to store the file under.

    Returns:
        The object path ``<gcs_folder>/<sanitized filename>`` relative to the bucket.

    Raises:
        ValueError: if the filename is missing or empty once sanitized, since
            the resulting object would be named like the folder itself.
    """
    # Local import: GCS object names always use "/" regardless of the host OS,
    # so the path must not be built with the platform-dependent os.path.join.
    import posixpath

    filename = secure_filename(file.filename or "")
    if not filename:
        raise ValueError("Cannot upload a file without a usable filename")
    gcs_file_path = posixpath.join(gcs_folder, filename)

    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've saved {gcs_file_path} to {bucket_name}")
        return gcs_file_path

    # Only buffer the upload once we know it will actually be sent to GCS.
    binary_file = io.BytesIO(file.read())
    blob = _get_bucket(bucket_name).blob(gcs_file_path)
    blob.upload_from_file(binary_file, content_type=file.content_type)

    return gcs_file_path
236
+
237
+
238
def move_gcs_file(bucket_name: str, existing_path: str, to_folder: str, append_timestamp: bool = True) -> str:
    """Move a file within a GCS bucket to a new folder.

    Args:
        bucket_name: bucket holding both the source and destination objects.
        existing_path: current object path of the file within the bucket.
        to_folder: destination folder; a trailing "/" is added when missing.
        append_timestamp: when True, an ISO timestamp is appended to the
            filename (before its extension) at the destination.

    Returns:
        The new object path within the bucket.

    Raises:
        FileNotFoundError: if no object exists at ``existing_path``
            (a subclass of ``Exception``, so broad handlers keep working).
    """
    filename = os.path.basename(existing_path)
    if append_timestamp:
        filename = _append_iso_timestamp_to_filename(filename)
    # Ensure trailing slash on folder
    if not to_folder.endswith("/"):
        to_folder += "/"
    new_gcs_file_path = f"{to_folder}{filename}"

    # Short-circuit before touching GCS so dev mode never needs a bucket
    # handle, matching upload_file_to_gcs.
    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've moved {existing_path} to {new_gcs_file_path} in {bucket_name}")
        return new_gcs_file_path

    bucket = _get_bucket(bucket_name)
    source_blob = bucket.blob(existing_path)
    if not source_blob.exists():
        raise FileNotFoundError("Expected file not found in GCS")
    new_blob = bucket.blob(new_gcs_file_path)

    # GCS move = rewrite + delete. A rewrite can take several calls: each one
    # returns a continuation token until the copy is complete, so keep going
    # until the token is None. Deleting the source earlier could lose data.
    token, _, _ = new_blob.rewrite(source_blob)
    while token is not None:
        token, _, _ = new_blob.rewrite(source_blob, token=token)
    source_blob.delete()

    return new_gcs_file_path
262
+
263
+
264
def delete_items_from_folder(bucket_name: str, folder: str):
    """Delete all blobs from the specified folder in the specified bucket.

    Args:
        bucket_name: bucket to delete from.
        folder: folder (object-name prefix) whose blobs are removed. A trailing
            "/" is added when missing so that only this folder is matched.

    Returns:
        None.
    """
    # Short-circuit before touching GCS so dev mode never needs a bucket
    # handle, matching upload_file_to_gcs.
    if ENV == "dev" and not DEV_USE_GCS:
        logger.info(f"Would've deleted file(s) from {folder} in {bucket_name}")
        return

    # list_blobs matches by plain string prefix: without the trailing slash,
    # "pending" would also match "pending_old/...". An empty folder is left
    # as-is. NOTE(review): an empty prefix lists every blob in the bucket -
    # confirm no caller can pass "".
    prefix = folder if not folder or folder.endswith("/") else f"{folder}/"
    for blob in _get_bucket(bucket_name).list_blobs(prefix=prefix):
        blob.delete()
273
+
274
+
275
def _append_iso_timestamp_to_filename(filename: str) -> str:
    """Append an ISO 8601 timestamp to a filename, preserving its extension.

    The timestamp is taken in UTC with millisecond precision, and its ":"
    separators are replaced by "-", e.g. ``report.csv`` becomes
    ``report_2025-01-31T12-34-56.789.csv``. Only the last extension is kept
    after the timestamp, as split by ``os.path.splitext``.
    """
    base, ext = os.path.splitext(filename)
    # Ask for UTC explicitly rather than relying on the process-local timezone
    # configuration, then drop tzinfo so no "+00:00" offset ends up in the name.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    timestamp = now_utc.isoformat(timespec="milliseconds").replace(":", "-")
    return f"{base}_{timestamp}{ext}"
280
+
281
+
219
282
  def grant_lister_access(user_email: str) -> None:
220
283
  """
221
284
  Grant a user list access to the GOOGLE_ACL_DATA_BUCKET. List access is
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.29
3
+ Version: 1.1.31
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -28,7 +28,7 @@ Requires-Dist: python-dotenv==0.10.3
28
28
  Requires-Dist: requests==2.32.3
29
29
  Requires-Dist: jinja2==3.1.6
30
30
  Requires-Dist: certifi==2024.7.4
31
- Requires-Dist: nci-cidc-schemas==0.27.19
31
+ Requires-Dist: nci-cidc-schemas==0.27.21
32
32
  Dynamic: description
33
33
  Dynamic: description-content-type
34
34
  Dynamic: home-page
@@ -9,10 +9,7 @@ cidc_api/config/db.py
9
9
  cidc_api/config/logging.py
10
10
  cidc_api/config/secrets.py
11
11
  cidc_api/config/settings.py
12
- cidc_api/csms/__init__.py
13
- cidc_api/csms/auth.py
14
12
  cidc_api/models/__init__.py
15
- cidc_api/models/csms_api.py
16
13
  cidc_api/models/migrations.py
17
14
  cidc_api/models/models.py
18
15
  cidc_api/models/schemas.py
@@ -19,4 +19,4 @@ python-dotenv==0.10.3
19
19
  requests==2.32.3
20
20
  jinja2==3.1.6
21
21
  certifi==2024.7.4
22
- nci-cidc-schemas==0.27.19
22
+ nci-cidc-schemas==0.27.21
@@ -19,4 +19,4 @@ python-dotenv==0.10.3
19
19
  requests==2.32.3
20
20
  jinja2==3.1.6
21
21
  certifi==2024.7.4
22
- nci-cidc-schemas==0.27.19
22
+ nci-cidc-schemas==0.27.21
@@ -18,7 +18,6 @@ setup(
18
18
  license="MIT license",
19
19
  packages=[
20
20
  "cidc_api.config",
21
- "cidc_api.csms",
22
21
  "cidc_api.models",
23
22
  "cidc_api.shared",
24
23
  "cidc_api.models.files",
@@ -410,7 +410,8 @@ def test_endpoint_urls(cidc_api):
410
410
  """
411
411
  expected_endpoints = {
412
412
  "/",
413
- "/admin/test_csms",
413
+ "/clinical_data/files/master_appendix_a/pending",
414
+ "/clinical_data/jobs",
414
415
  "/downloadable_files/",
415
416
  "/downloadable_files/filelist",
416
417
  "/downloadable_files/compressed_batch",
@@ -437,7 +438,6 @@ def test_endpoint_urls(cidc_api):
437
438
  "/permissions/<int:permission>",
438
439
  "/samples/",
439
440
  "/trial_metadata/",
440
- "/trial_metadata/new_manifest",
441
441
  "/trial_metadata/summaries",
442
442
  "/trial_metadata/<string:trial>",
443
443
  "/trial_metadata/<string:trial>/refresh_trial",
@@ -1 +0,0 @@
1
- from .auth import *
@@ -1,105 +0,0 @@
1
- __all__ = ["get_token", "get_with_authorization", "get_with_paging"]
2
-
3
- import os
4
- from datetime import datetime, timedelta
5
- from typing import Any, Dict, Iterator
6
-
7
- import requests
8
-
9
- from ..config.settings import (
10
- CSMS_BASE_URL,
11
- CSMS_CLIENT_ID,
12
- CSMS_CLIENT_SECRET,
13
- CSMS_TOKEN_URL,
14
- )
15
-
16
- os.environ["TZ"] = "UTC"
17
-
18
- TIMEOUT_IN_SECONDS = 20
19
- _TOKEN, _TOKEN_EXPIRY = None, datetime.now()
20
-
21
-
22
- def get_token():
23
- global _TOKEN, _TOKEN_EXPIRY
24
- if not _TOKEN or datetime.now() >= _TOKEN_EXPIRY:
25
- res, time = (
26
- requests.post(
27
- CSMS_TOKEN_URL,
28
- headers={"Content-Type": "application/x-www-form-urlencoded"},
29
- data={
30
- "grant_type": "client_credentials",
31
- "client_id": CSMS_CLIENT_ID,
32
- "client_secret": CSMS_CLIENT_SECRET,
33
- },
34
- timeout=TIMEOUT_IN_SECONDS,
35
- ).json(),
36
- datetime.now(),
37
- )
38
-
39
- # res definition from https://developer.okta.com/docs/reference/api/oidc/#response-example-error-7
40
- if "errorCode" in res:
41
- raise RuntimeError(res["errorCode"] + ": " + res.get("errorSummary"))
42
-
43
- _TOKEN = res["access_token"]
44
- _TOKEN_EXPIRY = time + timedelta(seconds=res["expires_in"])
45
-
46
- return _TOKEN
47
-
48
-
49
- def get_with_authorization(url: str, **kwargs) -> requests.Response:
50
- """url should be fully valid or begin with `/` to be prefixed with CSMS_BASE_URL"""
51
- token = get_token()
52
- headers = {
53
- **kwargs.get("headers", {}),
54
- "Authorization": f"Bearer {token}",
55
- "accept": "*/*",
56
- }
57
- kwargs["headers"] = headers
58
- if not url.startswith(CSMS_BASE_URL):
59
- url = CSMS_BASE_URL + url
60
- return requests.get(
61
- url,
62
- **kwargs,
63
- timeout=TIMEOUT_IN_SECONDS,
64
- )
65
-
66
-
67
- def get_with_paging(url: str, limit: int = None, offset: int = 0, **kwargs) -> Iterator[Dict[str, Any]]:
68
- """
69
- Return an iterator of entries via get_with_authorization with handling for CSMS paging
70
-
71
- Parameters
72
- ----------
73
- url: str
74
- url should be fully valid or begin with `/` to be prefixed with CSMS_BASE_URL
75
- limit: int = None
76
- the number of records to return on each page
77
- default: 5000 for samples, 50 for manifests, 1 otherwise
78
- offset: int = 0
79
- which page to return, 0-indexed
80
- increments as needed to continue returning
81
-
82
- Raises
83
- ------
84
- requests.exceptions.HTTPError
85
- via res.raise_for_status()
86
- https://docs.python-requests.org/en/master/user/quickstart/#response-status-codes
87
- """
88
- if not limit:
89
- if "samples" in url:
90
- limit = 5000
91
- elif "manifests" in url:
92
- limit = 50
93
- else:
94
- limit = 1
95
-
96
- kwargs.update({"limit": limit, "offset": offset})
97
-
98
- res = get_with_authorization(url, params=kwargs)
99
- while res.status_code < 300 and len(res.json().get("data", [])) > 0:
100
- # if there's not an error and we're still returning
101
- yield from res.json()["data"]
102
- kwargs["offset"] += 1 # get the next page
103
- res = get_with_authorization(url, params=kwargs)
104
-
105
- res.raise_for_status()