nci-cidc-api-modules 1.1.38__py3-none-any.whl → 1.1.40__py3-none-any.whl

cidc_api/config/db.py CHANGED
@@ -5,6 +5,7 @@ from flask_sqlalchemy import SQLAlchemy
  from flask_migrate import Migrate, upgrade
  from sqlalchemy.engine.url import URL
  from sqlalchemy.orm import declarative_base
+ from google.cloud.sql.connector import Connector, IPTypes

  from .secrets import get_secrets_manager

@@ -12,6 +13,20 @@ db = SQLAlchemy()
  BaseModel = declarative_base()
  db.Model = BaseModel

+ connector = Connector()
+
+
+ def getconn():
+     return connector.connect(
+         environ.get("CLOUD_SQL_INSTANCE_NAME"),
+         "pg8000",
+         user=environ.get("CLOUD_SQL_DB_USER"),
+         password="xxxxx",
+         db=environ.get("CLOUD_SQL_DB_NAME"),
+         enable_iam_auth=True,
+         ip_type=IPTypes.PUBLIC,
+     )
+

  def init_db(app: Flask):
      """Connect `app` to the database and run migrations"""
@@ -30,6 +45,9 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
          # Connect to the test database
          db_uri = environ.get("TEST_POSTGRES_URI", "fake-conn-string")
      elif not db_uri:
+         # TODO: figure out why IAM authentication causes an issue with the huge filter query
+         # db_uri = f"postgresql+pg8000://{environ.get('CLOUD_SQL_DB_USER')}:xxx@/{environ.get('CLOUD_SQL_DB_NAME')}"
+
          secrets = get_secrets_manager(testing)

          # If POSTGRES_URI env variable is not set,
@@ -58,3 +76,13 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
      assert db_uri

      return db_uri
+
+
+ # Use SQLALCHEMY_ENGINE_OPTIONS to connect to the cloud but use uri for local db
+ def cloud_connector(testing: bool = False):
+     return {}
+     # TODO: figure out IAM authentication
+     # if not testing and not environ.get("POSTGRES_URI"):
+     #     return {"creator": getconn}
+     # else:
+     #     return {}
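The new `getconn` helper and the (currently disabled) `cloud_connector` engine options are the standard Cloud SQL Python Connector pattern: SQLAlchemy is handed a `creator` callable instead of host/port credentials, and calls it whenever the pool needs a fresh DBAPI connection. Below is a minimal standalone sketch of that wiring, assuming the commented-out `{"creator": getconn}` branch were enabled; the instance, user, and database names are placeholders, not the project's real values.

```python
# Minimal sketch (not part of the package): wiring the Cloud SQL Python Connector
# into a SQLAlchemy engine via the "creator" engine option.
import sqlalchemy
from google.cloud.sql.connector import Connector, IPTypes

connector = Connector()


def getconn():
    # The connector opens an authenticated connection; with enable_iam_auth=True
    # the caller's IAM credentials stand in for a database password.
    return connector.connect(
        "my-project:us-east4:my-instance",  # placeholder CLOUD_SQL_INSTANCE_NAME
        "pg8000",
        user="user@example.gov",            # placeholder CLOUD_SQL_DB_USER
        db="my_database",                   # placeholder CLOUD_SQL_DB_NAME
        enable_iam_auth=True,
        ip_type=IPTypes.PUBLIC,
    )


# SQLAlchemy calls `creator` for every new pooled connection, so the URL only
# needs to name the dialect/driver.
engine = sqlalchemy.create_engine("postgresql+pg8000://", creator=getconn)

# With real values above, this would run a quick smoke-test query.
with engine.connect() as conn:
    print(conn.execute(sqlalchemy.text("SELECT 1")).scalar())
```

With Flask-SQLAlchemy, returning `{"creator": getconn}` from `cloud_connector()` has the same effect, since `SQLALCHEMY_ENGINE_OPTIONS` is forwarded to `create_engine`.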
cidc_api/config/settings.py CHANGED
@@ -10,7 +10,7 @@ from os import environ, path, mkdir

  from dotenv import load_dotenv

- from .db import get_sqlalchemy_database_uri
+ from .db import get_sqlalchemy_database_uri, cloud_connector
  from .secrets import get_secrets_manager

  load_dotenv()
@@ -54,6 +54,7 @@ else:

  ### Configure Flask-SQLAlchemy ###
  SQLALCHEMY_DATABASE_URI = get_sqlalchemy_database_uri(TESTING)
+ SQLALCHEMY_ENGINE_OPTIONS = cloud_connector(TESTING)
  SQLALCHEMY_TRACK_MODIFICATIONS = False
  SQLALCHEMY_ECHO = False  # Set to True to emit all compiled sql statements

@@ -81,6 +82,7 @@ GOOGLE_PATIENT_SAMPLE_TOPIC = environ["GOOGLE_PATIENT_SAMPLE_TOPIC"]
  GOOGLE_EMAILS_TOPIC = environ["GOOGLE_EMAILS_TOPIC"]
  GOOGLE_ARTIFACT_UPLOAD_TOPIC = environ["GOOGLE_ARTIFACT_UPLOAD_TOPIC"]
  GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC = environ["GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC"]
+ GOOGLE_HL_CLINICAL_VALIDATION_TOPIC = environ["GOOGLE_HL_CLINICAL_VALIDATION_TOPIC"]
  GOOGLE_AND_OPERATOR = " && "
  GOOGLE_OR_OPERATOR = " || "

cidc_api/models/models.py CHANGED
@@ -80,6 +80,7 @@ from sqlalchemy import (
      Boolean,
      CheckConstraint,
      Column,
+     Date,
      DateTime,
      Enum,
      ForeignKey,
@@ -3354,16 +3355,28 @@ class PreprocessedFiles(CommonColumns):
          .all()
      )

-     # TODO: logic for pending vs current files after high level validation
      @classmethod
      @with_default_session
-     def get_pending_non_admin_files(cls, job_id: int, session: Session) -> list["PreprocessedFiles"]:
-         return (
+     def get_latest_non_admin_files(cls, job_id: int, session: Session) -> list["PreprocessedFiles"]:
+         """Return the most recently uploaded file for each non-admin file category for the given job_id."""
+         # Subquery to get latest _created per file_category
+         latest_subquery = (
+             session.query(cls.file_category, func.max(cls._created).label("latest_created"))
+             .filter(cls.job_id == job_id, cls.file_category.notin_(ADMIN_FILE_CATEGORIES))
+             .group_by(cls.file_category)
+             .subquery()
+         )
+         # Join main table on file_category and latest _created to get full records of latest files
+         latest_files = (
              session.query(cls)
-             .filter(cls.job_id == job_id)
-             .filter(cls.status == "pending", cls.file_category.notin_(ADMIN_FILE_CATEGORIES))
+             .join(
+                 latest_subquery,
+                 (cls.file_category == latest_subquery.c.file_category)
+                 & (cls._created == latest_subquery.c.latest_created),
+             )
              .all()
          )
+         return latest_files

      @classmethod
      def add_job_filter(cls, query, job_id):
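`get_latest_non_admin_files` is a textbook "latest row per group" query: a `GROUP BY`/`MAX` subquery picks the newest `_created` per `file_category`, and joining it back to the table recovers the full rows. Here is a self-contained sketch of the same shape against a throwaway SQLite table; the `File` model and column names are illustrative only, not the package's schema.

```python
# Sketch of the latest-row-per-group pattern used by get_latest_non_admin_files.
from datetime import datetime, timedelta

from sqlalchemy import Column, DateTime, Integer, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class File(Base):
    __tablename__ = "files"
    id = Column(Integer, primary_key=True)
    file_category = Column(String)
    created = Column(DateTime)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    now = datetime.utcnow()
    session.add_all(
        [
            File(file_category="a", created=now - timedelta(days=1)),
            File(file_category="a", created=now),  # newest "a"
            File(file_category="b", created=now - timedelta(days=2)),  # only "b"
        ]
    )
    session.commit()

    # Subquery: newest timestamp per category.
    latest = (
        session.query(File.file_category, func.max(File.created).label("latest_created"))
        .group_by(File.file_category)
        .subquery()
    )
    # Join back on (category, newest timestamp) to get the full rows.
    rows = (
        session.query(File)
        .join(
            latest,
            (File.file_category == latest.c.file_category)
            & (File.created == latest.c.latest_created),
        )
        .all()
    )
    print([(f.file_category, f.id) for f in rows])  # exactly one row per category
```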
@@ -3413,11 +3426,13 @@ class IngestionJobs(CommonColumns):
      status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="status"), nullable=False)
      trial_id = Column(String, nullable=False)
      version = Column(Integer, nullable=False)
+     pending = Column(Boolean, nullable=False, default=False)
+     start_date = Column(Date, nullable=True)

      @staticmethod
      @with_default_session
-     def create(trial_id: str, status: str, version: int, session: Session = None):
-         new_job = IngestionJobs(trial_id=trial_id, status=status, version=version)
+     def create(trial_id: str, status: str, version: int, pending: bool = False, session: Session = None):
+         new_job = IngestionJobs(trial_id=trial_id, status=status, version=version, pending=pending)
          new_job.insert(session=session)
          return new_job

@@ -3444,6 +3459,20 @@ class IngestionJobs(CommonColumns):
                  headers_ended = True
          return categories

+     @classmethod
+     @with_default_session
+     def atomic_set_job_as_pending(cls, job_id: int, session: Session) -> bool:
+         # Prevents a rare race condition where multiple people try to submit a job for validation
+         result = bool(
+             session.execute(
+                 update(IngestionJobs)
+                 .where(and_(IngestionJobs.id == job_id, IngestionJobs.pending == False))
+                 .values(pending=True)
+             ).rowcount
+         )
+         session.commit()
+         return result
+
      @classmethod
      @with_default_session
      def get_jobs_by_trial(cls, trial_id: str, session: Session = None) -> list["IngestionJobs"]:
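`atomic_set_job_as_pending` is a compare-and-set: the `UPDATE` only matches while `pending` is still `False`, so if two users submit the same job at once, exactly one call reports a changed row. A hedged sketch of a caller that relies on that guarantee follows; `submit_job_for_validation` is a hypothetical helper, not one of the package's endpoints, and the import paths are assumed from the layout shown in this diff.

```python
# Hypothetical caller illustrating the compare-and-set guard.
from werkzeug.exceptions import Conflict

# Import paths assumed from the package layout shown in this diff.
from cidc_api.models import IngestionJobs
from cidc_api.shared.gcloud_client import publish_hl_clinical_validation


def submit_job_for_validation(job_id: int) -> None:
    # The session argument is supplied by the with_default_session decorator.
    if not IngestionJobs.atomic_set_job_as_pending(job_id):
        # Someone else already flipped pending to True; don't start a second validation.
        raise Conflict(f"Job {job_id} has already been submitted for validation.")
    # Exactly one concurrent caller reaches this point and triggers the async validation.
    publish_hl_clinical_validation(job_id)
```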
cidc_api/shared/file_handling.py CHANGED
@@ -1,10 +1,15 @@
+ from pathlib import Path
+
  from werkzeug.datastructures import FileStorage
- from werkzeug.exceptions import BadRequest
+ from werkzeug.exceptions import BadRequest, InternalServerError

+ from ..config.logging import get_logger
  from ..config.settings import GOOGLE_CLINICAL_DATA_BUCKET
  from ..models import PreprocessedFiles
  from ..shared.auth import get_current_user
- from ..shared.gcloud_client import upload_file_to_gcs
+ from ..shared.gcloud_client import upload_file_to_gcs, move_gcs_file
+
+ logger = get_logger(__name__)


  def set_current_file(file: FileStorage, file_category: str, gcs_folder: str, job_id: int = None) -> PreprocessedFiles:
@@ -22,7 +27,7 @@ def create_file(
  ) -> PreprocessedFiles:
      """Upload file to GCS and create corresponding metadata record in the database."""
      status = "pending" if gcs_folder.endswith("pending/") else "current"
-     # only need timestamp for current/approved files
+     # only need timestamp for current/versioned files
      append_timestamp = status == "current"
      # create file in GCS
      gcs_file_path = upload_file_to_gcs(file, GOOGLE_CLINICAL_DATA_BUCKET, gcs_folder, append_timestamp=append_timestamp)
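For reference, the status/timestamp rule in `create_file` hinges entirely on the target folder name; the folder paths below are made up for illustration.

```python
# Made-up folders showing how create_file derives status and timestamping.
for gcs_folder in ("trial-1/appendix_a/pending/", "trial-1/appendix_a/"):
    status = "pending" if gcs_folder.endswith("pending/") else "current"
    append_timestamp = status == "current"
    print(gcs_folder, status, append_timestamp)
# trial-1/appendix_a/pending/ pending False
# trial-1/appendix_a/ current True
```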
@@ -56,35 +61,36 @@ def format_common_preprocessed_file_response(file: PreprocessedFiles):
      }


- # TODO Below functions approve_pending_file and delete_pending_files were copied from deleted clinical_data.py
- # Consider re-implementing with pending files in clinical data file uploads, or remove
- # def approve_pending_file(pending_file: FileStorage):
- #     original_filename = pending_file.file_name
- #     pending_gcs_path = pending_file.object_url
- #     try:
- #         new_gcs_path = gcloud_client.move_gcs_file(
- #             GOOGLE_CLINICAL_DATA_BUCKET, pending_gcs_path, f"{MASTER_APPENDIX_A}/"
- #         )
- #     except Exception as e:
- #         logger.error(str(e))
- #         raise InternalServerError(str(e))
- #     # Move any 'current' file(s) to 'archived' status
- #     latest_version = PreprocessedFiles.archive_current_files(MASTER_APPENDIX_A)
- #     # Insert new "approved" DB record
- #     PreprocessedFiles.create(
- #         file_name=original_filename,
- #         object_url=new_gcs_path,
- #         file_category=MASTER_APPENDIX_A,
- #         uploader_email=get_current_user().email,
- #         status="current",
- #         version=latest_version + 1,
- #     )
- #     # Delete pending record
- #     pending_file.delete()
- #     return new_gcs_path
- #
- #
- # def delete_pending_files(pending_folder: str, file_category: str):
- #     """Deletes specified pending file(s) from GCS and associated db record(s)."""
- #     gcloud_client.delete_items_from_folder(GOOGLE_CLINICAL_DATA_BUCKET, pending_folder)
- #     PreprocessedFiles.delete_pending_files_by_category(file_category)
+ def version_pending_file(pending_file: PreprocessedFiles):
+     """Transitions an existing pending file to be a current versioned file."""
+     original_filename = pending_file.file_name
+     pending_gcs_path = pending_file.object_url
+     try:
+         versioned_gcs_folder = strip_filename_and_pending_folder(pending_gcs_path)
+         new_gcs_path = move_gcs_file(GOOGLE_CLINICAL_DATA_BUCKET, pending_gcs_path, versioned_gcs_folder)
+     except Exception as e:
+         logger.error(str(e))
+         raise InternalServerError(str(e))
+     # Move any 'current' file(s) to 'archived' status
+     latest_version = PreprocessedFiles.archive_current_files(pending_file.file_category, pending_file.job_id)
+     # Insert new current/versioned DB record
+     PreprocessedFiles.create(
+         file_name=original_filename,
+         object_url=new_gcs_path,
+         file_category=pending_file.file_category,
+         uploader_email=get_current_user().email,
+         status="current",
+         job_id=pending_file.job_id,
+         version=latest_version + 1,
+     )
+     # Delete pending record
+     pending_file.delete()
+     return new_gcs_path
+
+
+ def strip_filename_and_pending_folder(path_str):
+     """Returns the file path above the 'pending' folder to be used for versioned files."""
+     path = Path(path_str)
+     if path.parent.name != "pending":
+         raise ValueError("Expected 'pending' folder above file")
+     return str(path.parent.parent)
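`strip_filename_and_pending_folder` simply climbs two levels above the uploaded object, after checking that the immediate parent folder is `pending`, so the versioned copy lands next to (not inside) the pending area. A quick illustration with a made-up object path:

```python
# The import path is the module shown in this diff; the object path is made up.
from cidc_api.shared.file_handling import strip_filename_and_pending_folder

print(strip_filename_and_pending_folder("trial-1/appendix_a/pending/file.xlsx"))
# trial-1/appendix_a

# A path whose parent folder is not "pending" raises:
# strip_filename_and_pending_folder("trial-1/appendix_a/file.xlsx")
# ValueError: Expected 'pending' folder above file
```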
cidc_api/shared/gcloud_client.py CHANGED
@@ -25,8 +25,8 @@ from typing import (
  )

  import googleapiclient.discovery
- import requests
  import pandas as pd
+ import requests
  from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
  from google.api_core.client_options import ClientOptions
  from google.api_core.iam import Policy
@@ -54,6 +54,7 @@ from ..config.settings import (
      GOOGLE_PATIENT_SAMPLE_TOPIC,
      GOOGLE_ARTIFACT_UPLOAD_TOPIC,
      GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC,
+     GOOGLE_HL_CLINICAL_VALIDATION_TOPIC,
      TESTING,
      ENV,
      IS_EMAIL_ON,
@@ -973,6 +974,12 @@ def publish_artifact_upload(file_id: int) -> None:
      report.result()


+ def publish_hl_clinical_validation(job_id: int) -> None:
+     """Publish to the high_level_clinical_validation topic that a job's files are ready to be validated."""
+     # Start validation asynchronously
+     _report = _encode_and_publish(str(job_id), GOOGLE_HL_CLINICAL_VALIDATION_TOPIC)
+
+
  def send_email(to_emails: List[str], subject: str, html_content: str, **kw) -> None:
      """
      Publish an email-to-send to the emails topic.
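`publish_hl_clinical_validation` follows the same fire-and-forget pattern as the other `publish_*` helpers: encode the job id and hand it to a Pub/Sub topic. The package's `_encode_and_publish` internals are not shown in this diff, so the sketch below uses `google-cloud-pubsub` directly to show what such a publish typically looks like; the project and topic ids are placeholders, and the real helper may differ.

```python
# Hedged sketch (not the package's implementation) of publishing a job id to Pub/Sub.
from google.cloud import pubsub_v1

publisher = pubsub_v1.PublisherClient()
# Placeholder project and topic ids.
topic_path = publisher.topic_path("my-gcp-project", "high_level_clinical_validation")


def publish_job_for_validation(job_id: int) -> None:
    # Pub/Sub payloads are bytes, so the integer job id is sent as an encoded string.
    future = publisher.publish(topic_path, data=str(job_id).encode("utf-8"))
    # Publishing is asynchronous; call .result() only if you need to block until delivery.
    message_id = future.result()
    print(f"Published job {job_id} as message {message_id}")
```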
nci_cidc_api_modules-1.1.40.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nci_cidc_api_modules
- Version: 1.1.38
+ Version: 1.1.40
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
  License: MIT license
@@ -28,7 +28,8 @@ Requires-Dist: python-dotenv==0.10.3
  Requires-Dist: requests==2.32.4
  Requires-Dist: jinja2==3.1.6
  Requires-Dist: certifi==2024.7.4
- Requires-Dist: nci-cidc-schemas==0.27.27
+ Requires-Dist: cloud-sql-python-connector[pg8000]==1.18.3
+ Requires-Dist: nci-cidc-schemas==0.27.28
  Dynamic: description
  Dynamic: description-content-type
  Dynamic: home-page
@@ -206,44 +207,15 @@ FLASK_APP=cidc_api.app:app flask db upgrade

  ### Connecting to a Cloud SQL database instance

- Install the [Cloud SQL Proxy](https://cloud.google.com/sql/docs/mysql/quickstart-proxy-test):
-
- ```bash
- sudo curl -o /usr/local/bin/cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.15.1/cloud-sql-proxy.darwin.amd64
- sudo chmod +x /usr/local/bin/cloud-sql-proxy
- mkdir ~/.cloudsql
- chmod 770 ~/.cloudsql
- ```
-
- Proxy to the dev Cloud SQL instance:
-
- ```bash
- cloud-sql-proxy --auto-iam-authn --address 127.0.0.1 --port 5432 nih-nci-cimac-cidc-dev2:us-east4:cidc-postgresql-dev2 &
- ```
-
- If you want to run the proxy alongside a postgres instance on localhost listening on 5432, change the port for the proxy to another port instead like 5433.
- If you experience auth errors, make sure your google cloud sdk is authenticated.
+ Make sure you are authenticated to gcloud:

  ```bash
  gcloud auth login
  gcloud auth application-default login
  ```

- To point an API running on localhost to the remote Postgres database, edit your `.env` file and comment out `POSTGRES_URI` and uncomment all environment variables prefixed with `CLOUD_SQL_`. Change CLOUD_SQL_SOCKET_DIR to contain a reference to your home directory. Restart your local API instance, and it will connect to the staging Cloud SQL instance via the local proxy.
-
- If you wish to connect to the staging Cloud SQL instance via the postgres REPL, download and run the CIDC sql proxy tool (a wrapper for `cloud_sql_proxy`):
-
- ```bash
- # Download the proxy
- curl https://raw.githubusercontent.com/NCI-CIDC/cidc-devops/master/scripts/cidc_sql_proxy.sh -o /usr/local/bin/cidc_sql_proxy
-
- # Prepare the proxy
- chmod +x /usr/local/bin/cidc_sql_proxy
- cidc_sql_proxy install
-
- # Run the proxy
- cidc_sql_proxy staging # or cidc_sql_proxy prod
- ```
+ In your `.env` file, comment out `POSTGRES_URI` and uncomment
+ `CLOUD_SQL_INSTANCE_NAME`, `CLOUD_SQL_DB_USER`, and `CLOUD_SQL_DB_NAME`. Replace `CLOUD_SQL_DB_USER` with your NIH email.

  ### Running database migrations

nci_cidc_api_modules-1.1.40.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
  cidc_api/config/__init__.py,sha256=5mX8GAPxUKV84iS-aGOoE-4m68LsOCGCDptXNdlgvj0,148
- cidc_api/config/db.py,sha256=5rf7kIowkiJIqJj2_JtO1cY9L55IjJBonJ-vThA4cGo,1960
+ cidc_api/config/db.py,sha256=kbCemCDYv_zyczw-V7H2JvLCa_XeEcvXkzvF28FxADw,2862
  cidc_api/config/logging.py,sha256=abhVYtn8lfhIt0tyV2WHFgSmp_s2eeJh7kodB6LH4J0,1149
  cidc_api/config/secrets.py,sha256=jRFj7W43pWuPf9DZQLCKF7WPXf5cUv-BAaS3ASqhV_Q,1481
- cidc_api/config/settings.py,sha256=mA-4r7oB60uFepYtl5abbPigjwX8aBz__qCJXdcWWbs,4272
+ cidc_api/config/settings.py,sha256=NsJbqW6Vqcz2f79xcAbk4th5tHne_I-RPCbKq_3hpz0,4427
  cidc_api/models/__init__.py,sha256=bl445G8Zic9YbhZ8ZBni07wtBMhLJRMBA-JqjLxx2bw,66
  cidc_api/models/migrations.py,sha256=gp9vtkYbA9FFy2s-7woelAmsvQbJ41LO2_DY-YkFIrQ,11464
- cidc_api/models/models.py,sha256=D4GmcQSLKGBi0k3-w3TZk7J46zKR80JViN2_vTBZ1ZQ,141678
+ cidc_api/models/models.py,sha256=eRZrUcQYIe_V5hMwD3ONc175vPQpUTe_KPcmGdYexJc,142944
  cidc_api/models/schemas.py,sha256=6IE2dJoEMcMbi0Vr1V3cYKnPKU0hv9vRKBixOZHe88s,2766
  cidc_api/models/files/__init__.py,sha256=8BMTnUSHzUbz0lBeEQY6NvApxDD3GMWMduoVMos2g4Y,213
  cidc_api/models/files/details.py,sha256=sZkGM7iEV4-J6IDQCdiMV6KBDLbPxCOqUMaU3aY9rX8,65153
@@ -13,12 +13,12 @@ cidc_api/models/files/facets.py,sha256=WqjfqtYJgY2tBnZ598Yc0eJdQUo2slFNLyTDaqPx_
  cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cidc_api/shared/auth.py,sha256=PHqmVGkqDjbmUofytVFwD_9ssgUomESl3fFtFHPwZYQ,9062
  cidc_api/shared/emails.py,sha256=HQIixEUsR8yyu7Iv8S81RjtvEQeGuzQHzBfGsWIfP7k,4961
- cidc_api/shared/file_handling.py,sha256=lbdY4XH-otpNjWDY1g6EQoVWtEYM_9j95OlQXeeFkhE,3808
- cidc_api/shared/gcloud_client.py,sha256=tgi6Ja31EUQcJueAIYHc3VyrMchoMZCdui1eruakCLg,36351
+ cidc_api/shared/file_handling.py,sha256=l4wiRkVJLL7QbCoODsLx-uki6Km8QoMmUlRVnUV9vkk,3894
+ cidc_api/shared/gcloud_client.py,sha256=cNtzCRsD_StBQRAzWJ8BimW-ecEGOqywvlM7kip8CrE,36681
  cidc_api/shared/jose.py,sha256=-qzGzEDAlokEp9E7WtBtQkXyyfPWTYXlwYpCqVJWmqM,1830
  cidc_api/shared/rest_utils.py,sha256=RwR30WOUAYCxL7V-i2totEyeriG30GbBDvBcpLXhM9w,6594
- nci_cidc_api_modules-1.1.38.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
- nci_cidc_api_modules-1.1.38.dist-info/METADATA,sha256=Vlhx5ZliL-b3-3Umhqm1q22BztzUG-mc2dTIyyHzZ24,41284
- nci_cidc_api_modules-1.1.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nci_cidc_api_modules-1.1.38.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
- nci_cidc_api_modules-1.1.38.dist-info/RECORD,,
+ nci_cidc_api_modules-1.1.40.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
+ nci_cidc_api_modules-1.1.40.dist-info/METADATA,sha256=IZwg1tt0j0d6j8ve8d3yYrfck4Hnp_GKo2By-9bb0vM,39967
+ nci_cidc_api_modules-1.1.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nci_cidc_api_modules-1.1.40.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
+ nci_cidc_api_modules-1.1.40.dist-info/RECORD,,