nci-cidc-api-modules 1.1.37__py3-none-any.whl → 1.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cidc_api/config/db.py CHANGED
@@ -5,6 +5,7 @@ from flask_sqlalchemy import SQLAlchemy
5
5
  from flask_migrate import Migrate, upgrade
6
6
  from sqlalchemy.engine.url import URL
7
7
  from sqlalchemy.orm import declarative_base
8
+ from google.cloud.sql.connector import Connector, IPTypes
8
9
 
9
10
  from .secrets import get_secrets_manager
10
11
 
@@ -12,6 +13,20 @@ db = SQLAlchemy()
12
13
  BaseModel = declarative_base()
13
14
  db.Model = BaseModel
14
15
 
16
+ connector = Connector()
17
+
18
+
19
+ def getconn():
20
+ return connector.connect(
21
+ environ.get("CLOUD_SQL_INSTANCE_NAME"),
22
+ "pg8000",
23
+ user=environ.get("CLOUD_SQL_DB_USER"),
24
+ password="xxxxx",
25
+ db=environ.get("CLOUD_SQL_DB_NAME"),
26
+ enable_iam_auth=True,
27
+ ip_type=IPTypes.PUBLIC,
28
+ )
29
+
15
30
 
16
31
  def init_db(app: Flask):
17
32
  """Connect `app` to the database and run migrations"""
@@ -30,31 +45,15 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
30
45
  # Connect to the test database
31
46
  db_uri = environ.get("TEST_POSTGRES_URI", "fake-conn-string")
32
47
  elif not db_uri:
33
- secrets = get_secrets_manager(testing)
34
-
35
- # If POSTGRES_URI env variable is not set,
36
- # we're connecting to a Cloud SQL instance.
37
-
38
- config: dict = {
39
- "drivername": "postgresql",
40
- "username": environ.get("CLOUD_SQL_DB_USER"),
41
- "password": secrets.get(environ.get("CLOUD_SQL_DB_PASS_ID")),
42
- "database": environ.get("CLOUD_SQL_DB_NAME"),
43
- }
44
-
45
- if environ.get("CLOUD_SQL_INSTANCE_NAME"):
46
- socket_dir = environ.get("CLOUD_SQL_SOCKET_DIR", "/cloudsql/")
47
-
48
- # If CLOUD_SQL_INSTANCE_NAME is defined, we're connecting
49
- # via a unix socket from inside App Engine.
50
- config["query"] = {"host": f'{socket_dir}{environ.get("CLOUD_SQL_INSTANCE_NAME")}'}
51
- else:
52
- raise RuntimeError(
53
- "Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect " + "to a database."
54
- )
55
-
56
- db_uri = str(URL.create(**config).render_as_string(hide_password=False))
57
-
48
+ db_uri = f"postgresql+pg8000://{environ.get('CLOUD_SQL_DB_USER')}:xxx@/{environ.get('CLOUD_SQL_DB_NAME')}"
58
49
  assert db_uri
59
50
 
60
51
  return db_uri
52
+
53
+
54
+ # Use SQLALCHEMY_ENGINE_OPTIONS to connect to the cloud but use uri for local db
55
+ def cloud_connector(testing: bool = False):
56
+ if not testing and not environ.get("POSTGRES_URI"):
57
+ return {"creator": getconn}
58
+ else:
59
+ return {}
@@ -10,7 +10,7 @@ from os import environ, path, mkdir
10
10
 
11
11
  from dotenv import load_dotenv
12
12
 
13
- from .db import get_sqlalchemy_database_uri
13
+ from .db import get_sqlalchemy_database_uri, cloud_connector
14
14
  from .secrets import get_secrets_manager
15
15
 
16
16
  load_dotenv()
@@ -54,6 +54,7 @@ else:
54
54
 
55
55
  ### Configure Flask-SQLAlchemy ###
56
56
  SQLALCHEMY_DATABASE_URI = get_sqlalchemy_database_uri(TESTING)
57
+ SQLALCHEMY_ENGINE_OPTIONS = cloud_connector(TESTING)
57
58
  SQLALCHEMY_TRACK_MODIFICATIONS = False
58
59
  SQLALCHEMY_ECHO = False # Set to True to emit all compiled sql statements
59
60
 
@@ -81,6 +82,7 @@ GOOGLE_PATIENT_SAMPLE_TOPIC = environ["GOOGLE_PATIENT_SAMPLE_TOPIC"]
81
82
  GOOGLE_EMAILS_TOPIC = environ["GOOGLE_EMAILS_TOPIC"]
82
83
  GOOGLE_ARTIFACT_UPLOAD_TOPIC = environ["GOOGLE_ARTIFACT_UPLOAD_TOPIC"]
83
84
  GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC = environ["GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC"]
85
+ GOOGLE_HL_CLINICAL_VALIDATION_TOPIC = environ["GOOGLE_HL_CLINICAL_VALIDATION_TOPIC"]
84
86
  GOOGLE_AND_OPERATOR = " && "
85
87
  GOOGLE_OR_OPERATOR = " || "
86
88
 
cidc_api/models/models.py CHANGED
@@ -80,6 +80,7 @@ from sqlalchemy import (
80
80
  Boolean,
81
81
  CheckConstraint,
82
82
  Column,
83
+ Date,
83
84
  DateTime,
84
85
  Enum,
85
86
  ForeignKey,
@@ -357,6 +358,7 @@ class CIDCRole(EnumBaseClass):
357
358
  NCI_BIOBANK_USER = "nci-biobank-user"
358
359
  NETWORK_VIEWER = "network-viewer"
359
360
  PACT_USER = "pact-user"
361
+ CLINICAL_TRIAL_USER = "clinical-trial-user"
360
362
 
361
363
 
362
364
  ROLES = [role.value for role in CIDCRole]
@@ -3215,7 +3217,7 @@ def upload_manifest_json(
3215
3217
  * The updated trial metadata object is updated in the `TrialMetadata` table.
3216
3218
  """
3217
3219
  try:
3218
- TrialMetadata.patch_manifest(trial_id, md_patch, session=session, commit=False)
3220
+ TrialMetadata.patch_manifest(trial_id, md_patch, session=session, commit=True)
3219
3221
  except ValidationError as e:
3220
3222
  raise BadRequest(json_validation.format_validation_error(e)) from e
3221
3223
  except ValidationMultiError as e:
@@ -3353,16 +3355,28 @@ class PreprocessedFiles(CommonColumns):
3353
3355
  .all()
3354
3356
  )
3355
3357
 
3356
- # TODO: logic for pending vs current files after high level validation
3357
3358
  @classmethod
3358
3359
  @with_default_session
3359
- def get_pending_non_admin_files(cls, job_id: int, session: Session) -> list["PreprocessedFiles"]:
3360
- return (
3360
+ def get_latest_non_admin_files(cls, job_id: int, session: Session) -> list["PreprocessedFiles"]:
3361
+ """Return the most recently uploaded file for each non-admin file category for the given job_id."""
3362
+ # Subquery to get latest _created per file_category
3363
+ latest_subquery = (
3364
+ session.query(cls.file_category, func.max(cls._created).label("latest_created"))
3365
+ .filter(cls.job_id == job_id, cls.file_category.notin_(ADMIN_FILE_CATEGORIES))
3366
+ .group_by(cls.file_category)
3367
+ .subquery()
3368
+ )
3369
+ # Join main table on file_category and latest _created to get full records of latest files
3370
+ latest_files = (
3361
3371
  session.query(cls)
3362
- .filter(cls.job_id == job_id)
3363
- .filter(cls.status == "pending", cls.file_category.notin_(ADMIN_FILE_CATEGORIES))
3372
+ .join(
3373
+ latest_subquery,
3374
+ (cls.file_category == latest_subquery.c.file_category)
3375
+ & (cls._created == latest_subquery.c.latest_created),
3376
+ )
3364
3377
  .all()
3365
3378
  )
3379
+ return latest_files
3366
3380
 
3367
3381
  @classmethod
3368
3382
  def add_job_filter(cls, query, job_id):
@@ -3412,11 +3426,13 @@ class IngestionJobs(CommonColumns):
3412
3426
  status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="status"), nullable=False)
3413
3427
  trial_id = Column(String, nullable=False)
3414
3428
  version = Column(Integer, nullable=False)
3429
+ pending = Column(Boolean, nullable=False, default=False)
3430
+ start_date = Column(Date, nullable=True)
3415
3431
 
3416
3432
  @staticmethod
3417
3433
  @with_default_session
3418
- def create(trial_id: str, status: str, version: int, session: Session = None):
3419
- new_job = IngestionJobs(trial_id=trial_id, status=status, version=version)
3434
+ def create(trial_id: str, status: str, version: int, pending: Boolean = False, session: Session = None):
3435
+ new_job = IngestionJobs(trial_id=trial_id, status=status, version=version, pending=pending)
3420
3436
  new_job.insert(session=session)
3421
3437
  return new_job
3422
3438
 
@@ -3443,6 +3459,18 @@ class IngestionJobs(CommonColumns):
3443
3459
  headers_ended = True
3444
3460
  return categories
3445
3461
 
3462
+ @classmethod
3463
+ @with_default_session
3464
+ def atomic_set_job_as_pending(cls, job_id: int, session: Session) -> Boolean:
3465
+ # Preventing rare race condition where multiple people try and submit a job for validation
3466
+ return bool(
3467
+ session.execute(
3468
+ update(IngestionJobs)
3469
+ .where(and_(IngestionJobs.id == job_id, IngestionJobs.pending == False))
3470
+ .values(pending=True)
3471
+ ).rowcount
3472
+ )
3473
+
3446
3474
  @classmethod
3447
3475
  @with_default_session
3448
3476
  def get_jobs_by_trial(cls, trial_id: str, session: Session = None) -> list["IngestionJobs"]:
@@ -1,10 +1,15 @@
1
+ from pathlib import Path
2
+
1
3
  from werkzeug.datastructures import FileStorage
2
- from werkzeug.exceptions import BadRequest
4
+ from werkzeug.exceptions import BadRequest, InternalServerError
3
5
 
6
+ from ..config.logging import get_logger
4
7
  from ..config.settings import GOOGLE_CLINICAL_DATA_BUCKET
5
8
  from ..models import PreprocessedFiles
6
9
  from ..shared.auth import get_current_user
7
- from ..shared.gcloud_client import upload_file_to_gcs
10
+ from ..shared.gcloud_client import upload_file_to_gcs, move_gcs_file
11
+
12
+ logger = get_logger(__name__)
8
13
 
9
14
 
10
15
  def set_current_file(file: FileStorage, file_category: str, gcs_folder: str, job_id: int = None) -> PreprocessedFiles:
@@ -22,7 +27,7 @@ def create_file(
22
27
  ) -> PreprocessedFiles:
23
28
  """Upload file to GCS and create corresponding metadata record in the database."""
24
29
  status = "pending" if gcs_folder.endswith("pending/") else "current"
25
- # only need timestamp for current/approved files
30
+ # only need timestamp for current/versioned files
26
31
  append_timestamp = status == "current"
27
32
  # create file in GCS
28
33
  gcs_file_path = upload_file_to_gcs(file, GOOGLE_CLINICAL_DATA_BUCKET, gcs_folder, append_timestamp=append_timestamp)
@@ -54,3 +59,38 @@ def format_common_preprocessed_file_response(file: PreprocessedFiles):
54
59
  "uploader_email": file.uploader_email,
55
60
  "date": file._created.isoformat(),
56
61
  }
62
+
63
+
64
+ def version_pending_file(pending_file: PreprocessedFiles):
65
+ """Transitions an existing pending file to be a current versioned file."""
66
+ original_filename = pending_file.file_name
67
+ pending_gcs_path = pending_file.object_url
68
+ try:
69
+ versioned_gcs_folder = strip_filename_and_pending_folder(pending_gcs_path)
70
+ new_gcs_path = move_gcs_file(GOOGLE_CLINICAL_DATA_BUCKET, pending_gcs_path, versioned_gcs_folder)
71
+ except Exception as e:
72
+ logger.error(str(e))
73
+ raise InternalServerError(str(e))
74
+ # Move any 'current' file(s) to 'archived' status
75
+ latest_version = PreprocessedFiles.archive_current_files(pending_file.file_category, pending_file.job_id)
76
+ # Insert new current/versioned DB record
77
+ PreprocessedFiles.create(
78
+ file_name=original_filename,
79
+ object_url=new_gcs_path,
80
+ file_category=pending_file.file_category,
81
+ uploader_email=get_current_user().email,
82
+ status="current",
83
+ job_id=pending_file.job_id,
84
+ version=latest_version + 1,
85
+ )
86
+ # Delete pending record
87
+ pending_file.delete()
88
+ return new_gcs_path
89
+
90
+
91
+ def strip_filename_and_pending_folder(path_str):
92
+ """Returns the file path above the 'pending' folder to be used for versioned files."""
93
+ path = Path(path_str)
94
+ if path.parent.name != "pending":
95
+ raise ValueError("Expected 'pending' folder above file")
96
+ return str(path.parent.parent)
@@ -25,8 +25,8 @@ from typing import (
25
25
  )
26
26
 
27
27
  import googleapiclient.discovery
28
- import requests
29
28
  import pandas as pd
29
+ import requests
30
30
  from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
31
31
  from google.api_core.client_options import ClientOptions
32
32
  from google.api_core.iam import Policy
@@ -54,6 +54,7 @@ from ..config.settings import (
54
54
  GOOGLE_PATIENT_SAMPLE_TOPIC,
55
55
  GOOGLE_ARTIFACT_UPLOAD_TOPIC,
56
56
  GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC,
57
+ GOOGLE_HL_CLINICAL_VALIDATION_TOPIC,
57
58
  TESTING,
58
59
  ENV,
59
60
  IS_EMAIL_ON,
@@ -973,6 +974,12 @@ def publish_artifact_upload(file_id: int) -> None:
973
974
  report.result()
974
975
 
975
976
 
977
+ def publish_hl_clinical_validation(job_id: int) -> None:
978
+ """Publish to the high_level_clinical_validation topic that a job's files are ready to be validated."""
979
+ # Start validation asynchronously
980
+ _report = _encode_and_publish(str(job_id), GOOGLE_HL_CLINICAL_VALIDATION_TOPIC)
981
+
982
+
976
983
  def send_email(to_emails: List[str], subject: str, html_content: str, **kw) -> None:
977
984
  """
978
985
  Publish an email-to-send to the emails topic.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.37
3
+ Version: 1.1.39
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -28,6 +28,7 @@ Requires-Dist: python-dotenv==0.10.3
28
28
  Requires-Dist: requests==2.32.4
29
29
  Requires-Dist: jinja2==3.1.6
30
30
  Requires-Dist: certifi==2024.7.4
31
+ Requires-Dist: cloud-sql-python-connector[pg8000]==1.18.3
31
32
  Requires-Dist: nci-cidc-schemas==0.27.27
32
33
  Dynamic: description
33
34
  Dynamic: description-content-type
@@ -206,44 +207,15 @@ FLASK_APP=cidc_api.app:app flask db upgrade
206
207
 
207
208
  ### Connecting to a Cloud SQL database instance
208
209
 
209
- Install the [Cloud SQL Proxy](https://cloud.google.com/sql/docs/mysql/quickstart-proxy-test):
210
-
211
- ```bash
212
- sudo curl -o /usr/local/bin/cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.15.1/cloud-sql-proxy.darwin.amd64
213
- sudo chmod +x /usr/local/bin/cloud-sql-proxy
214
- mkdir ~/.cloudsql
215
- chmod 770 ~/.cloudsql
216
- ```
217
-
218
- Proxy to the dev Cloud SQL instance:
219
-
220
- ```bash
221
- cloud-sql-proxy --auto-iam-authn --address 127.0.0.1 --port 5432 nih-nci-cimac-cidc-dev2:us-east4:cidc-postgresql-dev2 &
222
- ```
223
-
224
- If you want to run the proxy alongside a postgres instance on localhost listening on 5432, change the port for the proxy to another port instead like 5433.
225
- If you experience auth errors, make sure your google cloud sdk is authenticated.
210
+ Make sure you are authenticated to gcloud:
226
211
 
227
212
  ```bash
228
213
  gcloud auth login
229
214
  gcloud auth application-default login
230
215
  ```
231
216
 
232
- To point an API running on localhost to the remote Postgres database, edit your `.env` file and comment out `POSTGRES_URI` and uncomment all environment variables prefixed with `CLOUD_SQL_`. Change CLOUD_SQL_SOCKET_DIR to contain a reference to your home directory. Restart your local API instance, and it will connect to the staging Cloud SQL instance via the local proxy.
233
-
234
- If you wish to connect to the staging Cloud SQL instance via the postgres REPL, download and run the CIDC sql proxy tool (a wrapper for `cloud_sql_proxy`):
235
-
236
- ```bash
237
- # Download the proxy
238
- curl https://raw.githubusercontent.com/NCI-CIDC/cidc-devops/master/scripts/cidc_sql_proxy.sh -o /usr/local/bin/cidc_sql_proxy
239
-
240
- # Prepare the proxy
241
- chmod +x /usr/local/bin/cidc_sql_proxy
242
- cidc_sql_proxy install
243
-
244
- # Run the proxy
245
- cidc_sql_proxy staging # or cidc_sql_proxy prod
246
- ```
217
+ In your .env file, comment out `POSTGRES_URI` and uncomment
218
+ `CLOUD_SQL_INSTANCE_NAME`, `CLOUD_SQL_DB_USER`, and `CLOUD_SQL_DB_NAME`. Replace `CLOUD_SQL_DB_USER` with your NIH email.
247
219
 
248
220
  ### Running database migrations
249
221
 
@@ -1,11 +1,11 @@
1
1
  cidc_api/config/__init__.py,sha256=5mX8GAPxUKV84iS-aGOoE-4m68LsOCGCDptXNdlgvj0,148
2
- cidc_api/config/db.py,sha256=5rf7kIowkiJIqJj2_JtO1cY9L55IjJBonJ-vThA4cGo,1960
2
+ cidc_api/config/db.py,sha256=XEPRwjPj6rnCgcOM1p0qKb54tNo5708dDQcY6_wbs0U,1682
3
3
  cidc_api/config/logging.py,sha256=abhVYtn8lfhIt0tyV2WHFgSmp_s2eeJh7kodB6LH4J0,1149
4
4
  cidc_api/config/secrets.py,sha256=jRFj7W43pWuPf9DZQLCKF7WPXf5cUv-BAaS3ASqhV_Q,1481
5
- cidc_api/config/settings.py,sha256=mA-4r7oB60uFepYtl5abbPigjwX8aBz__qCJXdcWWbs,4272
5
+ cidc_api/config/settings.py,sha256=NsJbqW6Vqcz2f79xcAbk4th5tHne_I-RPCbKq_3hpz0,4427
6
6
  cidc_api/models/__init__.py,sha256=bl445G8Zic9YbhZ8ZBni07wtBMhLJRMBA-JqjLxx2bw,66
7
7
  cidc_api/models/migrations.py,sha256=gp9vtkYbA9FFy2s-7woelAmsvQbJ41LO2_DY-YkFIrQ,11464
8
- cidc_api/models/models.py,sha256=u2SA3R0xaVgdoU8nnnl2dptt_hMtkdgQcmZhdEQbLv4,141631
8
+ cidc_api/models/models.py,sha256=--EqlKE70vu-0MRvIMBJbp1rrU-kBlKtkHvZD-eGkVA,142895
9
9
  cidc_api/models/schemas.py,sha256=6IE2dJoEMcMbi0Vr1V3cYKnPKU0hv9vRKBixOZHe88s,2766
10
10
  cidc_api/models/files/__init__.py,sha256=8BMTnUSHzUbz0lBeEQY6NvApxDD3GMWMduoVMos2g4Y,213
11
11
  cidc_api/models/files/details.py,sha256=sZkGM7iEV4-J6IDQCdiMV6KBDLbPxCOqUMaU3aY9rX8,65153
@@ -13,12 +13,12 @@ cidc_api/models/files/facets.py,sha256=WqjfqtYJgY2tBnZ598Yc0eJdQUo2slFNLyTDaqPx_
13
13
  cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  cidc_api/shared/auth.py,sha256=PHqmVGkqDjbmUofytVFwD_9ssgUomESl3fFtFHPwZYQ,9062
15
15
  cidc_api/shared/emails.py,sha256=HQIixEUsR8yyu7Iv8S81RjtvEQeGuzQHzBfGsWIfP7k,4961
16
- cidc_api/shared/file_handling.py,sha256=z9Fza4PNTQ2U2VgMEiwx-Xpsp1AKgJuBugEl8loglec,2334
17
- cidc_api/shared/gcloud_client.py,sha256=tgi6Ja31EUQcJueAIYHc3VyrMchoMZCdui1eruakCLg,36351
16
+ cidc_api/shared/file_handling.py,sha256=l4wiRkVJLL7QbCoODsLx-uki6Km8QoMmUlRVnUV9vkk,3894
17
+ cidc_api/shared/gcloud_client.py,sha256=cNtzCRsD_StBQRAzWJ8BimW-ecEGOqywvlM7kip8CrE,36681
18
18
  cidc_api/shared/jose.py,sha256=-qzGzEDAlokEp9E7WtBtQkXyyfPWTYXlwYpCqVJWmqM,1830
19
19
  cidc_api/shared/rest_utils.py,sha256=RwR30WOUAYCxL7V-i2totEyeriG30GbBDvBcpLXhM9w,6594
20
- nci_cidc_api_modules-1.1.37.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
21
- nci_cidc_api_modules-1.1.37.dist-info/METADATA,sha256=lkOyyerechLmU60nEzlIxONbexRRnvVbB1T1qApjjtM,41284
22
- nci_cidc_api_modules-1.1.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
- nci_cidc_api_modules-1.1.37.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
24
- nci_cidc_api_modules-1.1.37.dist-info/RECORD,,
20
+ nci_cidc_api_modules-1.1.39.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
21
+ nci_cidc_api_modules-1.1.39.dist-info/METADATA,sha256=6OU-ZN-TmCrbxAwF162kbfvsUCaeqsgHcPWGNZG0UGQ,39967
22
+ nci_cidc_api_modules-1.1.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ nci_cidc_api_modules-1.1.39.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
24
+ nci_cidc_api_modules-1.1.39.dist-info/RECORD,,