nci-cidc-api-modules 1.1.24__tar.gz → 1.2.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. nci_cidc_api_modules-1.2.21/MANIFEST.in +2 -0
  2. nci_cidc_api_modules-1.1.24/README.md → nci_cidc_api_modules-1.2.21/PKG-INFO +50 -36
  3. nci_cidc_api_modules-1.1.24/PKG-INFO → nci_cidc_api_modules-1.2.21/README.md +8 -76
  4. nci_cidc_api_modules-1.2.21/cidc_api/config/db.py +58 -0
  5. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/config/logging.py +5 -2
  6. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/config/settings.py +9 -1
  7. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/models/__init__.py +2 -0
  8. nci_cidc_api_modules-1.2.21/cidc_api/models/data.py +15 -0
  9. nci_cidc_api_modules-1.2.21/cidc_api/models/db/base_orm.py +25 -0
  10. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/models/files/details.py +31 -0
  11. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/models/files/facets.py +77 -0
  12. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/models/migrations.py +12 -39
  13. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/models/models.py +564 -54
  14. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/models/schemas.py +1 -0
  15. nci_cidc_api_modules-1.2.21/cidc_api/models/types.py +1439 -0
  16. nci_cidc_api_modules-1.2.21/cidc_api/shared/email_layout.html +258 -0
  17. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/shared/emails.py +33 -3
  18. nci_cidc_api_modules-1.2.21/cidc_api/shared/file_handling.py +141 -0
  19. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/shared/gcloud_client.py +116 -15
  20. nci_cidc_api_modules-1.2.21/cidc_api/shared/utils.py +11 -0
  21. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/nci_cidc_api_modules.egg-info/PKG-INFO +34 -60
  22. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/nci_cidc_api_modules.egg-info/SOURCES.txt +6 -3
  23. nci_cidc_api_modules-1.2.21/nci_cidc_api_modules.egg-info/requires.txt +24 -0
  24. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/pyproject.toml +7 -2
  25. nci_cidc_api_modules-1.2.21/requirements.modules.txt +26 -0
  26. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/setup.py +3 -2
  27. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/tests/test_api.py +22 -8
  28. nci_cidc_api_modules-1.1.24/MANIFEST.in +0 -1
  29. nci_cidc_api_modules-1.1.24/cidc_api/config/db.py +0 -61
  30. nci_cidc_api_modules-1.1.24/cidc_api/csms/__init__.py +0 -1
  31. nci_cidc_api_modules-1.1.24/cidc_api/csms/auth.py +0 -105
  32. nci_cidc_api_modules-1.1.24/cidc_api/models/csms_api.py +0 -872
  33. nci_cidc_api_modules-1.1.24/nci_cidc_api_modules.egg-info/requires.txt +0 -22
  34. nci_cidc_api_modules-1.1.24/requirements.modules.txt +0 -22
  35. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/LICENSE +0 -0
  36. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/config/__init__.py +0 -0
  37. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/config/secrets.py +0 -0
  38. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/models/files/__init__.py +0 -0
  39. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/shared/__init__.py +0 -0
  40. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/shared/auth.py +0 -0
  41. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/shared/jose.py +0 -0
  42. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/cidc_api/shared/rest_utils.py +0 -0
  43. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
  44. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
  45. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
  46. {nci_cidc_api_modules-1.1.24 → nci_cidc_api_modules-1.2.21}/setup.cfg +0 -0
@@ -0,0 +1,2 @@
+ include requirements.modules.txt
+ include cidc_api/shared/email_layout.html
@@ -1,3 +1,45 @@
+ Metadata-Version: 2.4
+ Name: nci_cidc_api_modules
+ Version: 1.2.21
+ Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
+ Home-page: https://github.com/NCI-CIDC/cidc-api-gae
+ License: MIT license
+ Requires-Python: >=3.13
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: certifi>=2025.10.5
+ Requires-Dist: cloud-sql-python-connector[pg8000]>=1.18.5
+ Requires-Dist: flask>=3.1.2
+ Requires-Dist: flask-migrate>=4.1.0
+ Requires-Dist: flask-sqlalchemy>=3.1.1
+ Requires-Dist: google-auth==2.41.1
+ Requires-Dist: google-api-python-client>=2.185.0
+ Requires-Dist: google-cloud-bigquery>=3.38.0
+ Requires-Dist: google-cloud-pubsub>=2.32.0
+ Requires-Dist: google-cloud-secret-manager>=2.25.0
+ Requires-Dist: google-cloud-storage>=3.4.1
+ Requires-Dist: jinja2>=3.1.6
+ Requires-Dist: marshmallow>=4.0.1
+ Requires-Dist: marshmallow-sqlalchemy>=1.4.2
+ Requires-Dist: numpy>=2.3.4
+ Requires-Dist: packaging>=25.0
+ Requires-Dist: pandas>=2.3.3
+ Requires-Dist: pyarrow>=22.0.0
+ Requires-Dist: python-dotenv>=1.2.1
+ Requires-Dist: requests>=2.32.5
+ Requires-Dist: sqlalchemy>=2.0.44
+ Requires-Dist: sqlalchemy-mixins~=2.0.5
+ Requires-Dist: werkzeug>=3.1.3
+ Requires-Dist: nci-cidc-schemas==0.28.9
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
  # NCI CIDC API <!-- omit in TOC -->
 
  The next generation of the CIDC API, reworked to use Google Cloud-managed services. This API is built with the Flask REST API framework backed by Google Cloud SQL, running on Google App Engine.
@@ -21,7 +63,7 @@ The next generation of the CIDC API, reworked to use Google Cloud-managed servic
 
  ## Install Python dependencies
 
- Python versions tested include 3.9 and 3.10. The current App Engine is using version 3.9 (see [app.prod.yaml](./app.prod.yaml)). You can use https://github.com/pyenv/pyenv to manage your python versions. Homebrew will also work, but you will have to be specific when you install packages with pip outside of virtual environments. On that note, it is recommended that you install your python dependencies in an isolated environment. For example,
+ Use Python version 3.13.
 
  ```bash
  # make a virtual environment in the current directory called "venv"
@@ -166,52 +208,24 @@ FLASK_APP=cidc_api.app:app flask db upgrade
 
  ### Connecting to a Cloud SQL database instance
 
- Install the [Cloud SQL Proxy](https://cloud.google.com/sql/docs/mysql/quickstart-proxy-test):
-
- ```bash
- sudo curl -o /usr/local/bin/cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.15.1/cloud-sql-proxy.darwin.amd64
- sudo chmod +x /usr/local/bin/cloud-sql-proxy
- mkdir ~/.cloudsql
- chmod 770 ~/.cloudsql
- ```
-
- Proxy to the dev Cloud SQL instance:
-
- ```bash
- cloud-sql-proxy --auto-iam-authn --address 127.0.0.1 --port 5432 nih-nci-cimac-cidc-dev2:us-east4:cidc-postgresql-dev2 &
- ```
-
- If you want to run the proxy alongside a postgres instance on localhost listening on 5432, change the port for the proxy to another port instead like 5433.
- If you experience auth errors, make sure your google cloud sdk is authenticated.
+ Make sure you are authenticated to gcloud:
 
  ```bash
  gcloud auth login
  gcloud auth application-default login
  ```
 
- To point an API running on localhost to the remote Postgres database, edit your `.env` file and comment out `POSTGRES_URI` and uncomment all environment variables prefixed with `CLOUD_SQL_`. Change CLOUD_SQL_SOCKET_DIR to contain a reference to your home directory. Restart your local API instance, and it will connect to the staging Cloud SQL instance via the local proxy.
-
- If you wish to connect to the staging Cloud SQL instance via the postgres REPL, download and run the CIDC sql proxy tool (a wrapper for `cloud_sql_proxy`):
-
- ```bash
- # Download the proxy
- curl https://raw.githubusercontent.com/NCI-CIDC/cidc-devops/master/scripts/cidc_sql_proxy.sh -o /usr/local/bin/cidc_sql_proxy
-
- # Prepare the proxy
- chmod +x /usr/local/bin/cidc_sql_proxy
- cidc_sql_proxy install
-
- # Run the proxy
- cidc_sql_proxy staging # or cidc_sql_proxy prod
- ```
+ In your .env file, comment out `POSTGRES_URI` and uncomment
+ `CLOUD_SQL_INSTANCE_NAME`, `CLOUD_SQL_DB_USER`, and `CLOUD_SQL_DB_NAME`. Replace `CLOUD_SQL_DB_USER` with your NIH email.
 
- ### Running database migrations
+ ### Creating/Running database migrations
 
  This project uses [`Flask Migrate`](https://flask-migrate.readthedocs.io/en/latest/) for managing database migrations. To create a new migration and upgrade the database specified in your `.env` config:
 
  ```bash
  export FLASK_APP=cidc_api/app.py
- # Generate the migration script
+ # First, make your changes to the model(s)
+ # Then, let Flask automatically generate the db change. Double-check the migration script!
  flask db migrate -m "<a message describing the changes in this migration>"
  # Apply changes to the database
  flask db upgrade
@@ -383,7 +397,7 @@ API authentication relies on _identity tokens_ generated by Auth0 to verify that
 
  - It is a well-formatted JWT.
  - It has not yet expired.
- - Its cryptographic signature is valid.
+ - Its cryptographic signature is valid.
 
  JWTs are a lot like passports - they convey personal information, they’re issued by a trusted entity, and they expire after a certain time. Moreover, like passports, JWTs **can be stolen** and used to impersonate someone. As such, JWTs should be kept private and treated sort of like short-lived passwords.
 
@@ -1,43 +1,3 @@
- Metadata-Version: 2.4
- Name: nci_cidc_api_modules
- Version: 1.1.24
- Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
- Home-page: https://github.com/NCI-CIDC/cidc-api-gae
- License: MIT license
- Requires-Python: >=3.9
- Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: werkzeug==3.0.6
- Requires-Dist: flask==3.0.3
- Requires-Dist: flask-migrate==3.1.0
- Requires-Dist: flask-sqlalchemy==3.0.2
- Requires-Dist: sqlalchemy==1.4.54
- Requires-Dist: marshmallow==3.19.0
- Requires-Dist: marshmallow-sqlalchemy==0.22.3
- Requires-Dist: google-cloud-storage==2.18.0
- Requires-Dist: google-cloud-secret-manager==2.20.1
- Requires-Dist: google-cloud-pubsub==2.22.0
- Requires-Dist: google-cloud-bigquery==3.18.0
- Requires-Dist: google-api-python-client==2.64.0
- Requires-Dist: google-auth==2.32.0
- Requires-Dist: packaging>=20.0.0
- Requires-Dist: pyarrow==14.0.1
- Requires-Dist: numpy<2,>=1.16.5
- Requires-Dist: pandas==1.5.3
- Requires-Dist: python-dotenv==0.10.3
- Requires-Dist: requests==2.32.3
- Requires-Dist: jinja2==3.1.6
- Requires-Dist: certifi==2024.7.4
- Requires-Dist: nci-cidc-schemas==0.27.16
- Dynamic: description
- Dynamic: description-content-type
- Dynamic: home-page
- Dynamic: license
- Dynamic: license-file
- Dynamic: requires-dist
- Dynamic: requires-python
- Dynamic: summary
-
  # NCI CIDC API <!-- omit in TOC -->
 
  The next generation of the CIDC API, reworked to use Google Cloud-managed services. This API is built with the Flask REST API framework backed by Google Cloud SQL, running on Google App Engine.
@@ -61,7 +21,7 @@ The next generation of the CIDC API, reworked to use Google Cloud-managed servic
 
  ## Install Python dependencies
 
- Python versions tested include 3.9 and 3.10. The current App Engine is using version 3.9 (see [app.prod.yaml](./app.prod.yaml)). You can use https://github.com/pyenv/pyenv to manage your python versions. Homebrew will also work, but you will have to be specific when you install packages with pip outside of virtual environments. On that note, it is recommended that you install your python dependencies in an isolated environment. For example,
+ Use Python version 3.13.
 
  ```bash
  # make a virtual environment in the current directory called "venv"
@@ -206,52 +166,24 @@ FLASK_APP=cidc_api.app:app flask db upgrade
 
  ### Connecting to a Cloud SQL database instance
 
- Install the [Cloud SQL Proxy](https://cloud.google.com/sql/docs/mysql/quickstart-proxy-test):
-
- ```bash
- sudo curl -o /usr/local/bin/cloud-sql-proxy https://storage.googleapis.com/cloud-sql-connectors/cloud-sql-proxy/v2.15.1/cloud-sql-proxy.darwin.amd64
- sudo chmod +x /usr/local/bin/cloud-sql-proxy
- mkdir ~/.cloudsql
- chmod 770 ~/.cloudsql
- ```
-
- Proxy to the dev Cloud SQL instance:
-
- ```bash
- cloud-sql-proxy --auto-iam-authn --address 127.0.0.1 --port 5432 nih-nci-cimac-cidc-dev2:us-east4:cidc-postgresql-dev2 &
- ```
-
- If you want to run the proxy alongside a postgres instance on localhost listening on 5432, change the port for the proxy to another port instead like 5433.
- If you experience auth errors, make sure your google cloud sdk is authenticated.
+ Make sure you are authenticated to gcloud:
 
  ```bash
  gcloud auth login
  gcloud auth application-default login
  ```
 
- To point an API running on localhost to the remote Postgres database, edit your `.env` file and comment out `POSTGRES_URI` and uncomment all environment variables prefixed with `CLOUD_SQL_`. Change CLOUD_SQL_SOCKET_DIR to contain a reference to your home directory. Restart your local API instance, and it will connect to the staging Cloud SQL instance via the local proxy.
-
- If you wish to connect to the staging Cloud SQL instance via the postgres REPL, download and run the CIDC sql proxy tool (a wrapper for `cloud_sql_proxy`):
-
- ```bash
- # Download the proxy
- curl https://raw.githubusercontent.com/NCI-CIDC/cidc-devops/master/scripts/cidc_sql_proxy.sh -o /usr/local/bin/cidc_sql_proxy
-
- # Prepare the proxy
- chmod +x /usr/local/bin/cidc_sql_proxy
- cidc_sql_proxy install
-
- # Run the proxy
- cidc_sql_proxy staging # or cidc_sql_proxy prod
- ```
+ In your .env file, comment out `POSTGRES_URI` and uncomment
+ `CLOUD_SQL_INSTANCE_NAME`, `CLOUD_SQL_DB_USER`, and `CLOUD_SQL_DB_NAME`. Replace `CLOUD_SQL_DB_USER` with your NIH email.
 
- ### Running database migrations
+ ### Creating/Running database migrations
 
  This project uses [`Flask Migrate`](https://flask-migrate.readthedocs.io/en/latest/) for managing database migrations. To create a new migration and upgrade the database specified in your `.env` config:
 
  ```bash
  export FLASK_APP=cidc_api/app.py
- # Generate the migration script
+ # First, make your changes to the model(s)
+ # Then, let Flask automatically generate the db change. Double-check the migration script!
  flask db migrate -m "<a message describing the changes in this migration>"
  # Apply changes to the database
  flask db upgrade
@@ -423,7 +355,7 @@ API authentication relies on _identity tokens_ generated by Auth0 to verify that
 
  - It is a well-formatted JWT.
  - It has not yet expired.
- - Its cryptographic signature is valid.
+ - Its cryptographic signature is valid.
 
  JWTs are a lot like passports - they convey personal information, they’re issued by a trusted entity, and they expire after a certain time. Moreover, like passports, JWTs **can be stolen** and used to impersonate someone. As such, JWTs should be kept private and treated sort of like short-lived passwords.
 
@@ -0,0 +1,58 @@
+ from os import environ
+
+ from flask import Flask
+ from flask_sqlalchemy import SQLAlchemy
+ from flask_migrate import Migrate, upgrade
+ from sqlalchemy.engine.url import URL
+ from sqlalchemy.orm import declarative_base
+ from google.cloud.sql.connector import Connector, IPTypes
+
+ from .secrets import get_secrets_manager
+
+ db = SQLAlchemy()
+ BaseModel = db.Model
+
+ connector = Connector()
+
+
+ def getconn():
+     return connector.connect(
+         environ.get("CLOUD_SQL_INSTANCE_NAME"),
+         "pg8000",
+         user=environ.get("CLOUD_SQL_DB_USER"),
+         password="xxxxx",
+         db=environ.get("CLOUD_SQL_DB_NAME"),
+         enable_iam_auth=True,
+         ip_type=IPTypes.PUBLIC,
+     )
+
+
+ def init_db(app: Flask):
+     """Connect `app` to the database and run migrations"""
+     db.init_app(app)
+     Migrate(app, db, app.config["MIGRATIONS_PATH"])
+     with app.app_context():
+         upgrade(app.config["MIGRATIONS_PATH"])
+
+
+ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
+     """Get the PostgreSQL DB URI from environment variables"""
+
+     db_uri = environ.get("POSTGRES_URI")
+     if testing:
+         # Connect to the test database
+         db_uri = environ.get("TEST_POSTGRES_URI", "fake-conn-string")
+     elif not db_uri:
+         db_uri = f"postgresql+pg8000://{environ.get('CLOUD_SQL_DB_USER')}:xxx@/{environ.get('CLOUD_SQL_DB_NAME')}"
+
+     assert db_uri
+
+     return db_uri
+
+
+ # Use SQLALCHEMY_ENGINE_OPTIONS to connect to the cloud but use uri for local db
+ def cloud_connector(testing: bool = False):
+     if not testing and not environ.get("POSTGRES_URI"):
+         return {"creator": getconn}
+     else:
+         return {}
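The new `cidc_api/config/db.py` above wires Flask-SQLAlchemy to the Cloud SQL Python Connector: when `POSTGRES_URI` is unset (and not testing), `cloud_connector()` hands SQLAlchemy a `creator` callable instead of relying on the URI alone. A minimal sketch of how these pieces fit together, assuming a hypothetical app factory and that the `CLOUD_SQL_*` environment variables are set:

```python
from flask import Flask

from cidc_api.config.db import cloud_connector, db, get_sqlalchemy_database_uri

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = get_sqlalchemy_database_uri()
# With POSTGRES_URI unset, this is {"creator": getconn}: SQLAlchemy invokes
# getconn() for every new DBAPI connection, so pg8000 authenticates via IAM
# and the placeholder password in the URI is never used.
app.config["SQLALCHEMY_ENGINE_OPTIONS"] = cloud_connector()
db.init_app(app)
```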
@@ -2,7 +2,7 @@ import sys
  import logging
  from typing import Optional
 
- from .settings import IS_GUNICORN, ENV
+ from .settings import IS_GUNICORN, ENV, TESTING
 
  # TODO: consider adding custom formatting that automatically adds request context
  # to all logs, like who the requesting user is and what URL they're accessing, e.g.
@@ -19,7 +19,10 @@ def get_logger(name: Optional[str]) -> logging.Logger:
          logger.setLevel(gunicorn_logger.level)
      else:
          handler = logging.StreamHandler(sys.stdout)
-         handler.setFormatter(logging.Formatter("[%(asctime)s] [%(threadName)s] [%(levelname)s]: %(message)s"))
+         formatter = logging.Formatter("[%(asctime)s] [%(threadName)s] [%(levelname)s] [%(name)s]: %(message)s")
+         handler.setFormatter(formatter)
          logger.addHandler(handler)
          logger.setLevel(logging.DEBUG if ENV == "dev" else logging.INFO)
+     if not TESTING:
+         logger.propagate = False
      return logger
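A short sketch of what the logging changes do in practice (the logger name here is illustrative, not from the package):

```python
from cidc_api.config.logging import get_logger

logger = get_logger("cidc_api.example")  # hypothetical module name
# The new formatter includes the logger name in each record, e.g.:
# [2025-01-01 12:00:00,000] [MainThread] [INFO] [cidc_api.example]: connected
logger.info("connected")
# Outside tests, propagate=False stops records from also reaching the root
# logger, which would otherwise print each line twice.
```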
@@ -10,7 +10,7 @@ from os import environ, path, mkdir
 
  from dotenv import load_dotenv
 
- from .db import get_sqlalchemy_database_uri
+ from .db import get_sqlalchemy_database_uri, cloud_connector
  from .secrets import get_secrets_manager
 
  load_dotenv()
@@ -54,6 +54,7 @@ else:
 
  ### Configure Flask-SQLAlchemy ###
  SQLALCHEMY_DATABASE_URI = get_sqlalchemy_database_uri(TESTING)
+ SQLALCHEMY_ENGINE_OPTIONS = cloud_connector(TESTING)
  SQLALCHEMY_TRACK_MODIFICATIONS = False
  SQLALCHEMY_ECHO = False  # Set to True to emit all compiled sql statements
 
@@ -70,6 +71,7 @@ GOOGLE_INTAKE_BUCKET = environ["GOOGLE_INTAKE_BUCKET"]
  GOOGLE_UPLOAD_BUCKET = environ["GOOGLE_UPLOAD_BUCKET"]
  GOOGLE_UPLOAD_TOPIC = environ["GOOGLE_UPLOAD_TOPIC"]
  GOOGLE_ACL_DATA_BUCKET = environ["GOOGLE_ACL_DATA_BUCKET"]
+ GOOGLE_CLINICAL_DATA_BUCKET = environ["GOOGLE_CLINICAL_DATA_BUCKET"]
  GOOGLE_EPHEMERAL_BUCKET = environ["GOOGLE_EPHEMERAL_BUCKET"]
  GOOGLE_UPLOAD_ROLE = environ["GOOGLE_UPLOAD_ROLE"]
  GOOGLE_LISTER_ROLE = environ["GOOGLE_LISTER_ROLE"]
@@ -80,6 +82,8 @@ GOOGLE_PATIENT_SAMPLE_TOPIC = environ["GOOGLE_PATIENT_SAMPLE_TOPIC"]
  GOOGLE_EMAILS_TOPIC = environ["GOOGLE_EMAILS_TOPIC"]
  GOOGLE_ARTIFACT_UPLOAD_TOPIC = environ["GOOGLE_ARTIFACT_UPLOAD_TOPIC"]
  GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC = environ["GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC"]
+ GOOGLE_HL_CLINICAL_VALIDATION_TOPIC = environ["GOOGLE_HL_CLINICAL_VALIDATION_TOPIC"]
+ GOOGLE_DL_CLINICAL_VALIDATION_TOPIC = environ["GOOGLE_DL_CLINICAL_VALIDATION_TOPIC"]
  GOOGLE_AND_OPERATOR = " && "
  GOOGLE_OR_OPERATOR = " || "
 
@@ -104,5 +108,9 @@ else:
  IS_EMAIL_ON = environ.get("IS_EMAIL_ON")
 
 
+ # notification emails
+ CIDC_CLINICAL_DATA_EMAIL = environ.get("CIDC_CLINICAL_DATA_EMAIL")
+ CIDC_ADMIN_EMAIL = environ.get("CIDC_ADMIN_EMAIL")
+
  # Accumulate all constants defined in this file in a single dictionary
  SETTINGS = {k: v for k, v in globals().items() if k.isupper()}
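The new bucket, topic, and email constants surface through the `SETTINGS` dict like every other UPPERCASE name in `settings.py`. A sketch, assuming the required environment variables are set; how the API consumes `SETTINGS` is not shown in this diff:

```python
from cidc_api.config.settings import SETTINGS

# Any UPPERCASE module-level constant is collected automatically.
assert "GOOGLE_CLINICAL_DATA_BUCKET" in SETTINGS
assert "CIDC_ADMIN_EMAIL" in SETTINGS
# e.g., a Flask app could load the whole dict at once (assumed usage):
# app.config.update(SETTINGS)
```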
@@ -1,3 +1,5 @@
  from .models import *
  from .files import *
  from .schemas import *
+
+ from cidc_api.models.db.base_orm import BaseORM
@@ -0,0 +1,15 @@
+ from cidc_api.models.pydantic.stage2 import all_models
+
+ standard_data_categories = [model.__data_category__ for model in all_models if hasattr(model, "__data_category__")]
+
+
+ # A class to hold the representation of a trial's dataset all at once
+ class Dataset(dict):
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         for data_category in standard_data_categories:
+             self[data_category] = []
+
+
+ # Maps data categories like "treatment" to their associated pydantic model
+ data_category_to_model = {model.__data_category__: model for model in all_models if hasattr(model, "__data_category__")}
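A sketch of how the new `Dataset` container behaves; `"treatment"` is the category named in the module's own comment:

```python
from cidc_api.models.data import Dataset, data_category_to_model

ds = Dataset()
# Every standard data category is pre-seeded with an empty list, so callers
# can append records without first checking whether the key exists.
assert all(records == [] for records in ds.values())

# Look up the pydantic model that validates records for a given category.
TreatmentModel = data_category_to_model.get("treatment")
```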
@@ -0,0 +1,25 @@
+ from typing import Self
+
+ from sqlalchemy_mixins import SerializeMixin, ReprMixin
+
+ from cidc_api.config.db import db
+
+
+ class BaseORM(db.Model, ReprMixin, SerializeMixin):
+     __abstract__ = True
+     __repr__ = ReprMixin.__repr__
+
+     def merge(self, d: dict) -> Self:
+         """Merge keys and values from dict d into this model, overwriting as necessary."""
+         for key, value in d.items():
+             setattr(self, key, value)
+         return self
+
+     def clone(self) -> "BaseORM":
+         """Clones a SQLAlchemy ORM object, excluding primary keys."""
+         mapper = self.__mapper__
+         new_instance = self.__class__()
+         for column in mapper.columns:
+             if not column.primary_key:
+                 setattr(new_instance, column.key, getattr(self, column.key))
+         return new_instance
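A sketch of `merge()` and `clone()` on a hypothetical table (not part of the package):

```python
from sqlalchemy import Column, Integer, String

from cidc_api.models.db.base_orm import BaseORM


class Widget(BaseORM):  # hypothetical model for illustration only
    __tablename__ = "widgets"
    id = Column(Integer, primary_key=True)
    name = Column(String)


w = Widget(id=1, name="original")
w.merge({"name": "renamed"})  # overwrites attributes in place and returns self
copy = w.clone()              # copies every column except primary keys
assert copy.id is None and copy.name == "renamed"
```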
@@ -993,4 +993,35 @@ details_dict = {
          "",
          "",
      ),
+     # scrna
+     "/scrnaseq/samples_metadata.csv": FileDetails("source", "", ""),
+     "/scrnaseq/read_1.gz": FileDetails("source", "", ""),
+     "/scrnaseq/read_2.gz": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/samples_metadata.csv": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/config.yaml": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/R_package_versions.csv": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/integration.rds": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/integration_heatmap_plots.zip": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/integration_markers.zip": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/integration_split_percent_plots.zip": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/integration_split_umap_plots.zip": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/integration_umap_plots.zip": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/clustering.rds": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/report.html": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/star_sorted_by_cord.bam": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/star_sorted_by_cord.bam.bai": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/log_final.out": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/log.out": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/log_progress.out": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/sj_out.tab": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/barcodes.stats": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_features.stats": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_summary.csv": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_umi_per_cell_sorted.txt": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_filtered_features.tsv": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_filtered_barcodes.tsv": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_filtered_matrix.mtx": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_raw_features.tsv": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_raw_barcodes.tsv": FileDetails("source", "", ""),
+     "/scrnaseq_analysis/gene_raw_matrix.mtx": FileDetails("source", "", ""),
  }
@@ -346,6 +346,36 @@ assay_facets: Facets = {
              "H and E file from MIBI analysis",
          ),
      },
+     "scRNA": {
+         "Samples Metadata": FacetConfig(["/scrnaseq/samples_metadata.csv"], "Sample metadata for scRNA run"),
+         "Read 1 gz": FacetConfig(["/scrnaseq/read_1.gz"], "Gz file for read 1"),
+         "Read 2 gz": FacetConfig(["/scrnaseq/read_2.gz"], "Gz file for read 2"),
+     },
+     "Visium": {
+         "Samples Metadata": FacetConfig(["/visium/samples_metadata.csv"], "Sample metadata for visium run"),
+         "Read 1 fastq gz": FacetConfig(["/visium/R1_001.fastq.gz"], "Gz file for read 1"),
+         "Read 2 fastq gz": FacetConfig(["/visium/R2_001.fastq.gz"], "Gz file for read 2"),
+         "loupe alignment file": FacetConfig(["/visium/loupe_alignment_file.json"]),
+         "brightfield image": FacetConfig(["/visium/brightfield.tiff"]),
+         "dark image": FacetConfig(["/visium/dark_image.tiff"]),
+         "colorized image": FacetConfig(["/visium/colorized.tiff"]),
+         "cytassist image": FacetConfig(["/visium/cytassist.tiff"]),
+     },
+     "NULISA": {
+         "Metadata": FacetConfig(["/nulisa/metadata.csv"], "Metadata for NULISA run"),
+         "NPQ File": FacetConfig(["/nulisa/npq_file.csv"], "NPQ file for NULISA run"),
+         "Raw Counts File": FacetConfig(["/nulisa/raw_counts_file.csv"], "Raw counts file for NULISA run"),
+     },
+     "MALDI Glycan": {
+         "Metadata": FacetConfig(["/maldi_glycan/metadata.tsv"], "Metadata for a MALDI Glycan run"),
+         "Molecular Assignments File": FacetConfig(
+             ["/maldi_glycan/molecular_assignments.tsv"], "Molecular Assignments file for a MALDI Glycan run"
+         ),
+         "IBD File": FacetConfig(["/maldi_glycan/ibd_file.ibd"], "IBD file for MALDI Glycan run"),
+         "IMZML File": FacetConfig(["/maldi_glycan/imzml_file.imzml"], "IMZML file for MALDI Glycan run"),
+         "Channels": FacetConfig(["/maldi_glycan/channels.csv"], "Channels csv file for MALDI Glycan run"),
+         "Tiff Zip": FacetConfig(["/maldi_glycan/tiff.zip"], "Tiff zip for MALDI Glycan run"),
+     },
      "mIHC": {
          "Samples Report": FacetConfig(["/mihc/sample_report.csv"], "Samples report for mIHC run"),
          "Multitiffs": FacetConfig(["/mihc/multitiffs.tar.gz"], "Multi Tiffs file from mIHC run"),
@@ -409,6 +439,11 @@
              "Analysis files for all samples run on the Olink platform in the trial.",
          ),
      },
+     "Olink HT": {
+         "Batch-Level Combined File": FacetConfig(["/olink_ht/batch_level_combined_file.xlsx"]),
+         "Study-Level Combined File": FacetConfig(["/olink_ht/study_level_combined_file.xlsx"]),
+         "Npx Run File": FacetConfig(["/olink_ht/npx_run_file.xlsx"]),
+     },
      "IHC": {
          "Images": FacetConfig(["/ihc/ihc_image."]),
          "Combined Markers": FacetConfig(["csv|ihc marker combined"]),
@@ -549,6 +584,48 @@ analysis_ready_facets = {
      "WES Analysis": FacetConfig(["/wes/analysis/report.tar.gz"]),
      "TCR": FacetConfig(["/tcr_analysis/report_trial.tar.gz"]),
      "mIF": FacetConfig(["/mif/roi_/cell_seg_data.txt"]),
+     "scRNA": FacetConfig(
+         [
+             "/scrnaseq_analysis/samples_metadata.csv",
+             "/scrnaseq_analysis/config.yaml",
+             "/scrnaseq_analysis/R_package_versions.csv",
+             "/scrnaseq_analysis/integration.rds",
+             "/scrnaseq_analysis/integration_heatmap_plots.zip",
+             "/scrnaseq_analysis/integration_markers.zip",
+             "/scrnaseq_analysis/integration_split_percent_plots.zip",
+             "/scrnaseq_analysis/integration_split_umap_plots.zip",
+             "/scrnaseq_analysis/integration_umap_plots.zip",
+             "/scrnaseq_analysis/clustering.rds",
+             "/scrnaseq_analysis/report.html",
+             "/scrnaseq_analysis/star_sorted_by_cord.bam",
+             "/scrnaseq_analysis/star_sorted_by_cord.bam.bai",
+             "/scrnaseq_analysis/log_final.out",
+             "/scrnaseq_analysis/log.out",
+             "/scrnaseq_analysis/log_progress.out",
+             "/scrnaseq_analysis/sj_out.tab",
+             "/scrnaseq_analysis/barcodes.stats",
+             "/scrnaseq_analysis/gene_features.stats",
+             "/scrnaseq_analysis/gene_summary.csv",
+             "/scrnaseq_analysis/gene_umi_per_cell_sorted.txt",
+             "/scrnaseq_analysis/gene_filtered_features.tsv",
+             "/scrnaseq_analysis/gene_filtered_barcodes.tsv",
+             "/scrnaseq_analysis/gene_filtered_matrix.mtx",
+             "/scrnaseq_analysis/gene_raw_features.tsv",
+             "/scrnaseq_analysis/gene_raw_barcodes.tsv",
+             "/scrnaseq_analysis/gene_raw_matrix.mtx",
+         ]
+     ),
+     "Visium": FacetConfig(
+         [
+             "/visium_analysis/samples_metadata.csv",
+             "/visium_analysis/config.yaml",
+             "/visium_analysis/R_package_versions.csv",
+             "/visium_analysis/merged.rds",
+             "/visium_analysis/spatial_variable_features.rds",
+             "/visium_analysis/report.html",
+             "/visium_analysis/visium_spaceranger_output.zip",
+         ]
+     ),
  }
 
  facets_dict: Dict[str, Facets] = {
@@ -91,15 +91,11 @@ def migration_session():
          session.close()
 
 
- def run_metadata_migration(
-     metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool
- ):
+ def run_metadata_migration(metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool):
      """Migrate trial metadata, upload job patches, and downloadable files according to `metadata_migration`"""
      with migration_session() as (session, task_queue):
          try:
-             _run_metadata_migration(
-                 metadata_migration, use_upload_jobs_table, task_queue, session
-             )
+             _run_metadata_migration(metadata_migration, use_upload_jobs_table, task_queue, session)
          except:
              traceback.print_exc()
              raise
@@ -122,9 +118,7 @@ class ManifestUploads(CommonColumns):
      __tablename__ = "manifest_uploads"
 
 
- def _select_successful_assay_uploads(
-     use_upload_jobs_table: bool, session: Session
- ) -> List[UploadJobs]:
+ def _select_successful_assay_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
      if use_upload_jobs_table:
          return (
              session.query(UploadJobs)
@@ -133,21 +127,12 @@ def _select_successful_assay_uploads(
              .all()
          )
 
-     return (
-         session.query(AssayUploads)
-         .filter_by(status=UploadJobStatus.MERGE_COMPLETED.value)
-         .with_for_update()
-         .all()
-     )
+     return session.query(AssayUploads).filter_by(status=UploadJobStatus.MERGE_COMPLETED.value).with_for_update().all()
 
 
- def _select_manifest_uploads(
-     use_upload_jobs_table: bool, session: Session
- ) -> List[UploadJobs]:
+ def _select_manifest_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
      if use_upload_jobs_table:
-         return (
-             session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
-         )
+         return session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
 
      return session.query(ManifestUploads).with_for_update().all()
 
@@ -188,21 +173,15 @@ def _run_metadata_migration(
 
          # Regenerate additional metadata from the migrated clinical trial
          # metadata object.
-         print(
-             f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}"
-         )
+         print(f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}")
          artifact_path = uuid_path_map[artifact["upload_placeholder"]]
-         df.additional_metadata = get_source(
-             migration.result, artifact_path, skip_last=True
-         )[1]
+         df.additional_metadata = get_source(migration.result, artifact_path, skip_last=True)[1]
 
          # If the GCS URI has changed, rename the blob
          # makes call to bucket.rename_blob
          new_gcs_uri = artifact["object_url"]
          if old_gcs_uri != new_gcs_uri:
-             print(
-                 f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}"
-             )
+             print(f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}")
              renamer = PieceOfWork(
                  partial(
                      rename_gcs_blob,
@@ -220,9 +199,7 @@
              gcs_tasks.schedule(renamer)
 
      # Migrate all assay upload successes
-     successful_assay_uploads = _select_successful_assay_uploads(
-         use_upload_jobs_table, session
-     )
+     successful_assay_uploads = _select_successful_assay_uploads(use_upload_jobs_table, session)
      for upload in successful_assay_uploads:
          print(f"Running metadata migration for assay upload: {upload.id}")
          if use_upload_jobs_table:
@@ -248,9 +225,7 @@
          if old_target_uri in migration.file_updates:
              new_target_uri = migration.file_updates[old_target_uri]["object_url"]
              if old_target_uri != new_target_uri:
-                 print(
-                     f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}"
-                 )
+                 print(f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}")
                  new_upload_uri = "/".join([new_target_uri, upload_timestamp])
                  renamer = PieceOfWork(
                      partial(
@@ -325,7 +300,5 @@ def republish_artifact_uploads():
      with migration_session() as (session, _):
          files = session.query(DownloadableFiles).all()
          for f in files:
-             print(
-                 f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}"
-             )
+             print(f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}")
              publish_artifact_upload(f.object_url)