nci-cidc-api-modules 1.2.15__tar.gz → 1.2.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nci_cidc_api_modules-1.2.15/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.2.19}/PKG-INFO +6 -4
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/README.md +3 -2
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/db.py +1 -3
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/__init__.py +2 -0
- nci_cidc_api_modules-1.2.19/cidc_api/models/data.py +15 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/migrations.py +12 -39
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/models.py +89 -35
- nci_cidc_api_modules-1.2.19/cidc_api/models/types.py +1438 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/file_handling.py +37 -2
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/gcloud_client.py +6 -4
- nci_cidc_api_modules-1.2.19/cidc_api/shared/utils.py +8 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19/nci_cidc_api_modules.egg-info}/PKG-INFO +6 -4
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/SOURCES.txt +3 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/requires.txt +2 -1
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/pyproject.toml +7 -2
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/requirements.modules.txt +2 -1
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/LICENSE +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/MANIFEST.in +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/__init__.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/logging.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/secrets.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/settings.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/files/__init__.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/files/details.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/files/facets.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/schemas.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/__init__.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/auth.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/email_layout.html +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/emails.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/jose.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/shared/rest_utils.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/setup.cfg +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/setup.py +0 -0
- {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/tests/test_api.py +0 -0
{nci_cidc_api_modules-1.2.15/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.2.19}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nci_cidc_api_modules
-Version: 1.2.15
+Version: 1.2.19
 Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
 Home-page: https://github.com/NCI-CIDC/cidc-api-gae
 License: MIT license
@@ -12,7 +12,7 @@ Requires-Dist: cloud-sql-python-connector[pg8000]>=1.18.5
 Requires-Dist: flask>=3.1.2
 Requires-Dist: flask-migrate>=4.1.0
 Requires-Dist: flask-sqlalchemy>=3.1.1
-Requires-Dist: google-auth
+Requires-Dist: google-auth==2.41.1
 Requires-Dist: google-api-python-client>=2.185.0
 Requires-Dist: google-cloud-bigquery>=3.38.0
 Requires-Dist: google-cloud-pubsub>=2.32.0
@@ -28,6 +28,7 @@ Requires-Dist: pyarrow>=22.0.0
 Requires-Dist: python-dotenv>=1.2.1
 Requires-Dist: requests>=2.32.5
 Requires-Dist: sqlalchemy>=2.0.44
+Requires-Dist: sqlalchemy-mixins~=2.0.5
 Requires-Dist: werkzeug>=3.1.3
 Requires-Dist: nci-cidc-schemas==0.28.8
 Dynamic: description
@@ -217,13 +218,14 @@ gcloud auth application-default login
 In your .env file, comment out `POSTGRES_URI` and uncommment
 `CLOUD_SQL_INSTANCE_NAME CLOUD_SQL_DB_USER CLOUD_SQL_DB_NAME` Replace `CLOUD_SQL_DB_USER` with your NIH email.
 
-### Running database migrations
+### Creating/Running database migrations
 
 This project uses [`Flask Migrate`](https://flask-migrate.readthedocs.io/en/latest/) for managing database migrations. To create a new migration and upgrade the database specified in your `.env` config:
 
 ```bash
 export FLASK_APP=cidc_api/app.py
-#
+# First, make your changes to the model(s)
+# Then, let flask automatically generate the db change. Double check the migration script!
 flask db migrate -m "<a message describing the changes in this migration>"
 # Apply changes to the database
 flask db upgrade
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/README.md

@@ -176,13 +176,14 @@ gcloud auth application-default login
 In your .env file, comment out `POSTGRES_URI` and uncommment
 `CLOUD_SQL_INSTANCE_NAME CLOUD_SQL_DB_USER CLOUD_SQL_DB_NAME` Replace `CLOUD_SQL_DB_USER` with your NIH email.
 
-### Running database migrations
+### Creating/Running database migrations
 
 This project uses [`Flask Migrate`](https://flask-migrate.readthedocs.io/en/latest/) for managing database migrations. To create a new migration and upgrade the database specified in your `.env` config:
 
 ```bash
 export FLASK_APP=cidc_api/app.py
-#
+# First, make your changes to the model(s)
+# Then, let flask automatically generate the db change. Double check the migration script!
 flask db migrate -m "<a message describing the changes in this migration>"
 # Apply changes to the database
 flask db upgrade
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/config/db.py

@@ -10,8 +10,7 @@ from google.cloud.sql.connector import Connector, IPTypes
 from .secrets import get_secrets_manager
 
 db = SQLAlchemy()
-BaseModel =
-db.Model = BaseModel
+BaseModel = db.Model
 
 connector = Connector()
 
@@ -31,7 +30,6 @@ def getconn():
 def init_db(app: Flask):
     """Connect `app` to the database and run migrations"""
     db.init_app(app)
-    db.Model = BaseModel
     Migrate(app, db, app.config["MIGRATIONS_PATH"])
     with app.app_context():
         upgrade(app.config["MIGRATIONS_PATH"])
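The change above drops the custom base-model indirection: in Flask-SQLAlchemy 3.x, `db.Model` is already a declarative base, so a plain alias suffices. A minimal sketch (the `Example` model is illustrative, not from this package) of how code downstream of `db.py` consumes the alias:

```python
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()
BaseModel = db.Model  # db.Model is the declarative base in Flask-SQLAlchemy 3.x


class Example(BaseModel):  # hypothetical model, for illustration only
    __tablename__ = "examples"
    id = db.Column(db.Integer, primary_key=True)
```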
nci_cidc_api_modules-1.2.19/cidc_api/models/data.py

@@ -0,0 +1,15 @@
+from cidc_api.models.pydantic.stage2 import all_models
+
+standard_data_categories = [model.__data_category__ for model in all_models if hasattr(model, "__data_category__")]
+
+
+# A class to hold the representation of a trial's dataset all at once
+class Dataset(dict):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        for data_category in standard_data_categories:
+            self[data_category] = []
+
+
+# Maps data categories like "treatment" to their associated pydantic model
+data_category_to_model = {model.__data_category__: model for model in all_models if hasattr(model, "__data_category__")}
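The new `Dataset` class pre-seeds one empty list per standard data category, so callers can append records for any category without key checks. A self-contained sketch of that behavior, with the pydantic-derived category list stubbed out:

```python
# Stand-in for the list derived from the pydantic models' __data_category__.
standard_data_categories = ["treatment", "demographics"]


class Dataset(dict):
    """Holds the representation of a trial's dataset, keyed by data category."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        for data_category in standard_data_categories:
            self[data_category] = []


ds = Dataset()
ds["treatment"].append({"subject_id": "S1"})  # hypothetical record
print(sorted(ds))  # ['demographics', 'treatment']: every category is pre-seeded
```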
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/migrations.py

@@ -91,15 +91,11 @@ def migration_session():
     session.close()
 
 
-def run_metadata_migration(
-    metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool
-):
+def run_metadata_migration(metadata_migration: Callable[[dict], MigrationResult], use_upload_jobs_table: bool):
     """Migrate trial metadata, upload job patches, and downloadable files according to `metadata_migration`"""
     with migration_session() as (session, task_queue):
         try:
-            _run_metadata_migration(
-                metadata_migration, use_upload_jobs_table, task_queue, session
-            )
+            _run_metadata_migration(metadata_migration, use_upload_jobs_table, task_queue, session)
         except:
             traceback.print_exc()
             raise
@@ -122,9 +118,7 @@ class ManifestUploads(CommonColumns):
     __tablename__ = "manifest_uploads"
 
 
-def _select_successful_assay_uploads(
-    use_upload_jobs_table: bool, session: Session
-) -> List[UploadJobs]:
+def _select_successful_assay_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
     if use_upload_jobs_table:
         return (
             session.query(UploadJobs)
@@ -133,21 +127,12 @@ def _select_successful_assay_uploads(
             .all()
         )
 
-    return (
-        session.query(AssayUploads)
-        .filter_by(status=UploadJobStatus.MERGE_COMPLETED.value)
-        .with_for_update()
-        .all()
-    )
+    return session.query(AssayUploads).filter_by(status=UploadJobStatus.MERGE_COMPLETED.value).with_for_update().all()
 
 
-def _select_manifest_uploads(
-    use_upload_jobs_table: bool, session: Session
-) -> List[UploadJobs]:
+def _select_manifest_uploads(use_upload_jobs_table: bool, session: Session) -> List[UploadJobs]:
     if use_upload_jobs_table:
-        return (
-            session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
-        )
+        return session.query(UploadJobs).filter_by(multifile=False).with_for_update().all()
 
     return session.query(ManifestUploads).with_for_update().all()
 
@@ -188,21 +173,15 @@ def _run_metadata_migration(
 
     # Regenerate additional metadata from the migrated clinical trial
     # metadata object.
-    print(
-        f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}"
-    )
+    print(f"Regenerating additional metadata for artifact with uuid {artifact['upload_placeholder']}")
     artifact_path = uuid_path_map[artifact["upload_placeholder"]]
-    df.additional_metadata = get_source(
-        migration.result, artifact_path, skip_last=True
-    )[1]
+    df.additional_metadata = get_source(migration.result, artifact_path, skip_last=True)[1]
 
     # If the GCS URI has changed, rename the blob
     # makes call to bucket.rename_blob
     new_gcs_uri = artifact["object_url"]
     if old_gcs_uri != new_gcs_uri:
-        print(
-            f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}"
-        )
+        print(f"Encountered GCS data bucket artifact URI to update: {old_gcs_uri}")
         renamer = PieceOfWork(
             partial(
                 rename_gcs_blob,
@@ -220,9 +199,7 @@ def _run_metadata_migration(
     gcs_tasks.schedule(renamer)
 
     # Migrate all assay upload successes
-    successful_assay_uploads = _select_successful_assay_uploads(
-        use_upload_jobs_table, session
-    )
+    successful_assay_uploads = _select_successful_assay_uploads(use_upload_jobs_table, session)
     for upload in successful_assay_uploads:
         print(f"Running metadata migration for assay upload: {upload.id}")
         if use_upload_jobs_table:
@@ -248,9 +225,7 @@ def _run_metadata_migration(
     if old_target_uri in migration.file_updates:
         new_target_uri = migration.file_updates[old_target_uri]["object_url"]
         if old_target_uri != new_target_uri:
-            print(
-                f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}"
-            )
+            print(f"Encountered GCS upload bucket artifact URI to update: {old_upload_uri}")
             new_upload_uri = "/".join([new_target_uri, upload_timestamp])
             renamer = PieceOfWork(
                 partial(
@@ -325,7 +300,5 @@ def republish_artifact_uploads():
     with migration_session() as (session, _):
         files = session.query(DownloadableFiles).all()
         for f in files:
-            print(
-                f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}"
-            )
+            print(f"Publishing to 'artifact_upload' topic for downloadable file with in bucket url {f.object_url}")
             publish_artifact_upload(f.object_url)
{nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.19}/cidc_api/models/models.py

@@ -26,6 +26,7 @@ __all__ = [
     "FileValidationErrors",
     "IngestionJobs",
     "JobFileCategories",
+    "CategoryDataElements",
    "ValidationConfigs",
     "TRIAL_APPENDIX_A",
     "TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER",
@@ -95,7 +96,7 @@ from sqlalchemy import (
     String,
     Table,
 )
-from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.dialects.postgresql import JSONB, UUID, CITEXT
 from sqlalchemy.engine import ResultProxy
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.hybrid import hybrid_property
@@ -131,7 +132,6 @@ from ..config.settings import (
     MAX_PAGINATION_PAGE_SIZE,
     TESTING,
     INACTIVE_USER_DAYS,
-    GOOGLE_CLINICAL_DATA_BUCKET,
 )
 from ..shared import emails
 from ..shared.gcloud_client import (
@@ -145,7 +145,6 @@ from ..shared.gcloud_client import (
     revoke_intake_access,
     revoke_lister_access,
     revoke_bigquery_access,
-    gcs_xlsx_or_csv_file_to_pandas_dataframe,
 )
 
 os.environ["TZ"] = "UTC"
@@ -382,7 +381,7 @@ class Users(CommonColumns):
     last_n = Column(String)
     organization = Column(Enum(*ORGS, name="orgs"))
     approval_date = Column(DateTime)
-    role = Column(Enum(*ROLES, name="
+    role = Column(Enum(*ROLES, name="roles"))
     disabled = Column(Boolean, default=False, server_default="false")
 
     @validates("approval_date")
@@ -607,6 +606,22 @@ class Permissions(CommonColumns):
             unique=True,
             postgresql_where=file_group_id.isnot(None),
         ),
+        Index(
+            "unique_trial_id_upload_type_is_null_perms",
+            "granted_to_user",
+            "trial_id",
+            literal_column("(upload_type IS NULL)"),
+            unique=True,
+            postgresql_where="(upload_type IS NULL)",
+        ),
+        Index(
+            "unique_upload_type_trial_id_is_null_perms",
+            "granted_to_user",
+            literal_column("(trial_id IS NULL)"),
+            "upload_type",
+            unique=True,
+            postgresql_where="(trial_id IS NULL)",
+        ),
     )
 
     # Shorthand to make code related to trial- and upload-type-level permissions
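The two new indexes appear to enforce one-permission-per-scope semantics: at most one trial-wide grant (upload_type IS NULL) per user and trial, and at most one cross-trial grant (trial_id IS NULL) per user and upload type. A standalone sketch, against an assumed and simplified table shape, of how such a partial unique index compiles for PostgreSQL:

```python
from sqlalchemy import Column, Index, Integer, MetaData, String, Table, text
from sqlalchemy.dialects import postgresql
from sqlalchemy.schema import CreateIndex

metadata = MetaData()
perms = Table(
    "permissions",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("granted_to_user", Integer),
    Column("trial_id", String),
    Column("upload_type", String),
)

# Unique only among rows where upload_type IS NULL, i.e. trial-wide grants.
idx = Index(
    "unique_trial_id_upload_type_is_null_perms",
    perms.c.granted_to_user,
    perms.c.trial_id,
    unique=True,
    postgresql_where=text("upload_type IS NULL"),
)

print(CreateIndex(idx).compile(dialect=postgresql.dialect()))
# CREATE UNIQUE INDEX unique_trial_id_upload_type_is_null_perms
# ON permissions (granted_to_user, trial_id)
# WHERE upload_type IS NULL
```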
@@ -2294,7 +2309,7 @@ class DownloadableFiles(CommonColumns):
     additional_metadata = Column(JSONB, nullable=False)
     # TODO rename upload_type, because we store manifests in there too.
     # NOTE: this column actually has type CITEXT.
-    upload_type = Column(
+    upload_type = Column(CITEXT, nullable=False)
     md5_hash = Column(String, nullable=True)
     crc32c_hash = Column(String, nullable=True)
     trial_id = Column(String, nullable=False)
@@ -2314,7 +2329,7 @@ class DownloadableFiles(CommonColumns):
     # used instead of data_format.
     # The columns are left as optional for short term backwards compatibility.
     file_name = Column(String, nullable=True)
-    data_format = Column(
+    data_format = Column(CITEXT, nullable=True)
 
     file_groups = relationship(
         "FileGroups",
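Declaring these columns as `CITEXT` aligns the SQLAlchemy models with the case-insensitive Postgres type the NOTE comment already described. A minimal sketch (assumed, simplified table) of what that buys queries:

```python
# CITEXT columns compare case-insensitively in PostgreSQL, so a filter on
# upload_type matches 'wes', 'WES', and 'Wes' alike. Table shape is assumed.
from sqlalchemy import Column, Integer, MetaData, Table, select
from sqlalchemy.dialects.postgresql import CITEXT

metadata = MetaData()
files = Table(
    "downloadable_files",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("upload_type", CITEXT, nullable=False),
)

stmt = select(files).where(files.c.upload_type == "wes")
print(stmt)  # SELECT ... WHERE downloadable_files.upload_type = :upload_type_1
```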
@@ -3262,11 +3277,11 @@ class PreprocessedFiles(CommonColumns):
         ),
     )
 
-    file_name = Column(String)
-    object_url = Column(String)
+    file_name = Column(String, nullable=False)
+    object_url = Column(String, nullable=False)
     job_id = Column(Integer)
-    file_category = Column(String)
-    uploader_email = Column(String)
+    file_category = Column(String, nullable=False)
+    uploader_email = Column(String, nullable=False)
     status = Column(String)
     version = Column(Integer)
     released_version = Column(String)
@@ -3400,6 +3415,15 @@ class PreprocessedFiles(CommonColumns):
 
         return query.filter(cls.job_id.is_(None))
 
+    @with_default_session
+    def category_description(self, session: Session):
+        category = (
+            session.query(JobFileCategories)
+            .filter(JobFileCategories.job_id == self.job_id, JobFileCategories.category == self.file_category)
+            .first()
+        )
+        return category.description if category else None
+
 
 INGESTION_JOB_STATUSES = [
     "DRAFT",
@@ -3434,7 +3458,7 @@ class IngestionJobs(CommonColumns):
         ),
     )
 
-    status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="
+    status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="ingestion_job_status"), nullable=False)
     trial_id = Column(String, nullable=False)
     version = Column(Integer, nullable=False)
     pending = Column(Boolean, nullable=False, default=False)
@@ -3450,30 +3474,9 @@ class IngestionJobs(CommonColumns):
 
     @with_default_session
     def transition_status(self, status: str, session: Session):
-        # create required categories after opening job for submission
-        if self.status == "DRAFT" and status == "INITIAL SUBMISSION":
-            for category in self.derive_required_categories_from_appendix_a():
-                JobFileCategories.create(category=category, job_id=self.id, type="required")
         self.status = status
         self.update(session=session)
 
-    def derive_required_categories_from_appendix_a(self) -> List:
-        appendix_a = PreprocessedFiles.get_files_by_category_and_status(TRIAL_APPENDIX_A, "current", job_id=self.id)[0]
-        df = gcs_xlsx_or_csv_file_to_pandas_dataframe(GOOGLE_CLINICAL_DATA_BUCKET, appendix_a.object_url)
-        categories = []
-        headers_ended = False
-        for _index, row in df.iterrows():
-            cell = str(row.iloc[0])
-            if headers_ended:
-                if cell != "nan" and cell not in categories:
-                    categories.append(cell)
-            elif cell.lower() == TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER.lower():
-                headers_ended = True
-        if "data_dictionary" not in categories:
-            # Ensure Data_Dictionary is always a required file category
-            categories.append("data_dictionary")
-        return categories
-
     @classmethod
     @with_default_session
     def atomic_set_job_as_pending(cls, job_id: int, session: Session) -> Boolean:
@@ -3544,15 +3547,18 @@ class JobFileCategories(CommonColumns):
             ["ingestion_jobs.id"],
         ),
         Index(
-            "idx_categories_job_id"
+            "idx_categories_job_id",
+            "job_id",
             "category",
             unique=True,
         ),
     )
 
     category = Column(String)
-
-
+    description = Column(String)
+    job_id = Column(Integer, nullable=False)
+    type = Column(Enum("required", "optional", name="type"), nullable=False)
+    is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
 
     @staticmethod
     @with_default_session
@@ -3560,12 +3566,16 @@ class JobFileCategories(CommonColumns):
         category: str,
         job_id: int,
         type: str,
+        description: str = None,
+        is_custom: bool = False,
         session: Session = None,
     ):
         new_category = JobFileCategories(
             category=category,
             job_id=job_id,
             type=type,
+            description=description,
+            is_custom=is_custom,
         )
         new_category.insert(session=session)
         return new_category
@@ -3576,6 +3586,50 @@ class JobFileCategories(CommonColumns):
         categories = session.query(cls).filter(cls.job_id == job_id, cls.type == type).all()
         return [c.category for c in categories]
 
+    @classmethod
+    @with_default_session
+    def full_categories_for_job(cls, job_id: int, session: Session = None):
+        return session.query(cls).filter_by(job_id=job_id).all()
+
+
+class CategoryDataElements(CommonColumns):
+    __tablename__ = "category_data_elements"
+    __table_args__ = (
+        ForeignKeyConstraint(
+            ["category_id"],
+            ["job_file_categories.id"],
+            ondelete="CASCADE",
+        ),
+        Index(
+            "idx_elements_category_id",
+            "category_id",
+            "name",
+            unique=True,
+        ),
+    )
+
+    category_id = Column(Integer, nullable=False)
+    name = Column(String, nullable=False)
+    is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
+    element_type = Column(String, nullable=False)
+    cardinality = Column(String, nullable=True)
+
+    @classmethod
+    @with_default_session
+    def elements_for_category(cls, category_id: int, session: Session = None):
+        return session.query(cls).filter_by(category_id=category_id).all()
+
+    @classmethod
+    @with_default_session
+    def elements_by_category_for_job(cls, job_id: int, session: Session = None):
+        """Fetch all CategoryDataElements for a job, along with the category name."""
+        return (
+            session.query(JobFileCategories.category, cls)
+            .join(cls, cls.category_id == JobFileCategories.id)
+            .filter(JobFileCategories.job_id == job_id)
+            .all()
+        )
+
 
 class FileValidationErrors(CommonColumns):
     __tablename__ = "file_validation_errors"
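`elements_by_category_for_job` returns `(category_name, CategoryDataElements)` tuples rather than plain model instances, so callers would typically group the rows by category. A hypothetical consumer sketch (the `Element` stand-in mimics only the row attributes used here):

```python
from collections import defaultdict, namedtuple

# Stand-in for a CategoryDataElements row; only `name` is used below.
Element = namedtuple("Element", ["name", "element_type"])


def group_elements_by_category(rows):
    """Group (category_name, element) pairs into {category: [element names]}."""
    grouped = defaultdict(list)
    for category_name, element in rows:
        grouped[category_name].append(element.name)
    return dict(grouped)


# e.g. rows = CategoryDataElements.elements_by_category_for_job(job_id=1, session=session)
rows = [("treatment", Element("dose", "number")), ("treatment", Element("agent", "text"))]
print(group_elements_by_category(rows))  # {'treatment': ['dose', 'agent']}
```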