nci-cidc-api-modules 1.2.16__tar.gz → 1.2.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {nci_cidc_api_modules-1.2.16/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.2.18}/PKG-INFO +1 -1
  2. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/config/db.py +1 -3
  3. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/models/models.py +53 -14
  4. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/file_handling.py +37 -2
  5. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18/nci_cidc_api_modules.egg-info}/PKG-INFO +1 -1
  6. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/LICENSE +0 -0
  7. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/MANIFEST.in +0 -0
  8. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/README.md +0 -0
  9. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/config/__init__.py +0 -0
  10. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/config/logging.py +0 -0
  11. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/config/secrets.py +0 -0
  12. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/config/settings.py +0 -0
  13. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/models/__init__.py +0 -0
  14. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/models/files/__init__.py +0 -0
  15. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/models/files/details.py +0 -0
  16. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/models/files/facets.py +0 -0
  17. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/models/migrations.py +0 -0
  18. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/models/schemas.py +0 -0
  19. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/__init__.py +0 -0
  20. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/auth.py +0 -0
  21. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/email_layout.html +0 -0
  22. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/emails.py +0 -0
  23. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/gcloud_client.py +0 -0
  24. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/jose.py +0 -0
  25. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/rest_utils.py +0 -0
  26. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/cidc_api/shared/utils.py +0 -0
  27. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/nci_cidc_api_modules.egg-info/SOURCES.txt +0 -0
  28. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
  29. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
  30. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/nci_cidc_api_modules.egg-info/requires.txt +0 -0
  31. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
  32. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/pyproject.toml +0 -0
  33. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/requirements.modules.txt +0 -0
  34. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/setup.cfg +0 -0
  35. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/setup.py +0 -0
  36. {nci_cidc_api_modules-1.2.16 → nci_cidc_api_modules-1.2.18}/tests/test_api.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.2.16
3
+ Version: 1.2.18
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -10,8 +10,7 @@ from google.cloud.sql.connector import Connector, IPTypes
10
10
  from .secrets import get_secrets_manager
11
11
 
12
12
  db = SQLAlchemy()
13
- BaseModel = declarative_base()
14
- db.Model = BaseModel
13
+ BaseModel = db.Model
15
14
 
16
15
  connector = Connector()
17
16
 
@@ -31,7 +30,6 @@ def getconn():
31
30
  def init_db(app: Flask):
32
31
  """Connect `app` to the database and run migrations"""
33
32
  db.init_app(app)
34
- db.Model = BaseModel
35
33
  Migrate(app, db, app.config["MIGRATIONS_PATH"])
36
34
  with app.app_context():
37
35
  upgrade(app.config["MIGRATIONS_PATH"])
@@ -96,7 +96,7 @@ from sqlalchemy import (
96
96
  String,
97
97
  Table,
98
98
  )
99
- from sqlalchemy.dialects.postgresql import JSONB, UUID
99
+ from sqlalchemy.dialects.postgresql import JSONB, UUID, CITEXT
100
100
  from sqlalchemy.engine import ResultProxy
101
101
  from sqlalchemy.exc import IntegrityError
102
102
  from sqlalchemy.ext.hybrid import hybrid_property
@@ -381,7 +381,7 @@ class Users(CommonColumns):
381
381
  last_n = Column(String)
382
382
  organization = Column(Enum(*ORGS, name="orgs"))
383
383
  approval_date = Column(DateTime)
384
- role = Column(Enum(*ROLES, name="role"))
384
+ role = Column(Enum(*ROLES, name="roles"))
385
385
  disabled = Column(Boolean, default=False, server_default="false")
386
386
 
387
387
  @validates("approval_date")
@@ -606,6 +606,22 @@ class Permissions(CommonColumns):
606
606
  unique=True,
607
607
  postgresql_where=file_group_id.isnot(None),
608
608
  ),
609
+ Index(
610
+ "unique_trial_id_upload_type_is_null_perms",
611
+ "granted_to_user",
612
+ "trial_id",
613
+ literal_column("(upload_type IS NULL)"),
614
+ unique=True,
615
+ postgresql_where="(upload_type IS NULL)",
616
+ ),
617
+ Index(
618
+ "unique_upload_type_trial_id_is_null_perms",
619
+ "granted_to_user",
620
+ literal_column("(trial_id IS NULL)"),
621
+ "upload_type",
622
+ unique=True,
623
+ postgresql_where="(trial_id IS NULL)",
624
+ ),
609
625
  )
610
626
 
611
627
  # Shorthand to make code related to trial- and upload-type-level permissions
@@ -2293,7 +2309,7 @@ class DownloadableFiles(CommonColumns):
2293
2309
  additional_metadata = Column(JSONB, nullable=False)
2294
2310
  # TODO rename upload_type, because we store manifests in there too.
2295
2311
  # NOTE: this column actually has type CITEXT.
2296
- upload_type = Column(String, nullable=False)
2312
+ upload_type = Column(CITEXT, nullable=False)
2297
2313
  md5_hash = Column(String, nullable=True)
2298
2314
  crc32c_hash = Column(String, nullable=True)
2299
2315
  trial_id = Column(String, nullable=False)
@@ -2313,7 +2329,7 @@ class DownloadableFiles(CommonColumns):
2313
2329
  # used instead of data_format.
2314
2330
  # The columns are left as optional for short term backwards compatibility.
2315
2331
  file_name = Column(String, nullable=True)
2316
- data_format = Column(String, nullable=True)
2332
+ data_format = Column(CITEXT, nullable=True)
2317
2333
 
2318
2334
  file_groups = relationship(
2319
2335
  "FileGroups",
@@ -3261,11 +3277,11 @@ class PreprocessedFiles(CommonColumns):
3261
3277
  ),
3262
3278
  )
3263
3279
 
3264
- file_name = Column(String)
3265
- object_url = Column(String)
3280
+ file_name = Column(String, nullable=False)
3281
+ object_url = Column(String, nullable=False)
3266
3282
  job_id = Column(Integer)
3267
- file_category = Column(String)
3268
- uploader_email = Column(String)
3283
+ file_category = Column(String, nullable=False)
3284
+ uploader_email = Column(String, nullable=False)
3269
3285
  status = Column(String)
3270
3286
  version = Column(Integer)
3271
3287
  released_version = Column(String)
@@ -3433,7 +3449,7 @@ class IngestionJobs(CommonColumns):
3433
3449
  ),
3434
3450
  )
3435
3451
 
3436
- status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="status"), nullable=False)
3452
+ status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="ingestion_job_status"), nullable=False)
3437
3453
  trial_id = Column(String, nullable=False)
3438
3454
  version = Column(Integer, nullable=False)
3439
3455
  pending = Column(Boolean, nullable=False, default=False)
@@ -3522,16 +3538,17 @@ class JobFileCategories(CommonColumns):
3522
3538
  ["ingestion_jobs.id"],
3523
3539
  ),
3524
3540
  Index(
3525
- "idx_categories_job_id" "job_id",
3541
+ "idx_categories_job_id",
3542
+ "job_id",
3526
3543
  "category",
3527
3544
  unique=True,
3528
3545
  ),
3529
3546
  )
3530
3547
 
3531
3548
  category = Column(String)
3532
- job_id = Column(Integer)
3533
- type = Column(Enum("required", "optional", name="type"))
3534
- is_custom = Column(Boolean, default=False, server_default="false")
3549
+ job_id = Column(Integer, nullable=False)
3550
+ type = Column(Enum("required", "optional", name="type"), nullable=False)
3551
+ is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
3535
3552
 
3536
3553
  @staticmethod
3537
3554
  @with_default_session
@@ -3557,6 +3574,11 @@ class JobFileCategories(CommonColumns):
3557
3574
  categories = session.query(cls).filter(cls.job_id == job_id, cls.type == type).all()
3558
3575
  return [c.category for c in categories]
3559
3576
 
3577
+ @classmethod
3578
+ @with_default_session
3579
+ def full_categories_for_job(cls, job_id: int, session: Session = None):
3580
+ return session.query(cls).filter_by(job_id=job_id).all()
3581
+
3560
3582
 
3561
3583
  class CategoryDataElements(CommonColumns):
3562
3584
  __tablename__ = "category_data_elements"
@@ -3567,7 +3589,8 @@ class CategoryDataElements(CommonColumns):
3567
3589
  ondelete="CASCADE",
3568
3590
  ),
3569
3591
  Index(
3570
- "idx_elements_category_id" "category_id",
3592
+ "idx_elements_category_id",
3593
+ "category_id",
3571
3594
  "name",
3572
3595
  unique=True,
3573
3596
  ),
@@ -3579,6 +3602,22 @@ class CategoryDataElements(CommonColumns):
3579
3602
  element_type = Column(String, nullable=False)
3580
3603
  cardinality = Column(String, nullable=True)
3581
3604
 
3605
+ @classmethod
3606
+ @with_default_session
3607
+ def elements_for_category(cls, category_id: int, session: Session = None):
3608
+ return session.query(cls).filter_by(category_id=category_id).all()
3609
+
3610
+ @classmethod
3611
+ @with_default_session
3612
+ def elements_by_category_for_job(cls, job_id: int, session: Session = None):
3613
+ """Fetch all CategoryDataElements for a job, along with the category name."""
3614
+ return (
3615
+ session.query(JobFileCategories.category, cls)
3616
+ .join(cls, cls.category_id == JobFileCategories.id)
3617
+ .filter(JobFileCategories.job_id == job_id)
3618
+ .all()
3619
+ )
3620
+
3582
3621
 
3583
3622
  class FileValidationErrors(CommonColumns):
3584
3623
  __tablename__ = "file_validation_errors"
@@ -1,14 +1,15 @@
1
1
  from pathlib import Path
2
2
 
3
+ from pandas import Series, DataFrame
4
+ from sqlalchemy.orm.session import Session
3
5
  from werkzeug.datastructures import FileStorage
4
6
  from werkzeug.exceptions import BadRequest, InternalServerError
5
7
 
6
8
  from ..config.logging import get_logger
7
9
  from ..config.settings import GOOGLE_CLINICAL_DATA_BUCKET
8
- from ..models import PreprocessedFiles
10
+ from ..models import PreprocessedFiles, TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER
9
11
  from ..shared.auth import get_current_user
10
12
  from ..shared.gcloud_client import upload_file_to_gcs, move_gcs_file
11
- from sqlalchemy.orm.session import Session
12
13
 
13
14
  logger = get_logger(__name__)
14
15
 
@@ -104,3 +105,37 @@ def strip_filename_and_pending_folder(path_str):
104
105
  if path.parent.name != "pending":
105
106
  raise ValueError("Expected 'pending' folder above file")
106
107
  return str(path.parent.parent)
108
+
109
+
110
+ def get_row_at_condition(df: DataFrame, condition):
111
+ condition_met_index = df[condition].index[0]
112
+ row_at_condition_series = df.iloc[condition_met_index]
113
+
114
+ return row_at_condition_series
115
+
116
+
117
+ def get_column(header_row_series: Series, header_name: str, use_raw_header_val: bool = False):
118
+ for idx, raw_header in enumerate(header_row_series):
119
+ if str(raw_header).lower() == header_name.lower():
120
+ return raw_header if use_raw_header_val else header_row_series.index[idx]
121
+ return None
122
+
123
+
124
+ def get_column_from_appendix_a(appendix_a_df: DataFrame, header_name: str):
125
+ category_column = appendix_a_df.columns[0]
126
+ aa_header_condition = appendix_a_df[category_column] == TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER
127
+ header_row_series = get_row_at_condition(appendix_a_df, aa_header_condition)
128
+ return get_column(header_row_series, header_name)
129
+
130
+
131
+ def get_column_from_first_row(df: DataFrame, header_name: str):
132
+ use_raw_header_val = False
133
+ if df.columns.inferred_type == "integer":
134
+ # If columns are integers (i.e. file was read without headers), treat the first row as header values.
135
+ header_row_series = df.iloc[0]
136
+ else:
137
+ # Otherwise columns already are headers
138
+ header_row_series = Series(df.columns)
139
+ use_raw_header_val = True
140
+
141
+ return get_column(header_row_series, header_name, use_raw_header_val=use_raw_header_val)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.2.16
3
+ Version: 1.2.18
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license