nci-cidc-api-modules 1.2.15__tar.gz → 1.2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {nci_cidc_api_modules-1.2.15/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.2.16}/PKG-INFO +2 -2
  2. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/models/models.py +26 -23
  3. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/gcloud_client.py +6 -4
  4. nci_cidc_api_modules-1.2.16/cidc_api/shared/utils.py +8 -0
  5. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16/nci_cidc_api_modules.egg-info}/PKG-INFO +2 -2
  6. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/nci_cidc_api_modules.egg-info/SOURCES.txt +1 -0
  7. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/nci_cidc_api_modules.egg-info/requires.txt +1 -1
  8. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/requirements.modules.txt +1 -1
  9. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/LICENSE +0 -0
  10. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/MANIFEST.in +0 -0
  11. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/README.md +0 -0
  12. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/config/__init__.py +0 -0
  13. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/config/db.py +0 -0
  14. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/config/logging.py +0 -0
  15. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/config/secrets.py +0 -0
  16. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/config/settings.py +0 -0
  17. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/models/__init__.py +0 -0
  18. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/models/files/__init__.py +0 -0
  19. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/models/files/details.py +0 -0
  20. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/models/files/facets.py +0 -0
  21. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/models/migrations.py +0 -0
  22. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/models/schemas.py +0 -0
  23. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/__init__.py +0 -0
  24. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/auth.py +0 -0
  25. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/email_layout.html +0 -0
  26. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/emails.py +0 -0
  27. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/file_handling.py +0 -0
  28. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/jose.py +0 -0
  29. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/cidc_api/shared/rest_utils.py +0 -0
  30. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
  31. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
  32. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
  33. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/pyproject.toml +0 -0
  34. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/setup.cfg +0 -0
  35. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/setup.py +0 -0
  36. {nci_cidc_api_modules-1.2.15 → nci_cidc_api_modules-1.2.16}/tests/test_api.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.2.15
3
+ Version: 1.2.16
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -12,7 +12,7 @@ Requires-Dist: cloud-sql-python-connector[pg8000]>=1.18.5
12
12
  Requires-Dist: flask>=3.1.2
13
13
  Requires-Dist: flask-migrate>=4.1.0
14
14
  Requires-Dist: flask-sqlalchemy>=3.1.1
15
- Requires-Dist: google-auth>=2.42.0
15
+ Requires-Dist: google-auth==2.41.1
16
16
  Requires-Dist: google-api-python-client>=2.185.0
17
17
  Requires-Dist: google-cloud-bigquery>=3.38.0
18
18
  Requires-Dist: google-cloud-pubsub>=2.32.0
@@ -26,6 +26,7 @@ __all__ = [
26
26
  "FileValidationErrors",
27
27
  "IngestionJobs",
28
28
  "JobFileCategories",
29
+ "CategoryDataElements",
29
30
  "ValidationConfigs",
30
31
  "TRIAL_APPENDIX_A",
31
32
  "TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER",
@@ -131,7 +132,6 @@ from ..config.settings import (
131
132
  MAX_PAGINATION_PAGE_SIZE,
132
133
  TESTING,
133
134
  INACTIVE_USER_DAYS,
134
- GOOGLE_CLINICAL_DATA_BUCKET,
135
135
  )
136
136
  from ..shared import emails
137
137
  from ..shared.gcloud_client import (
@@ -145,7 +145,6 @@ from ..shared.gcloud_client import (
145
145
  revoke_intake_access,
146
146
  revoke_lister_access,
147
147
  revoke_bigquery_access,
148
- gcs_xlsx_or_csv_file_to_pandas_dataframe,
149
148
  )
150
149
 
151
150
  os.environ["TZ"] = "UTC"
@@ -3450,30 +3449,9 @@ class IngestionJobs(CommonColumns):
3450
3449
 
3451
3450
  @with_default_session
3452
3451
  def transition_status(self, status: str, session: Session):
3453
- # create required categories after opening job for submission
3454
- if self.status == "DRAFT" and status == "INITIAL SUBMISSION":
3455
- for category in self.derive_required_categories_from_appendix_a():
3456
- JobFileCategories.create(category=category, job_id=self.id, type="required")
3457
3452
  self.status = status
3458
3453
  self.update(session=session)
3459
3454
 
3460
- def derive_required_categories_from_appendix_a(self) -> List:
3461
- appendix_a = PreprocessedFiles.get_files_by_category_and_status(TRIAL_APPENDIX_A, "current", job_id=self.id)[0]
3462
- df = gcs_xlsx_or_csv_file_to_pandas_dataframe(GOOGLE_CLINICAL_DATA_BUCKET, appendix_a.object_url)
3463
- categories = []
3464
- headers_ended = False
3465
- for _index, row in df.iterrows():
3466
- cell = str(row.iloc[0])
3467
- if headers_ended:
3468
- if cell != "nan" and cell not in categories:
3469
- categories.append(cell)
3470
- elif cell.lower() == TRIAL_APPENDIX_A_CELL_THAT_ENDS_THE_HEADER.lower():
3471
- headers_ended = True
3472
- if "data_dictionary" not in categories:
3473
- # Ensure Data_Dictionary is always a required file category
3474
- categories.append("data_dictionary")
3475
- return categories
3476
-
3477
3455
  @classmethod
3478
3456
  @with_default_session
3479
3457
  def atomic_set_job_as_pending(cls, job_id: int, session: Session) -> Boolean:
@@ -3553,6 +3531,7 @@ class JobFileCategories(CommonColumns):
3553
3531
  category = Column(String)
3554
3532
  job_id = Column(Integer)
3555
3533
  type = Column(Enum("required", "optional", name="type"))
3534
+ is_custom = Column(Boolean, default=False, server_default="false")
3556
3535
 
3557
3536
  @staticmethod
3558
3537
  @with_default_session
@@ -3560,12 +3539,14 @@ class JobFileCategories(CommonColumns):
3560
3539
  category: str,
3561
3540
  job_id: int,
3562
3541
  type: str,
3542
+ is_custom: bool = False,
3563
3543
  session: Session = None,
3564
3544
  ):
3565
3545
  new_category = JobFileCategories(
3566
3546
  category=category,
3567
3547
  job_id=job_id,
3568
3548
  type=type,
3549
+ is_custom=is_custom,
3569
3550
  )
3570
3551
  new_category.insert(session=session)
3571
3552
  return new_category
@@ -3577,6 +3558,28 @@ class JobFileCategories(CommonColumns):
3577
3558
  return [c.category for c in categories]
3578
3559
 
3579
3560
 
3561
class CategoryDataElements(CommonColumns):
    """A named data element attached to a job file category.

    Rows live in the ``category_data_elements`` table and reference
    ``job_file_categories.id`` through ``category_id``. The foreign key is
    declared with ``ondelete="CASCADE"``.
    """

    __tablename__ = "category_data_elements"
    __table_args__ = (
        ForeignKeyConstraint(
            ["category_id"],
            ["job_file_categories.id"],
            ondelete="CASCADE",
        ),
        # The index name and the first indexed column must be separate
        # arguments. Without the comma between them Python concatenates the two
        # string literals, producing an index named
        # "idx_elements_category_idcategory_id" that is unique on "name" alone.
        # NOTE(review): if a migration already created the index from the
        # concatenated form, a follow-up migration is needed - confirm.
        Index(
            "idx_elements_category_id",
            "category_id",
            "name",
            unique=True,
        ),
    )

    category_id = Column(Integer, nullable=False)
    name = Column(String, nullable=False)
    is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
    element_type = Column(String, nullable=False)
    cardinality = Column(String, nullable=True)
3581
+
3582
+
3580
3583
  class FileValidationErrors(CommonColumns):
3581
3584
  __tablename__ = "file_validation_errors"
3582
3585
  __table_args__ = (
@@ -1,6 +1,6 @@
1
1
  """Utilities for interacting with the Google Cloud Platform APIs."""
2
2
 
3
- # pylint: disable=logging-fstring-interpolation
3
+ # pylint: disable=logging-fstring-interpolation,too-many-lines
4
4
 
5
5
  import base64
6
6
  import datetime
@@ -37,8 +37,8 @@ from sqlalchemy.orm.session import Session
37
37
  from werkzeug.datastructures import FileStorage
38
38
  from werkzeug.utils import secure_filename
39
39
 
40
- from cidc_api.config.secrets import get_secrets_manager
41
40
  from ..config.logging import get_logger
41
+ from ..config.secrets import get_secrets_manager
42
42
  from ..config.settings import (
43
43
  DEV_USE_GCS,
44
44
  GOOGLE_INTAKE_ROLE,
@@ -62,6 +62,8 @@ from ..config.settings import (
62
62
  DEV_CFUNCTIONS_SERVER,
63
63
  INACTIVE_USER_DAYS,
64
64
  )
65
+ from ..shared.utils import strip_whitespaces
66
+
65
67
 
66
68
  os.environ["TZ"] = "UTC"
67
69
  logger = get_logger(__name__)
@@ -427,9 +429,9 @@ def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str):
427
429
 
428
430
  # TODO: specify sheet in xlsx file and/or accept tsv and xls files
429
431
  if blob_name[-3:] == "csv":
430
- return pd.read_csv(temp_file)
432
+ return strip_whitespaces(pd.read_csv(temp_file))
431
433
  elif blob_name[-4:] == "xlsx":
432
- return pd.read_excel(temp_file)
434
+ return strip_whitespaces(pd.read_excel(temp_file))
433
435
  else:
434
436
  raise Exception("Can only read csv or xlsx files")
435
437
 
@@ -0,0 +1,8 @@
1
def strip_whitespaces(df):
    """Return a new DataFrame with surrounding whitespace removed from string cells.

    Only cells that are ``str`` instances are changed (via ``str.strip``);
    every other cell value is passed through untouched. The input frame is
    not modified.

    Args:
        df: the pandas DataFrame to clean.

    Returns:
        A DataFrame of the same shape with each string cell stripped.
    """

    def stripper(x):
        # Check the type before anything else: evaluating the truthiness of an
        # arbitrary cell first raises TypeError for values such as pandas.NA.
        return x.strip() if isinstance(x, str) else x

    # DataFrame.map only exists in pandas >= 2.1; older releases expose the
    # same element-wise operation as DataFrame.applymap.
    elementwise = getattr(df, "map", None) or df.applymap
    return elementwise(stripper)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.2.15
3
+ Version: 1.2.16
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -12,7 +12,7 @@ Requires-Dist: cloud-sql-python-connector[pg8000]>=1.18.5
12
12
  Requires-Dist: flask>=3.1.2
13
13
  Requires-Dist: flask-migrate>=4.1.0
14
14
  Requires-Dist: flask-sqlalchemy>=3.1.1
15
- Requires-Dist: google-auth>=2.42.0
15
+ Requires-Dist: google-auth==2.41.1
16
16
  Requires-Dist: google-api-python-client>=2.185.0
17
17
  Requires-Dist: google-cloud-bigquery>=3.38.0
18
18
  Requires-Dist: google-cloud-pubsub>=2.32.0
@@ -24,6 +24,7 @@ cidc_api/shared/file_handling.py
24
24
  cidc_api/shared/gcloud_client.py
25
25
  cidc_api/shared/jose.py
26
26
  cidc_api/shared/rest_utils.py
27
+ cidc_api/shared/utils.py
27
28
  nci_cidc_api_modules.egg-info/PKG-INFO
28
29
  nci_cidc_api_modules.egg-info/SOURCES.txt
29
30
  nci_cidc_api_modules.egg-info/dependency_links.txt
@@ -3,7 +3,7 @@ cloud-sql-python-connector[pg8000]>=1.18.5
3
3
  flask>=3.1.2
4
4
  flask-migrate>=4.1.0
5
5
  flask-sqlalchemy>=3.1.1
6
- google-auth>=2.42.0
6
+ google-auth==2.41.1
7
7
  google-api-python-client>=2.185.0
8
8
  google-cloud-bigquery>=3.38.0
9
9
  google-cloud-pubsub>=2.32.0
@@ -3,7 +3,7 @@ cloud-sql-python-connector[pg8000]>=1.18.5
3
3
  flask>=3.1.2
4
4
  flask-migrate>=4.1.0
5
5
  flask-sqlalchemy>=3.1.1
6
- google-auth>=2.42.0
6
+ google-auth==2.41.1 # Pinned: a bug in 2.42.x causes local environments to fail when connecting to dev
7
7
  google-api-python-client>=2.185.0
8
8
  google-cloud-bigquery>=3.38.0
9
9
  google-cloud-pubsub>=2.32.0