nci-cidc-api-modules 1.1.39__tar.gz → 1.1.41__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (34)
  1. {nci_cidc_api_modules-1.1.39/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.1.41}/PKG-INFO +2 -2
  2. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/config/db.py +34 -5
  3. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/models/models.py +36 -1
  4. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/gcloud_client.py +8 -3
  5. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41/nci_cidc_api_modules.egg-info}/PKG-INFO +2 -2
  6. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/requires.txt +1 -1
  7. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/requirements.modules.txt +1 -1
  8. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/LICENSE +0 -0
  9. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/MANIFEST.in +0 -0
  10. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/README.md +0 -0
  11. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/config/__init__.py +0 -0
  12. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/config/logging.py +0 -0
  13. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/config/secrets.py +0 -0
  14. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/config/settings.py +0 -0
  15. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/models/__init__.py +0 -0
  16. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/models/files/__init__.py +0 -0
  17. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/models/files/details.py +0 -0
  18. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/models/files/facets.py +0 -0
  19. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/models/migrations.py +0 -0
  20. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/models/schemas.py +0 -0
  21. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/__init__.py +0 -0
  22. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/auth.py +0 -0
  23. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/emails.py +0 -0
  24. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/file_handling.py +0 -0
  25. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/jose.py +0 -0
  26. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/rest_utils.py +0 -0
  27. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/SOURCES.txt +0 -0
  28. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
  29. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
  30. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
  31. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/pyproject.toml +0 -0
  32. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/setup.cfg +0 -0
  33. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/setup.py +0 -0
  34. {nci_cidc_api_modules-1.1.39 → nci_cidc_api_modules-1.1.41}/tests/test_api.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.39
3
+ Version: 1.1.41
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -29,7 +29,7 @@ Requires-Dist: requests==2.32.4
29
29
  Requires-Dist: jinja2==3.1.6
30
30
  Requires-Dist: certifi==2024.7.4
31
31
  Requires-Dist: cloud-sql-python-connector[pg8000]==1.18.3
32
- Requires-Dist: nci-cidc-schemas==0.27.27
32
+ Requires-Dist: nci-cidc-schemas==0.27.28
33
33
  Dynamic: description
34
34
  Dynamic: description-content-type
35
35
  Dynamic: home-page
@@ -45,7 +45,34 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
45
45
  # Connect to the test database
46
46
  db_uri = environ.get("TEST_POSTGRES_URI", "fake-conn-string")
47
47
  elif not db_uri:
48
- db_uri = f"postgresql+pg8000://{environ.get('CLOUD_SQL_DB_USER')}:xxx@/{environ.get('CLOUD_SQL_DB_NAME')}"
48
+ # TODO: figure out why IAM authentication causes an issue with the huge filter query
49
+ # db_uri = f"postgresql+pg8000://{environ.get('CLOUD_SQL_DB_USER')}:xxx@/{environ.get('CLOUD_SQL_DB_NAME')}"
50
+
51
+ secrets = get_secrets_manager(testing)
52
+
53
+ # If POSTGRES_URI env variable is not set,
54
+ # we're connecting to a Cloud SQL instance.
55
+
56
+ config: dict = {
57
+ "drivername": "postgresql",
58
+ "username": environ.get("CLOUD_SQL_DB_USER"),
59
+ "password": secrets.get(environ.get("CLOUD_SQL_DB_PASS_ID")),
60
+ "database": environ.get("CLOUD_SQL_DB_NAME"),
61
+ }
62
+
63
+ if environ.get("CLOUD_SQL_INSTANCE_NAME"):
64
+ socket_dir = environ.get("CLOUD_SQL_SOCKET_DIR", "/cloudsql/")
65
+
66
+ # If CLOUD_SQL_INSTANCE_NAME is defined, we're connecting
67
+ # via a unix socket from inside App Engine.
68
+ config["query"] = {"host": f'{socket_dir}{environ.get("CLOUD_SQL_INSTANCE_NAME")}'}
69
+ else:
70
+ raise RuntimeError(
71
+ "Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect " + "to a database."
72
+ )
73
+
74
+ db_uri = str(URL.create(**config).render_as_string(hide_password=False))
75
+
49
76
  assert db_uri
50
77
 
51
78
  return db_uri
@@ -53,7 +80,9 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
53
80
 
54
81
  # Use SQLALCHEMY_ENGINE_OPTIONS to connect to the cloud but use uri for local db
55
82
  def cloud_connector(testing: bool = False):
56
- if not testing and not environ.get("POSTGRES_URI"):
57
- return {"creator": getconn}
58
- else:
59
- return {}
83
+ return {}
84
+ # TODO: figure out IAM authentication
85
+ # if not testing and not environ.get("POSTGRES_URI"):
86
+ # return {"creator": getconn}
87
+ # else:
88
+ # return {}
@@ -23,6 +23,7 @@ __all__ = [
23
23
  "ValidationMultiError",
24
24
  "with_default_session",
25
25
  "PreprocessedFiles",
26
+ "FileValidationErrors",
26
27
  "IngestionJobs",
27
28
  "JobFileCategories",
28
29
  "TRIAL_APPENDIX_A",
@@ -3261,6 +3262,7 @@ class PreprocessedFiles(CommonColumns):
3261
3262
  status = Column(String)
3262
3263
  version = Column(Integer)
3263
3264
  released_version = Column(String)
3265
+ error_status = Column(String)
3264
3266
 
3265
3267
  @staticmethod
3266
3268
  @with_default_session
@@ -3457,19 +3459,24 @@ class IngestionJobs(CommonColumns):
3457
3459
  categories.append(cell)
3458
3460
  elif cell == "PATIENT-LEVEL DATA":
3459
3461
  headers_ended = True
3462
+ if "Data_Dictionary" not in categories:
3463
+ # Ensure Data_Dictionary is always a required file category
3464
+ categories.append("Data_Dictionary")
3460
3465
  return categories
3461
3466
 
3462
3467
  @classmethod
3463
3468
  @with_default_session
3464
3469
  def atomic_set_job_as_pending(cls, job_id: int, session: Session) -> Boolean:
3465
3470
  # Preventing rare race condition where multiple people try and submit a job for validation
3466
- return bool(
3471
+ result = bool(
3467
3472
  session.execute(
3468
3473
  update(IngestionJobs)
3469
3474
  .where(and_(IngestionJobs.id == job_id, IngestionJobs.pending == False))
3470
3475
  .values(pending=True)
3471
3476
  ).rowcount
3472
3477
  )
3478
+ session.commit()
3479
+ return result
3473
3480
 
3474
3481
  @classmethod
3475
3482
  @with_default_session
@@ -3536,3 +3543,31 @@ class JobFileCategories(CommonColumns):
3536
3543
  def categories_for_job(cls, job_id: int, type: str, session: Session = None):
3537
3544
  categories = session.query(cls).filter(cls.job_id == job_id, cls.type == type).all()
3538
3545
  return [c.category for c in categories]
3546
+
3547
+
3548
+ class FileValidationErrors(CommonColumns):
3549
+ __tablename__ = "file_validation_errors"
3550
+ __table_args__ = (
3551
+ ForeignKeyConstraint(
3552
+ ["job_id"],
3553
+ ["ingestion_jobs.id"],
3554
+ ondelete="CASCADE",
3555
+ ),
3556
+ ForeignKeyConstraint(
3557
+ ["file_id"],
3558
+ ["preprocessed_files.id"],
3559
+ ondelete="CASCADE",
3560
+ ),
3561
+ )
3562
+
3563
+ error_message = Column(String, nullable=False)
3564
+ location = Column(String, nullable=True)
3565
+ job_id = Column(Integer, nullable=False)
3566
+ file_id = Column(Integer, nullable=False)
3567
+
3568
+ @staticmethod
3569
+ @with_default_session
3570
+ def create(file_id: int, job_id: int, error_message: str, location: str = None, session: Session = None):
3571
+ new_error = FileValidationErrors(file_id=file_id, job_id=job_id, error_message=error_message, location=location)
3572
+ new_error.insert(session=session)
3573
+ return new_error
@@ -421,9 +421,8 @@ def upload_xlsx_to_intake_bucket(user_email: str, trial_id: str, upload_type: st
421
421
 
422
422
 
423
423
  def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str):
424
- """Reads an XLSX file from Google Cloud Storage into a Pandas DataFrame."""
425
- sheet_data = storage.Client().bucket(bucket_name).blob(blob_name).download_as_bytes()
426
- temp_file = io.BytesIO(sheet_data)
424
+ """Reads an XLSX or CSV file from Google Cloud Storage into a Pandas DataFrame."""
425
+ temp_file = get_file_bytes_from_gcs(bucket_name, blob_name)
427
426
 
428
427
  # TODO: specify sheet in xlsx file and/or accept tsv and xls files
429
428
  if blob_name[-3:] == "csv":
@@ -434,6 +433,12 @@ def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str):
434
433
  raise Exception("Can only read csv or xlsx files")
435
434
 
436
435
 
436
+ def get_file_bytes_from_gcs(bucket_name: str, blob_name: str) -> io.BytesIO:
437
+ """Reads a file from Google Cloud Storage and returns it as BytesIO."""
438
+ sheet_data = storage.Client().bucket(bucket_name).blob(blob_name).download_as_bytes()
439
+ return io.BytesIO(sheet_data)
440
+
441
+
437
442
  def _execute_multiblob_acl_change(
438
443
  user_email_list: List[str],
439
444
  blob_list: List[storage.Blob],
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.39
3
+ Version: 1.1.41
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -29,7 +29,7 @@ Requires-Dist: requests==2.32.4
29
29
  Requires-Dist: jinja2==3.1.6
30
30
  Requires-Dist: certifi==2024.7.4
31
31
  Requires-Dist: cloud-sql-python-connector[pg8000]==1.18.3
32
- Requires-Dist: nci-cidc-schemas==0.27.27
32
+ Requires-Dist: nci-cidc-schemas==0.27.28
33
33
  Dynamic: description
34
34
  Dynamic: description-content-type
35
35
  Dynamic: home-page
@@ -20,4 +20,4 @@ requests==2.32.4
20
20
  jinja2==3.1.6
21
21
  certifi==2024.7.4
22
22
  cloud-sql-python-connector[pg8000]==1.18.3
23
- nci-cidc-schemas==0.27.27
23
+ nci-cidc-schemas==0.27.28
@@ -20,4 +20,4 @@ requests==2.32.4
20
20
  jinja2==3.1.6
21
21
  certifi==2024.7.4
22
22
  cloud-sql-python-connector[pg8000]==1.18.3
23
- nci-cidc-schemas==0.27.27
23
+ nci-cidc-schemas==0.27.28