nci-cidc-api-modules 1.1.40__tar.gz → 1.1.41__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {nci_cidc_api_modules-1.1.40/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.1.41}/PKG-INFO +1 -1
  2. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/models/models.py +33 -0
  3. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/gcloud_client.py +8 -3
  4. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41/nci_cidc_api_modules.egg-info}/PKG-INFO +1 -1
  5. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/LICENSE +0 -0
  6. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/MANIFEST.in +0 -0
  7. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/README.md +0 -0
  8. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/config/__init__.py +0 -0
  9. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/config/db.py +0 -0
  10. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/config/logging.py +0 -0
  11. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/config/secrets.py +0 -0
  12. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/config/settings.py +0 -0
  13. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/models/__init__.py +0 -0
  14. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/models/files/__init__.py +0 -0
  15. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/models/files/details.py +0 -0
  16. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/models/files/facets.py +0 -0
  17. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/models/migrations.py +0 -0
  18. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/models/schemas.py +0 -0
  19. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/__init__.py +0 -0
  20. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/auth.py +0 -0
  21. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/emails.py +0 -0
  22. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/file_handling.py +0 -0
  23. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/jose.py +0 -0
  24. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/cidc_api/shared/rest_utils.py +0 -0
  25. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/SOURCES.txt +0 -0
  26. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
  27. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
  28. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/requires.txt +0 -0
  29. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
  30. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/pyproject.toml +0 -0
  31. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/requirements.modules.txt +0 -0
  32. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/setup.cfg +0 -0
  33. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/setup.py +0 -0
  34. {nci_cidc_api_modules-1.1.40 → nci_cidc_api_modules-1.1.41}/tests/test_api.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.40
3
+ Version: 1.1.41
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -23,6 +23,7 @@ __all__ = [
23
23
  "ValidationMultiError",
24
24
  "with_default_session",
25
25
  "PreprocessedFiles",
26
+ "FileValidationErrors",
26
27
  "IngestionJobs",
27
28
  "JobFileCategories",
28
29
  "TRIAL_APPENDIX_A",
@@ -3261,6 +3262,7 @@ class PreprocessedFiles(CommonColumns):
3261
3262
  status = Column(String)
3262
3263
  version = Column(Integer)
3263
3264
  released_version = Column(String)
3265
+ error_status = Column(String)
3264
3266
 
3265
3267
  @staticmethod
3266
3268
  @with_default_session
@@ -3457,6 +3459,9 @@ class IngestionJobs(CommonColumns):
3457
3459
  categories.append(cell)
3458
3460
  elif cell == "PATIENT-LEVEL DATA":
3459
3461
  headers_ended = True
3462
+ if "Data_Dictionary" not in categories:
3463
+ # Ensure Data_Dictionary is always a required file category
3464
+ categories.append("Data_Dictionary")
3460
3465
  return categories
3461
3466
 
3462
3467
  @classmethod
@@ -3538,3 +3543,31 @@ class JobFileCategories(CommonColumns):
3538
3543
  def categories_for_job(cls, job_id: int, type: str, session: Session = None):
3539
3544
  categories = session.query(cls).filter(cls.job_id == job_id, cls.type == type).all()
3540
3545
  return [c.category for c in categories]
3546
+
3547
+
3548
+ class FileValidationErrors(CommonColumns):
3549
+ __tablename__ = "file_validation_errors"
3550
+ __table_args__ = (
3551
+ ForeignKeyConstraint(
3552
+ ["job_id"],
3553
+ ["ingestion_jobs.id"],
3554
+ ondelete="CASCADE",
3555
+ ),
3556
+ ForeignKeyConstraint(
3557
+ ["file_id"],
3558
+ ["preprocessed_files.id"],
3559
+ ondelete="CASCADE",
3560
+ ),
3561
+ )
3562
+
3563
+ error_message = Column(String, nullable=False)
3564
+ location = Column(String, nullable=True)
3565
+ job_id = Column(Integer, nullable=False)
3566
+ file_id = Column(Integer, nullable=False)
3567
+
3568
+ @staticmethod
3569
+ @with_default_session
3570
+ def create(file_id: int, job_id: int, error_message: str, location: str = None, session: Session = None):
3571
+ new_error = FileValidationErrors(file_id=file_id, job_id=job_id, error_message=error_message, location=location)
3572
+ new_error.insert(session=session)
3573
+ return new_error
@@ -421,9 +421,8 @@ def upload_xlsx_to_intake_bucket(user_email: str, trial_id: str, upload_type: st
421
421
 
422
422
 
423
423
  def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str):
424
- """Reads an XLSX file from Google Cloud Storage into a Pandas DataFrame."""
425
- sheet_data = storage.Client().bucket(bucket_name).blob(blob_name).download_as_bytes()
426
- temp_file = io.BytesIO(sheet_data)
424
+ """Reads an XLSX or CSV file from Google Cloud Storage into a Pandas DataFrame."""
425
+ temp_file = get_file_bytes_from_gcs(bucket_name, blob_name)
427
426
 
428
427
  # TODO: specify sheet in xlsx file and/or accept tsv and xls files
429
428
  if blob_name[-3:] == "csv":
@@ -434,6 +433,12 @@ def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str):
434
433
  raise Exception("Can only read csv or xlsx files")
435
434
 
436
435
 
436
+ def get_file_bytes_from_gcs(bucket_name: str, blob_name: str) -> io.BytesIO:
437
+ """Reads a file from Google Cloud Storage and returns it as BytesIO."""
438
+ sheet_data = storage.Client().bucket(bucket_name).blob(blob_name).download_as_bytes()
439
+ return io.BytesIO(sheet_data)
440
+
441
+
437
442
  def _execute_multiblob_acl_change(
438
443
  user_email_list: List[str],
439
444
  blob_list: List[storage.Blob],
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.1.40
3
+ Version: 1.1.41
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license