nci-cidc-api-modules 1.1.34__tar.gz → 1.1.37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {nci_cidc_api_modules-1.1.34/nci_cidc_api_modules.egg-info → nci_cidc_api_modules-1.1.37}/PKG-INFO +5 -5
  2. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/config/db.py +3 -4
  3. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/models/files/details.py +31 -0
  4. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/models/files/facets.py +57 -0
  5. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/models/models.py +300 -58
  6. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/models/schemas.py +1 -0
  7. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/shared/emails.py +1 -1
  8. nci_cidc_api_modules-1.1.37/cidc_api/shared/file_handling.py +56 -0
  9. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/shared/gcloud_client.py +18 -1
  10. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37/nci_cidc_api_modules.egg-info}/PKG-INFO +5 -5
  11. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/nci_cidc_api_modules.egg-info/SOURCES.txt +1 -0
  12. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/nci_cidc_api_modules.egg-info/requires.txt +4 -4
  13. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/requirements.modules.txt +4 -4
  14. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/tests/test_api.py +16 -8
  15. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/LICENSE +0 -0
  16. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/MANIFEST.in +0 -0
  17. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/README.md +0 -0
  18. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/config/__init__.py +0 -0
  19. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/config/logging.py +0 -0
  20. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/config/secrets.py +0 -0
  21. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/config/settings.py +0 -0
  22. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/models/__init__.py +0 -0
  23. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/models/files/__init__.py +0 -0
  24. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/models/migrations.py +0 -0
  25. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/shared/__init__.py +0 -0
  26. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/shared/auth.py +0 -0
  27. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/shared/jose.py +0 -0
  28. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/cidc_api/shared/rest_utils.py +0 -0
  29. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/nci_cidc_api_modules.egg-info/dependency_links.txt +0 -0
  30. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/nci_cidc_api_modules.egg-info/not-zip-safe +0 -0
  31. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/nci_cidc_api_modules.egg-info/top_level.txt +0 -0
  32. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/pyproject.toml +0 -0
  33. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/setup.cfg +0 -0
  34. {nci_cidc_api_modules-1.1.34 → nci_cidc_api_modules-1.1.37}/setup.py +0 -0
--- nci_cidc_api_modules-1.1.34/nci_cidc_api_modules.egg-info/PKG-INFO
+++ nci_cidc_api_modules-1.1.37/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nci_cidc_api_modules
-Version: 1.1.34
+Version: 1.1.37
 Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
 Home-page: https://github.com/NCI-CIDC/cidc-api-gae
 License: MIT license
@@ -10,10 +10,10 @@ License-File: LICENSE
 Requires-Dist: werkzeug==3.0.6
 Requires-Dist: flask==3.0.3
 Requires-Dist: flask-migrate==3.1.0
-Requires-Dist: flask-sqlalchemy==3.0.2
-Requires-Dist: sqlalchemy==1.4.54
+Requires-Dist: flask-sqlalchemy==3.1.1
+Requires-Dist: sqlalchemy==2.0.41
 Requires-Dist: marshmallow==3.19.0
-Requires-Dist: marshmallow-sqlalchemy==0.22.3
+Requires-Dist: marshmallow-sqlalchemy==1.4.2
 Requires-Dist: google-cloud-storage==2.18.0
 Requires-Dist: google-cloud-secret-manager==2.20.1
 Requires-Dist: google-cloud-pubsub==2.22.0
@@ -28,7 +28,7 @@ Requires-Dist: python-dotenv==0.10.3
 Requires-Dist: requests==2.32.4
 Requires-Dist: jinja2==3.1.6
 Requires-Dist: certifi==2024.7.4
-Requires-Dist: nci-cidc-schemas==0.27.25
+Requires-Dist: nci-cidc-schemas==0.27.27
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
--- nci_cidc_api_modules-1.1.34/cidc_api/config/db.py
+++ nci_cidc_api_modules-1.1.37/cidc_api/config/db.py
@@ -4,13 +4,12 @@ from flask import Flask
 from flask_sqlalchemy import SQLAlchemy
 from flask_migrate import Migrate, upgrade
 from sqlalchemy.engine.url import URL
-from sqlalchemy.ext.declarative import declarative_base
-
+from sqlalchemy.orm import declarative_base
 
 from .secrets import get_secrets_manager
 
 db = SQLAlchemy()
-BaseModel = declarative_base(bind=db)
+BaseModel = declarative_base()
 db.Model = BaseModel
 
 
@@ -54,7 +53,7 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
         "Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect " + "to a database."
     )
 
-    db_uri = str(URL(**config))
+    db_uri = str(URL.create(**config).render_as_string(hide_password=False))
 
     assert db_uri
 
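Note: these db.py changes are the core of the SQLAlchemy 1.4 → 2.0 migration pinned above. `declarative_base` moved to `sqlalchemy.orm` and no longer accepts a `bind=` argument, and `URL` can no longer be instantiated directly; it must be built with `URL.create()`. A minimal sketch of the 2.0-style pattern (the connection values are placeholders, not the package's real config):

    from sqlalchemy.engine.url import URL
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()  # 2.0 style: no bind= argument

    # URL() is not directly constructible in SQLAlchemy 2.0; use URL.create().
    url = URL.create(
        drivername="postgresql",  # placeholder values for illustration
        username="user",
        password="secret",
        host="localhost",
        database="cidc",
    )
    # render_as_string(hide_password=False) keeps credentials in the URI and
    # already returns a str, so the outer str() in the hunk above is redundant.
    db_uri = url.render_as_string(hide_password=False)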
--- nci_cidc_api_modules-1.1.34/cidc_api/models/files/details.py
+++ nci_cidc_api_modules-1.1.37/cidc_api/models/files/details.py
@@ -993,4 +993,35 @@ details_dict = {
         "",
         "",
     ),
+    # scrna
+    "/scrnaseq/samples_metadata.csv": FileDetails("source", "", ""),
+    "/scrnaseq/read_1.gz": FileDetails("source", "", ""),
+    "/scrnaseq/read_2.gz": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/samples_metadata.csv": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/config.yaml": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/R_package_versions.csv": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/integration.rds": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/integration_heatmap_plots.zip": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/integration_markers.zip": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/integration_split_percent_plots.zip": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/integration_split_umap_plots.zip": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/integration_umap_plots.zip": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/clustering.rds": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/report.html": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/star_sorted_by_cord.bam": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/star_sorted_by_cord.bam.bai": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/log_final.out": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/log.out": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/log_progress.out": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/sj_out.tab": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/barcodes.stats": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_features.stats": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_summary.csv": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_umi_per_cell_sorted.txt": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_filtered_features.tsv": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_filtered_barcodes.tsv": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_filtered_matrix.mtx": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_raw_features.tsv": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_raw_barcodes.tsv": FileDetails("source", "", ""),
+    "/scrnaseq_analysis/gene_raw_matrix.mtx": FileDetails("source", "", ""),
 }
--- nci_cidc_api_modules-1.1.34/cidc_api/models/files/facets.py
+++ nci_cidc_api_modules-1.1.37/cidc_api/models/files/facets.py
@@ -346,6 +346,21 @@ assay_facets: Facets = {
             "H and E file from MIBI analysis",
         ),
     },
+    "scRNA": {
+        "Samples Metadata": FacetConfig(["/scrnaseq/samples_metadata.csv"], "Sample metadata for scRNA run"),
+        "Read 1 gz": FacetConfig(["/scrnaseq/read_1.gz"], "Gz file for read 1"),
+        "Read 2 gz": FacetConfig(["/scrnaseq/read_2.gz"], "Gz file for read 2"),
+    },
+    "Visium": {
+        "Samples Metadata": FacetConfig(["/visium/samples_metadata.csv"], "Sample metadata for visium run"),
+        "Read 1 fastq gz": FacetConfig(["/visium/R1_001.fastq.gz"], "Gz file for read 1"),
+        "Read 2 fastq gz": FacetConfig(["/visium/R2_001.fastq.gz"], "Gz file for read 2"),
+        "loupe alignment file": FacetConfig(["/visium/loupe_alignment_file.json"]),
+        "brightfield image": FacetConfig(["/visium/brightfield.tiff"]),
+        "dark image": FacetConfig(["/visium/dark_image.tiff"]),
+        "colorized image": FacetConfig(["/visium/colorized.tiff"]),
+        "cytassist image": FacetConfig(["/visium/cytassist.tiff"]),
+    },
     "mIHC": {
         "Samples Report": FacetConfig(["/mihc/sample_report.csv"], "Samples report for mIHC run"),
         "Multitiffs": FacetConfig(["/mihc/multitiffs.tar.gz"], "Multi Tiffs file from mIHC run"),
@@ -549,6 +564,48 @@ analysis_ready_facets = {
     "WES Analysis": FacetConfig(["/wes/analysis/report.tar.gz"]),
     "TCR": FacetConfig(["/tcr_analysis/report_trial.tar.gz"]),
     "mIF": FacetConfig(["/mif/roi_/cell_seg_data.txt"]),
+    "scRNA": FacetConfig(
+        [
+            "/scrnaseq_analysis/samples_metadata.csv",
+            "/scrnaseq_analysis/config.yaml",
+            "/scrnaseq_analysis/R_package_versions.csv",
+            "/scrnaseq_analysis/integration.rds",
+            "/scrnaseq_analysis/integration_heatmap_plots.zip",
+            "/scrnaseq_analysis/integration_markers.zip",
+            "/scrnaseq_analysis/integration_split_percent_plots.zip",
+            "/scrnaseq_analysis/integration_split_umap_plots.zip",
+            "/scrnaseq_analysis/integration_umap_plots.zip",
+            "/scrnaseq_analysis/clustering.rds",
+            "/scrnaseq_analysis/report.html",
+            "/scrnaseq_analysis/star_sorted_by_cord.bam",
+            "/scrnaseq_analysis/star_sorted_by_cord.bam.bai",
+            "/scrnaseq_analysis/log_final.out",
+            "/scrnaseq_analysis/log.out",
+            "/scrnaseq_analysis/log_progress.out",
+            "/scrnaseq_analysis/sj_out.tab",
+            "/scrnaseq_analysis/barcodes.stats",
+            "/scrnaseq_analysis/gene_features.stats",
+            "/scrnaseq_analysis/gene_summary.csv",
+            "/scrnaseq_analysis/gene_umi_per_cell_sorted.txt",
+            "/scrnaseq_analysis/gene_filtered_features.tsv",
+            "/scrnaseq_analysis/gene_filtered_barcodes.tsv",
+            "/scrnaseq_analysis/gene_filtered_matrix.mtx",
+            "/scrnaseq_analysis/gene_raw_features.tsv",
+            "/scrnaseq_analysis/gene_raw_barcodes.tsv",
+            "/scrnaseq_analysis/gene_raw_matrix.mtx",
+        ]
+    ),
+    "Visium": FacetConfig(
+        [
+            "/visium_analysis/samples_metadata.csv",
+            "/visium_analysis/config.yaml",
+            "/visium_analysis/R_package_versions.csv",
+            "/visium_analysis/merged.rds",
+            "/visium_analysis/spatial_variable_features.rds",
+            "/visium_analysis/report.html",
+            "/visium_analysis/visium_spaceranger_output.zip",
+        ]
+    ),
 }
 
 facets_dict: Dict[str, Facets] = {
--- nci_cidc_api_modules-1.1.34/cidc_api/models/models.py
+++ nci_cidc_api_modules-1.1.37/cidc_api/models/models.py
@@ -23,6 +23,14 @@ __all__ = [
     "ValidationMultiError",
    "with_default_session",
     "PreprocessedFiles",
+    "IngestionJobs",
+    "JobFileCategories",
+    "TRIAL_APPENDIX_A",
+    "REQUEST_LETTER",
+    "ADMIN_FILE_CATEGORIES",
+    "FINAL_JOB_STATUS",
+    "INGESTION_JOB_STATUSES",
+    "INGESTION_JOB_COLORS",
 ]
 
 import hashlib
@@ -36,6 +44,7 @@ from functools import wraps
 from typing import (
     Any,
     BinaryIO,
+    ClassVar,
     Dict,
     Optional,
     List,
@@ -54,32 +63,33 @@ from google.cloud.storage import Blob
 from jsonschema.exceptions import ValidationError
 from sqlalchemy import (
     and_,
-    Column,
+    asc,
+    case,
+    desc,
+    func,
+    literal,
+    literal_column,
+    not_,
+    or_,
+    select,
+    text,
+    true,
+    tuple_,
+    update,
+    BigInteger,
     Boolean,
+    CheckConstraint,
+    Column,
     DateTime,
-    Integer,
-    BigInteger,
-    String,
     Enum,
-    Index,
-    func,
-    CheckConstraint,
     ForeignKey,
     ForeignKeyConstraint,
+    Index,
+    Integer,
+    MetaData,
     PrimaryKeyConstraint,
-    tuple_,
-    asc,
-    desc,
-    update,
-    case,
-    select,
-    literal_column,
-    not_,
-    literal,
-    or_,
+    String,
     Table,
-    MetaData,
-    true,
 )
 from sqlalchemy.dialects.postgresql import JSONB, UUID
 from sqlalchemy.engine import ResultProxy
@@ -96,8 +106,6 @@ from sqlalchemy.sql import (
     # break up this giant file.
     and_ as sql_and,
     or_ as sql_or,
-    # select, # ALREADY IMPORTED
-    text,
 )
 from sqlalchemy.sql.elements import BooleanClauseList
 from sqlalchemy.sql.functions import coalesce
@@ -119,6 +127,7 @@ from ..config.settings import (
     MAX_PAGINATION_PAGE_SIZE,
     TESTING,
     INACTIVE_USER_DAYS,
+    GOOGLE_CLINICAL_DATA_BUCKET,
 )
 from ..shared import emails
 from ..shared.gcloud_client import (
@@ -132,6 +141,7 @@ from ..shared.gcloud_client import (
     revoke_intake_access,
     revoke_lister_access,
     revoke_bigquery_access,
+    gcs_xlsx_or_csv_file_to_pandas_dataframe,
 )
 
 os.environ["TZ"] = "UTC"
@@ -309,7 +319,7 @@ class CommonColumns(BaseModel):  # type: ignore
     @with_default_session
     def find_by_id(cls, id: int, session: Session):
         """Find the record with this id"""
-        return session.query(cls).get(id)
+        return session.get(cls, id)
 
     @classmethod
     @with_default_session
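Note: `Query.get()` was removed in SQLAlchemy 2.0; `Session.get()` is the direct replacement this hunk applies. A minimal sketch (`Widget` is a hypothetical mapped class for illustration):

    # 2.0 style: fetch by primary key via the Session
    widget = session.get(Widget, 42)  # returns the instance or None

    # 1.x equivalent, removed in 2.0:
    # widget = session.query(Widget).get(42)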
@@ -1207,9 +1217,10 @@ class TrialMetadata(CommonColumns):
             raise NoResultFound(f"No trial found with id {trial_id}")
         return unprism.unprism_samples(trial.metadata_json)
 
-    file_bundle: Optional[FileBundle]
-    num_participants: Optional[int]
-    num_samples: Optional[int]
+    file_bundle: ClassVar[Optional[FileBundle]]
+    num_participants: ClassVar[Optional[int]]
+    num_samples: ClassVar[Optional[int]]
+    ready_for_submission: ClassVar[Optional[Boolean]]
 
     # List of metadata JSON fields that should not be sent to clients
     # in queries that list trial metadata, because they may contain a lot
@@ -1330,11 +1341,26 @@ class TrialMetadata(CommonColumns):
                     del trial.file_bundle[assay][purpose]
                 if not trial.file_bundle[assay]:
                     del trial.file_bundle[assay]
+            # Check if trial is ready for submission
+            setattr(trial, "ready_for_submission", trial.ready_for_submission())
 
             trials.append(trial)
 
         return trials
 
+    @with_default_session
+    def ready_for_submission(self, session: Session) -> Boolean:
+        open_job = IngestionJobs.get_open_job_by_trial(self.trial_id)
+        if not open_job:
+            return False
+        appendix_a_files = PreprocessedFiles.get_files_by_category_and_status(
+            "trial_appendix_a", "current", job_id=open_job.id
+        )
+        trial_letters = PreprocessedFiles.get_files_by_category_and_status(
+            "request_letter", "current", job_id=open_job.id
+        )
+        return appendix_a_files and trial_letters and open_job.status == "DRAFT"
+
     @with_default_session
     def insert(
         self,
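Note: `setattr(trial, "ready_for_submission", trial.ready_for_submission())` stores a plain bool on the instance under the same name as the method, shadowing it on that object; this appears to mirror how `file_bundle`, `num_participants`, and `num_samples` are attached for serialization (see the new dump-only field in schemas.py below), while the `ClassVar` annotations keep the declarative mapper from treating these names as columns. A minimal sketch of the shadowing behavior:

    class Trial:
        def ready_for_submission(self) -> bool:
            return False

    t = Trial()
    # Replace the bound method with its result on this instance only:
    setattr(t, "ready_for_submission", t.ready_for_submission())
    print(t.ready_for_submission)  # False -- now a bool attribute, not a method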
@@ -1711,6 +1737,30 @@ class TrialMetadata(CommonColumns):
         jsonb_array_elements(batch->'records') record
     """
 
+    # Find all samples associated with scrnaseq analysis uploads.
+    scrnaseq_analysis_subquery = """
+        select
+            trial_id,
+            'scrnaseq_analysis' as key,
+            record->>'cimac_id' as cimac_id
+        from
+            trial_metadata,
+            jsonb_array_elements(metadata_json#>'{analysis,scrnaseq_analysis}') batch,
+            jsonb_array_elements(batch->'records') record
+    """
+
+    # Find all samples associated with visium analysis uploads.
+    visium_analysis_subquery = """
+        select
+            trial_id,
+            'visium_analysis' as key,
+            record->>'cimac_id' as cimac_id
+        from
+            trial_metadata,
+            jsonb_array_elements(metadata_json#>'{analysis,visium_analysis}') batch,
+            jsonb_array_elements(batch->'records') record
+    """
+
     # Build up a JSON object mapping analysis types to arrays of excluded samples.
     # The resulting object will have structure like:
     # {
@@ -1866,6 +1916,10 @@ class TrialMetadata(CommonColumns):
                 {cytof_analysis_subquery}
                 union all
                 {atacseq_analysis_subquery}
+                union all
+                {scrnaseq_analysis_subquery}
+                union all
+                {visium_analysis_subquery}
             ) assays_and_analysis
             group by
                 trial_id, key
@@ -1924,7 +1978,7 @@ class TrialMetadata(CommonColumns):
         - `"wes_tumor_only_analysis"` counts (tumor) samples with tumor-only analysis
         For `"total_[participants/samples]"`, ALL (ie tumor AND normal) WES assay samples are included.
         """
-        summaries_query = "SELECT result FROM trial_summaries_mv"
+        summaries_query = text("SELECT result FROM trial_summaries_mv")
         # Retrieve trial-level summary results from data cached in trial_summaries_mv materialized view.
         # The source of the SQL query used in trial_summaries_mv is get_summaries_query()
         summaries = [summary for (summary,) in session.execute(summaries_query) if summary]
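Note: SQLAlchemy 2.0 no longer executes bare SQL strings; raw SQL must be wrapped in `text()`, which this hunk and the materialized-view refresh below both do. A minimal sketch:

    from sqlalchemy import text

    # 2.0 style: wrap raw SQL in text() before executing
    rows = session.execute(text("SELECT result FROM trial_summaries_mv"))

    # Bare strings now raise ObjectNotExecutableError:
    # session.execute("SELECT result FROM trial_summaries_mv")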
@@ -2256,6 +2310,7 @@ class DownloadableFiles(CommonColumns):
         "FileGroups",
         secondary="files_to_file_groups",
         back_populates="downloadable_files",
+        cascade="save-update",
     )
 
     FILE_EXT_REGEX = r"\.([^./]*(\.gz)?)$"
@@ -2751,7 +2806,7 @@ class DownloadableFiles(CommonColumns):
         """
 
         where_clause = DownloadableFiles._generate_where_clause_with_permissions(user)
-        statement = select([DownloadableFiles.id]).where(sql_and(DownloadableFiles.id.in_(ids), where_clause))
+        statement = select(DownloadableFiles.id).where(sql_and(DownloadableFiles.id.in_(ids), where_clause))
 
         return [row[0] for row in session.execute(statement).fetchall()]
 
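Note: this hunk and the larger rewrite below drop the 1.x "legacy" calling form `select([...])` for the 2.0 positional form `select(...)`. A minimal sketch:

    from sqlalchemy import select

    # 2.0 style: columns as positional arguments
    stmt = select(DownloadableFiles.id, DownloadableFiles.trial_id)

    # 1.x style, removed in 2.0:
    # stmt = select([DownloadableFiles.id, DownloadableFiles.trial_id])

    ids = [row[0] for row in session.execute(stmt)]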
@@ -2783,7 +2838,7 @@ class DownloadableFiles(CommonColumns):
 
         for file in files_to_delete:
             file.delete(commit=True)
-        session.execute("REFRESH MATERIALIZED VIEW CONCURRENTLY trial_summaries_mv")
+        session.execute(text("REFRESH MATERIALIZED VIEW CONCURRENTLY trial_summaries_mv"))
 
     @classmethod
     @with_default_session
@@ -3054,23 +3109,19 @@ class DownloadableFiles(CommonColumns):
 
         id_bundles = (
             select(
-                [
-                    cls.trial_id,
-                    cls.data_category_prefix.label(type_col.key),
-                    cls.file_purpose.label(purp_col.key),
-                    func.json_agg(cls.id).label(ids_col.key),
-                ]
+                cls.trial_id,
+                cls.data_category_prefix.label(type_col.key),
+                cls.file_purpose.label(purp_col.key),
+                func.json_agg(cls.id).label(ids_col.key),
             )
             .group_by(cls.trial_id, cls.data_category_prefix, cls.file_purpose)
             .alias("id_bundles")
         )
         purpose_bundles = (
             select(
-                [
-                    tid_col,
-                    type_col,
-                    func.json_object_agg(func.coalesce(purp_col, "miscellaneous"), ids_col).label(purps_col.key),
-                ]
+                tid_col,
+                type_col,
+                func.json_object_agg(func.coalesce(purp_col, "miscellaneous"), ids_col).label(purps_col.key),
             )
             .select_from(id_bundles)
             .group_by(tid_col, type_col)
@@ -3078,10 +3129,8 @@ class DownloadableFiles(CommonColumns):
         )
         file_bundles = (
             select(
-                [
-                    tid_col.label(tid_col.key),
-                    func.json_object_agg(func.coalesce(type_col, "other"), purps_col).label("file_bundle"),
-                ]
+                tid_col.label(tid_col.key),
+                func.json_object_agg(func.coalesce(type_col, "other"), purps_col).label("file_bundle"),
             )
             .select_from(purpose_bundles)
             .group_by(tid_col)
@@ -3131,13 +3180,13 @@ class DownloadableFiles(CommonColumns):
 # Query clause for computing a downloadable file's data category.
 # Used above in the DownloadableFiles.data_category computed property.
 DATA_CATEGORY_CASE_CLAUSE = case(
-    [(DownloadableFiles.facet_group == k, v) for k, v in facet_groups_to_categories.items()]
+    *[(DownloadableFiles.facet_group == k, v) for k, v in facet_groups_to_categories.items()]
 )
 
 # Query clause for computing a downloadable file's file purpose.
 # Used above in the DownloadableFiles.file_purpose computed property.
 FILE_PURPOSE_CASE_CLAUSE = case(
-    [
+    *[
         (DownloadableFiles.facet_group == facet_group, file_details.file_purpose)
         for facet_group, file_details in details_dict.items()
     ]
3146
3195
 
3147
3196
  def result_proxy_to_models(result_proxy: ResultProxy, model: BaseModel) -> List[BaseModel]:
3148
3197
  """Materialize a sqlalchemy `result_proxy` iterable as a list of `model` instances"""
3149
- return [model(**dict(row_proxy)) for row_proxy in result_proxy.all()]
3198
+ return [model(**dict(row_proxy._mapping)) for row_proxy in result_proxy.all()]
3150
3199
 
3151
3200
 
3152
3201
  @with_default_session
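Note: 2.0 `Row` objects are tuple-like and no longer act as mappings, so `dict(row)` fails; the keyed view moved to `Row._mapping`, hence this change. A minimal sketch:

    from sqlalchemy import text

    row = session.execute(text("SELECT 1 AS id, 'x' AS name")).first()
    dict(row._mapping)  # {'id': 1, 'name': 'x'} -- the 2.0 way
    # dict(row)         # TypeError in 2.0: Row iterates as a tuple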
@@ -3187,12 +3236,24 @@ def upload_manifest_json(
     return manifest_upload.id
 
 
+TRIAL_APPENDIX_A = "trial_appendix_a"
+REQUEST_LETTER = "request_letter"
+ADMIN_FILE_CATEGORIES = [TRIAL_APPENDIX_A, REQUEST_LETTER]
+
+
 class PreprocessedFiles(CommonColumns):
     __tablename__ = "preprocessed_files"
+    __table_args__ = (
+        ForeignKeyConstraint(
+            ["job_id"],
+            ["ingestion_jobs.id"],
+            name="preprocessed_files_job_id_fkey",
+        ),
+    )
 
     file_name = Column(String)
     object_url = Column(String)
-    trial_id = Column(String)
+    job_id = Column(Integer)
     file_category = Column(String)
     uploader_email = Column(String)
     status = Column(String)
@@ -3207,7 +3268,7 @@ class PreprocessedFiles(CommonColumns):
         file_category: str,
         uploader_email: str,
         status: str = "pending",
-        trial_id: str = None,
+        job_id: int = None,
         version: int = None,
         released_version: str = None,
         session: Session = None,
@@ -3219,7 +3280,7 @@ class PreprocessedFiles(CommonColumns):
             file_category=file_category,
             uploader_email=uploader_email,
             status=status,
-            trial_id=trial_id,
+            job_id=job_id,
             version=version,
             released_version=released_version,
         )
@@ -3228,10 +3289,10 @@ class PreprocessedFiles(CommonColumns):
 
     @classmethod
     @with_default_session
-    def archive_current_files(cls, file_category: str, session: Session = None):
+    def archive_current_files(cls, file_category: str, job_id: int = None, session: Session = None):
         """Update any 'current' files in the given category to 'archived'. Returns latest existing version number."""
         current_version = 0
-        current_files = cls.get_files_by_category_and_status(file_category, "current", session=session)
+        current_files = cls.get_files_by_category_and_status(file_category, "current", job_id=job_id, session=session)
         for file in current_files:
             file.status = "archived"
             file._updated = datetime.now()
@@ -3241,22 +3302,35 @@ class PreprocessedFiles(CommonColumns):
 
     @classmethod
     @with_default_session
-    def delete_pending_files_by_category(cls, file_category: str, trial_id: str = None, session: Session = None):
-        """Delete all pending files matching given file_category and optional trial_id."""
-        records = cls.get_files_by_category_and_status(file_category, "pending", trial_id=trial_id, session=session)
+    def delete_pending_files_by_category(cls, file_category: str, job_id: int = None, session: Session = None):
+        """Delete all pending files matching given file_category and optional job_id."""
+        records = cls.get_files_by_category_and_status(file_category, "pending", job_id=job_id, session=session)
+        for record in records:
+            session.delete(record)
+        session.commit()
+
+    @classmethod
+    @with_default_session
+    def delete_files_by_category(cls, file_category: str, job_id: int = None, session: Session = None):
+        """Delete all files matching a given file_category and job_id (or system files if job_id is None)."""
+        query = session.query(cls).filter_by(file_category=file_category)
+        query = cls.add_job_filter(query, job_id)
+        records = query.all()
+        if not records:
+            return False
         for record in records:
             session.delete(record)
         session.commit()
+        return True
 
     @classmethod
     @with_default_session
     def get_files_by_category_and_status(
-        cls, file_category: str, status: str, trial_id: str = None, session: Session = None
+        cls, file_category: str, status: str, job_id: int = None, session: Session = None
     ) -> list["PreprocessedFiles"]:
-        """Return all files matching given file_category and status, optionally filtered by trial_id."""
+        """Return all files matching file_category and status, with job_id filter (job_id is NULL if not provided)."""
         query = session.query(cls).filter_by(file_category=file_category, status=status)
-        if trial_id:
-            query = query.filter_by(trial_id=trial_id)
+        query = cls.add_job_filter(query, job_id)
         return query.all()
 
     @classmethod
@@ -3266,3 +3340,171 @@ class PreprocessedFiles(CommonColumns):
     ) -> Optional["PreprocessedFiles"]:
         """Return the file matching the given category and version number."""
         return session.query(cls).filter_by(file_category=file_category, version=version).one_or_none()
+
+    @classmethod
+    @with_default_session
+    def get_system_reference_files(cls, status: str = "current", session: Session = None) -> list["PreprocessedFiles"]:
+        """Return static reference files that are not linked to any job and not Master Appendix A."""
+        return (
+            session.query(cls)
+            .filter(cls.job_id.is_(None))
+            .filter(cls.file_category != "master_appendix_a")
+            .filter_by(status=status)
+            .all()
+        )
+
+    # TODO: logic for pending vs current files after high level validation
+    @classmethod
+    @with_default_session
+    def get_pending_non_admin_files(cls, job_id: int, session: Session) -> list["PreprocessedFiles"]:
+        return (
+            session.query(cls)
+            .filter(cls.job_id == job_id)
+            .filter(cls.status == "pending", cls.file_category.notin_(ADMIN_FILE_CATEGORIES))
+            .all()
+        )
+
+    @classmethod
+    def add_job_filter(cls, query, job_id):
+        """
+        Add a job_id filter to the SQLAlchemy query:
+        - If job_id is provided, filters for exact match.
+        - If not, filters for system-wide files (where job_id IS NULL).
+        """
+        if job_id is not None:
+            return query.filter_by(job_id=job_id)
+        else:
+            return query.filter(cls.job_id.is_(None))
+
+
+INGESTION_JOB_STATUSES = [
+    "DRAFT",
+    "INITIAL SUBMISSION",
+    "VALIDATION REVIEW",
+    "REVISION SUBMISSION",
+    "INGESTION",
+    "PUBLISHED",
+]
+
+# Business decision to pass hex codes from the backend though that should be done by the front end...
+INGESTION_JOB_COLORS = {
+    "DRAFT": "",
+    "INITIAL SUBMISSION": "#ACCAD7",
+    "VALIDATION REVIEW": "#DABE90",
+    "REVISION SUBMISSION": "#C8BAE5",
+    "INGESTION": "#8FCEC7",
+    "PUBLISHED": "#90D9E6",
+}
+# TODO If have "CANCELLED" concept or other final status, add here
+FINAL_JOB_STATUS = ["PUBLISHED"]
+
+
+class IngestionJobs(CommonColumns):
+    __tablename__ = "ingestion_jobs"
+    __table_args__ = (
+        ForeignKeyConstraint(
+            ["trial_id"],
+            ["trial_metadata.trial_id"],
+            name="ingestion_jobs_trial_id_fkey",
+        ),
+    )
+
+    status = Column("status", Enum(*INGESTION_JOB_STATUSES, name="status"), nullable=False)
+    trial_id = Column(String, nullable=False)
+    version = Column(Integer, nullable=False)
+
+    @staticmethod
+    @with_default_session
+    def create(trial_id: str, status: str, version: int, session: Session = None):
+        new_job = IngestionJobs(trial_id=trial_id, status=status, version=version)
+        new_job.insert(session=session)
+        return new_job
+
+    @with_default_session
+    def transition_status(self, status: str, session: Session):
+        # create required categories after opening job for submission
+        if self.status == "DRAFT" and status == "INITIAL SUBMISSION":
+            for category in self.derive_required_categories_from_appendix_a():
+                JobFileCategories.create(category=category, job_id=self.id, type="required")
+        self.status = status
+        self.update(session=session)
+
+    def derive_required_categories_from_appendix_a(self) -> List:
+        appendix_a = PreprocessedFiles.get_files_by_category_and_status(TRIAL_APPENDIX_A, "current", job_id=self.id)[0]
+        df = gcs_xlsx_or_csv_file_to_pandas_dataframe(GOOGLE_CLINICAL_DATA_BUCKET, appendix_a.object_url)
+        categories = []
+        headers_ended = False
+        for index, row in df.iterrows():
+            cell = str(row.iloc[0])
+            if headers_ended:
+                if not cell == "nan" and cell not in categories and cell != "Specialized_Data":
+                    categories.append(cell)
+            elif cell == "PATIENT-LEVEL DATA":
+                headers_ended = True
+        return categories
+
+    @classmethod
+    @with_default_session
+    def get_jobs_by_trial(cls, trial_id: str, session: Session = None) -> list["IngestionJobs"]:
+        return session.query(cls).filter(cls.trial_id == trial_id).order_by(cls.version.desc()).all()
+
+    @classmethod
+    @with_default_session
+    def get_open_job_by_trial(cls, trial_id: str, session: Session = None) -> Optional["IngestionJobs"]:
+        """Return the open job for a given trial if it exists."""
+        return (
+            session.query(cls)
+            .filter(
+                cls.trial_id == trial_id,
+                cls.status.notin_(FINAL_JOB_STATUS),
+            )
+            .order_by(cls._created.desc())
+            .first()
+        )
+
+    # TODO: figure out which users have access to which jobs
+    @classmethod
+    @with_default_session
+    def get_open_jobs_for_user(cls, user: Users, session: Session = None) -> list["IngestionJobs"]:
+        return session.query(cls).filter(cls.status.notin_(["DRAFT"])).order_by(cls._created.desc()).all()
+
+
+class JobFileCategories(CommonColumns):
+    __tablename__ = "job_file_categories"
+    __table_args__ = (
+        ForeignKeyConstraint(
+            ["job_id"],
+            ["ingestion_jobs.id"],
+        ),
+        Index(
+            "idx_categories_job_id" "job_id",
+            "category",
+            unique=True,
+        ),
+    )
+
+    category = Column(String)
+    job_id = Column(Integer)
+    type = Column(Enum("required", "optional", name="type"))
+
+    @staticmethod
+    @with_default_session
+    def create(
+        category: str,
+        job_id: int,
+        type: str,
+        session: Session = None,
+    ):
+        new_category = JobFileCategories(
+            category=category,
+            job_id=job_id,
+            type=type,
+        )
+        new_category.insert(session=session)
+        return new_category
+
+    @classmethod
+    @with_default_session
+    def categories_for_job(cls, job_id: int, type: str, session: Session = None):
+        categories = session.query(cls).filter(cls.job_id == job_id, cls.type == type).all()
+        return [c.category for c in categories]
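Note: taken together, the new models describe a per-trial submission workflow: an IngestionJobs row advances through INGESTION_JOB_STATUSES, PreprocessedFiles rows attach to a job via job_id (rows with a NULL job_id are system-wide reference files), and JobFileCategories snapshots the categories a job requires, derived from its Appendix A spreadsheet on the DRAFT → INITIAL SUBMISSION transition. Also note that `Index("idx_categories_job_id" "job_id", ...)` relies on adjacent string-literal concatenation, which looks like a missing comma: as written the index is named idx_categories_job_idjob_id and covers only "category". A hypothetical sketch of the intended flow (all values illustrative):

    # Open a draft job for a trial:
    job = IngestionJobs.create(trial_id="10021", status="DRAFT", version=1)

    # Attach a 'current' Appendix A to the job (set_current_file in
    # shared/file_handling.py wraps the archive + upload steps):
    PreprocessedFiles.create(
        file_name="appendix_a.xlsx",
        object_url="jobs/1/appendix_a.xlsx",  # illustrative GCS path
        file_category=TRIAL_APPENDIX_A,
        uploader_email="uploader@example.com",
        status="current",
        job_id=job.id,
    )

    # DRAFT -> INITIAL SUBMISSION derives required categories from Appendix A:
    job.transition_status("INITIAL SUBMISSION")
    required = JobFileCategories.categories_for_job(job.id, "required")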
--- nci_cidc_api_modules-1.1.34/cidc_api/models/schemas.py
+++ nci_cidc_api_modules-1.1.37/cidc_api/models/schemas.py
@@ -104,6 +104,7 @@ class TrialMetadataSchema(BaseSchema):
     file_bundle = fields.Dict(dump_only=True)
     num_participants = fields.Int(dump_only=True)
     num_samples = fields.Int(dump_only=True)
+    ready_for_submission = fields.Bool(dump_only=True)
 
 
 TrialMetadataListSchema = _make_list_schema(TrialMetadataSchema())
--- nci_cidc_api_modules-1.1.34/cidc_api/shared/emails.py
+++ nci_cidc_api_modules-1.1.37/cidc_api/shared/emails.py
@@ -16,7 +16,7 @@ from ..config.settings import ENV
 # - errors from CSMS in update_cidc_from_csms,
 # - errors from kicking off permissions in grant_download_permissions, and
 # - errors from implementing permissions in worker > permissions_worker
-CIDC_MAILING_LIST = ["essex-alert@cimac-network.org", "mustafa.kucukkal@nih.gov"]
+CIDC_MAILING_LIST = ["essex-alert@cimac-network.org"]
 
 
 def sendable(email_template):
--- /dev/null
+++ nci_cidc_api_modules-1.1.37/cidc_api/shared/file_handling.py
@@ -0,0 +1,56 @@
+from werkzeug.datastructures import FileStorage
+from werkzeug.exceptions import BadRequest
+
+from ..config.settings import GOOGLE_CLINICAL_DATA_BUCKET
+from ..models import PreprocessedFiles
+from ..shared.auth import get_current_user
+from ..shared.gcloud_client import upload_file_to_gcs
+
+
+def set_current_file(file: FileStorage, file_category: str, gcs_folder: str, job_id: int = None) -> PreprocessedFiles:
+    """
+    Archives any existing 'current' files for the given category and job,
+    then uploads the new file as the latest 'current' version.
+    """
+    latest_version = PreprocessedFiles.archive_current_files(file_category, job_id=job_id)
+    latest_file = create_file(file, gcs_folder, file_category, job_id, latest_version + 1)
+    return latest_file
+
+
+def create_file(
+    file: FileStorage, gcs_folder: str, file_category: str, job_id: int = None, version: int = None
+) -> PreprocessedFiles:
+    """Upload file to GCS and create corresponding metadata record in the database."""
+    status = "pending" if gcs_folder.endswith("pending/") else "current"
+    # only need timestamp for current/approved files
+    append_timestamp = status == "current"
+    # create file in GCS
+    gcs_file_path = upload_file_to_gcs(file, GOOGLE_CLINICAL_DATA_BUCKET, gcs_folder, append_timestamp=append_timestamp)
+    # create corresponding record in db
+    file = PreprocessedFiles.create(
+        file_name=file.filename,
+        object_url=gcs_file_path,
+        file_category=file_category,
+        uploader_email=get_current_user().email,
+        status=status,
+        job_id=job_id,
+        version=version,
+    )
+    return file
+
+
+def validate_file_extension(filename: str, allowed_extensions: list[str]):
+    if not filename or not any(filename.lower().endswith(ext) for ext in allowed_extensions):
+        raise BadRequest(f"Invalid file type. Must be one of: {allowed_extensions}")
+
+
+def format_common_preprocessed_file_response(file: PreprocessedFiles):
+    """Format a common response for a single PreprocessedFiles record."""
+    return {
+        "file_name": file.file_name,
+        "gcs_uri": f"gs://{GOOGLE_CLINICAL_DATA_BUCKET}/{file.object_url}",
+        "status": file.status,
+        "file_category": file.file_category,
+        "uploader_email": file.uploader_email,
+        "date": file._created.isoformat(),
+    }
--- nci_cidc_api_modules-1.1.34/cidc_api/shared/gcloud_client.py
+++ nci_cidc_api_modules-1.1.37/cidc_api/shared/gcloud_client.py
@@ -26,6 +26,7 @@ from typing import (
 
 import googleapiclient.discovery
 import requests
+import pandas as pd
 from cidc_schemas.prism.constants import ASSAY_TO_FILEPATH
 from google.api_core.client_options import ClientOptions
 from google.api_core.iam import Policy
@@ -217,10 +218,12 @@ def upload_xlsx_to_gcs(
     return final_object
 
 
-def upload_file_to_gcs(file: FileStorage, bucket_name: str, gcs_folder: str) -> str:
+def upload_file_to_gcs(file: FileStorage, bucket_name: str, gcs_folder: str, append_timestamp: bool = False) -> str:
     """Upload a file to the specified GCS folder and return the GCS path from the bucket."""
     # Secure the filename and prepare file
     filename = secure_filename(file.filename)
+    if append_timestamp:
+        filename = _append_iso_timestamp_to_filename(filename)
     gcs_file_path = os.path.join(gcs_folder, filename)
     binary_file = io.BytesIO(file.read())
 
@@ -416,6 +419,20 @@ def upload_xlsx_to_intake_bucket(user_email: str, trial_id: str, upload_type: st
     return f"https://console.cloud.google.com/storage/browser/_details/{bucket_name}/{blob_name}"
 
 
+def gcs_xlsx_or_csv_file_to_pandas_dataframe(bucket_name: str, blob_name: str):
+    """Reads an XLSX file from Google Cloud Storage into a Pandas DataFrame."""
+    sheet_data = storage.Client().bucket(bucket_name).blob(blob_name).download_as_bytes()
+    temp_file = io.BytesIO(sheet_data)
+
+    # TODO: specify sheet in xlsx file and/or accept tsv and xls files
+    if blob_name[-3:] == "csv":
+        return pd.read_csv(temp_file)
+    elif blob_name[-4:] == "xlsx":
+        return pd.read_excel(temp_file)
+    else:
+        raise Exception("Can only read csv or xlsx files")
+
+
 def _execute_multiblob_acl_change(
     user_email_list: List[str],
     blob_list: List[storage.Blob],
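Note: the fixed-width suffix slices (`blob_name[-3:] == "csv"`) work, but `str.endswith` is the more robust spelling, since it also handles names shorter than the suffix; a sketch of the equivalent dispatch under the same assumptions:

    import io

    import pandas as pd

    def read_tabular_bytes(data: bytes, name: str) -> pd.DataFrame:
        buf = io.BytesIO(data)
        if name.endswith(".csv"):
            return pd.read_csv(buf)
        if name.endswith(".xlsx"):
            return pd.read_excel(buf)  # first sheet by default
        raise ValueError("Can only read csv or xlsx files")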
--- nci_cidc_api_modules-1.1.34/PKG-INFO
+++ nci_cidc_api_modules-1.1.37/nci_cidc_api_modules.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nci_cidc_api_modules
-Version: 1.1.34
+Version: 1.1.37
 Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
 Home-page: https://github.com/NCI-CIDC/cidc-api-gae
 License: MIT license
@@ -10,10 +10,10 @@ License-File: LICENSE
 Requires-Dist: werkzeug==3.0.6
 Requires-Dist: flask==3.0.3
 Requires-Dist: flask-migrate==3.1.0
-Requires-Dist: flask-sqlalchemy==3.0.2
-Requires-Dist: sqlalchemy==1.4.54
+Requires-Dist: flask-sqlalchemy==3.1.1
+Requires-Dist: sqlalchemy==2.0.41
 Requires-Dist: marshmallow==3.19.0
-Requires-Dist: marshmallow-sqlalchemy==0.22.3
+Requires-Dist: marshmallow-sqlalchemy==1.4.2
 Requires-Dist: google-cloud-storage==2.18.0
 Requires-Dist: google-cloud-secret-manager==2.20.1
 Requires-Dist: google-cloud-pubsub==2.22.0
@@ -28,7 +28,7 @@ Requires-Dist: python-dotenv==0.10.3
 Requires-Dist: requests==2.32.4
 Requires-Dist: jinja2==3.1.6
 Requires-Dist: certifi==2024.7.4
-Requires-Dist: nci-cidc-schemas==0.27.25
+Requires-Dist: nci-cidc-schemas==0.27.27
 Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
--- nci_cidc_api_modules-1.1.34/nci_cidc_api_modules.egg-info/SOURCES.txt
+++ nci_cidc_api_modules-1.1.37/nci_cidc_api_modules.egg-info/SOURCES.txt
@@ -19,6 +19,7 @@ cidc_api/models/files/facets.py
 cidc_api/shared/__init__.py
 cidc_api/shared/auth.py
 cidc_api/shared/emails.py
+cidc_api/shared/file_handling.py
 cidc_api/shared/gcloud_client.py
 cidc_api/shared/jose.py
 cidc_api/shared/rest_utils.py
--- nci_cidc_api_modules-1.1.34/nci_cidc_api_modules.egg-info/requires.txt
+++ nci_cidc_api_modules-1.1.37/nci_cidc_api_modules.egg-info/requires.txt
@@ -1,10 +1,10 @@
 werkzeug==3.0.6
 flask==3.0.3
 flask-migrate==3.1.0
-flask-sqlalchemy==3.0.2
-sqlalchemy==1.4.54
+flask-sqlalchemy==3.1.1
+sqlalchemy==2.0.41
 marshmallow==3.19.0
-marshmallow-sqlalchemy==0.22.3
+marshmallow-sqlalchemy==1.4.2
 google-cloud-storage==2.18.0
 google-cloud-secret-manager==2.20.1
 google-cloud-pubsub==2.22.0
@@ -19,4 +19,4 @@ python-dotenv==0.10.3
 requests==2.32.4
 jinja2==3.1.6
 certifi==2024.7.4
-nci-cidc-schemas==0.27.25
+nci-cidc-schemas==0.27.27
--- nci_cidc_api_modules-1.1.34/requirements.modules.txt
+++ nci_cidc_api_modules-1.1.37/requirements.modules.txt
@@ -1,10 +1,10 @@
 werkzeug==3.0.6
 flask==3.0.3
 flask-migrate==3.1.0
-flask-sqlalchemy==3.0.2
-sqlalchemy==1.4.54
+flask-sqlalchemy==3.1.1
+sqlalchemy==2.0.41
 marshmallow==3.19.0
-marshmallow-sqlalchemy==0.22.3
+marshmallow-sqlalchemy==1.4.2
 google-cloud-storage==2.18.0
 google-cloud-secret-manager==2.20.1
 google-cloud-pubsub==2.22.0
@@ -19,4 +19,4 @@ python-dotenv==0.10.3
 requests==2.32.4
 jinja2==3.1.6
 certifi==2024.7.4
-nci-cidc-schemas==0.27.25
+nci-cidc-schemas==0.27.27
--- nci_cidc_api_modules-1.1.34/tests/test_api.py
+++ nci_cidc_api_modules-1.1.37/tests/test_api.py
@@ -5,16 +5,13 @@ to data resources, like endpoints that handle upload-related functionality.
 """
 
 import os
-
-os.environ["TZ"] = "UTC"
+import uuid
 from copy import deepcopy
 from unittest.mock import MagicMock
 from datetime import datetime
 from dateutil.parser import parse as parse_date
 
-
 import pytest
-from werkzeug.exceptions import BadRequest
 
 from cidc_api.models import (
     Users,
@@ -29,6 +26,7 @@ from cidc_api.models import (
 
 from .utils import mock_current_user, mock_gcloud_client
 
+os.environ["TZ"] = "UTC"
 TEST_RECORD_ID = 1
 
 # Configuration for resource tests below. For each resource, the following keywords are supported:
@@ -128,7 +126,9 @@ permissions = {
     "filters": {"empty": {"user_id": 2}, "one": {"user_id": TEST_RECORD_ID}},
 }
 
-upload_token = "53b455a5-d25b-428b-8c83-86a3120188da"
+# UUID object is returned with sqlchemy 2
+upload_token = uuid.uuid4()
+
 upload_jobs = {
     "json": {
         "id": TEST_RECORD_ID,
@@ -139,7 +139,7 @@ upload_jobs = {
         "gcs_xlsx_uri": "",
         "multifile": False,
         "status": UploadJobStatus.STARTED.value,
-        "token": upload_token,
+        "token": str(upload_token),
     },
     "model": UploadJobs,
     "lookup_func": lambda cfg: f"{cfg['id']}?token={upload_token}",
@@ -265,6 +265,7 @@ def test_resource_and_item_get(resource, config, cidc_api, clean_db, monkeypatch
         assert_dict_contains(item, json)
     else:
         assert_dict_contains(item, config["json"])
+
    if config.get("pagination"):
        assert response.json["_meta"]["total"] == 3
    elif resource == "users":
@@ -414,14 +415,16 @@ def test_endpoint_urls(cidc_api):
        "/clinical_data/files/master_appendix_a/pending",
        "/clinical_data/files/master_appendix_a/versions",
        "/clinical_data/files/master_appendix_a/versions/<int:version>",
-        "/clinical_data/jobs",
-        "/clinical_data/trials/<string:trial_id>/files/trial_appendix_a/pending",
        "/downloadable_files/",
        "/downloadable_files/filelist",
        "/downloadable_files/compressed_batch",
        "/downloadable_files/download_url",
        "/downloadable_files/facet_groups_for_links",
        "/downloadable_files/filter_facets",
+        "/files/<string:data_type>",
+        "/files/<string:data_type>/<string:file_category>",
+        "/files/<string:data_type>/<string:file_category>/versions",
+        "/files/<string:data_type>/current",
        "/downloadable_files/<int:downloadable_file>",
        "/downloadable_files/<int:downloadable_file>/related_files",
        "/info/assays",
@@ -438,6 +441,11 @@ def test_endpoint_urls(cidc_api):
        "/ingestion/poll_upload_merge_status/<int:upload_job>",
        "/ingestion/intake_bucket",
        "/ingestion/intake_metadata",
+        "/jobs/",
+        "/jobs/<int:job_id>",
+        "/jobs/<int:job_id>/files",
+        "/jobs/<int:job_id>/status",
+        "/jobs/trials/<string:trial_id>/current",
        "/permissions/",
        "/permissions/<int:permission>",
        "/samples/",