nci-cidc-api-modules 1.2.33__py3-none-any.whl → 1.2.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cidc_api/models/models.py CHANGED
@@ -3465,11 +3465,41 @@ class IngestionJobs(CommonColumns):
3465
3465
  pending = Column(Boolean, nullable=False, default=False)
3466
3466
  start_date = Column(DateTime, nullable=True)
3467
3467
  error_status = Column(String, nullable=True)
3468
+ job_type = Column(String, nullable=False, default="clinical")
3469
+ assay_type = Column(String, nullable=True)
3470
+ batch_id = Column(String, nullable=True)
3471
+ submission_id = Column(String, nullable=True)
3472
+ intake_path = Column(String, nullable=True)
3468
3473
 
3469
3474
  @staticmethod
3470
3475
  @with_default_session
3471
- def create(trial_id: str, status: str, version: int, pending: Boolean = False, session: Session = None):
3472
- new_job = IngestionJobs(trial_id=trial_id, status=status, version=version, pending=pending)
3476
+ def create(
3477
+ trial_id: str,
3478
+ status: str,
3479
+ version: int,
3480
+ error_status: str = None,
3481
+ pending: Boolean = False,
3482
+ job_type: str = "clinical",
3483
+ assay_type: str = None,
3484
+ batch_id: str = None,
3485
+ submission_id: str = None,
3486
+ intake_path: str = None,
3487
+ start_date: datetime = None,
3488
+ session: Session = None,
3489
+ ):
3490
+ new_job = IngestionJobs(
3491
+ trial_id=trial_id,
3492
+ status=status,
3493
+ error_status=error_status,
3494
+ version=version,
3495
+ pending=pending,
3496
+ job_type=job_type,
3497
+ assay_type=assay_type,
3498
+ batch_id=batch_id,
3499
+ submission_id=submission_id,
3500
+ intake_path=intake_path,
3501
+ start_date=start_date,
3502
+ )
3473
3503
  new_job.insert(session=session)
3474
3504
  return new_job
3475
3505
 
@@ -3494,29 +3524,43 @@ class IngestionJobs(CommonColumns):
3494
3524
 
3495
3525
  @classmethod
3496
3526
  @with_default_session
3497
- def get_jobs_by_trial(cls, trial_id: str, session: Session = None) -> list["IngestionJobs"]:
3498
- return session.query(cls).filter(cls.trial_id == trial_id).order_by(cls.version.desc()).all()
3527
+ def get_jobs_by_trial(
3528
+ cls, trial_id: str, job_type: str = "clinical", session: Session = None
3529
+ ) -> list["IngestionJobs"]:
3530
+ return (
3531
+ session.query(cls)
3532
+ .filter(cls.trial_id == trial_id, cls.job_type == job_type)
3533
+ .order_by(cls.version.desc())
3534
+ .all()
3535
+ )
3499
3536
 
3500
3537
  @classmethod
3501
3538
  @with_default_session
3502
- def get_open_job_by_trial(cls, trial_id: str, session: Session = None) -> Optional["IngestionJobs"]:
3539
+ def get_open_job_by_trial(
3540
+ cls, trial_id: str, job_type: str = "clinical", session: Session = None
3541
+ ) -> Optional["IngestionJobs"]:
3503
3542
  """Return the open job for a given trial if it exists."""
3504
3543
  return (
3505
3544
  session.query(cls)
3506
3545
  .filter(
3507
3546
  cls.trial_id == trial_id,
3547
+ cls.job_type == job_type,
3508
3548
  cls.status.notin_(FINAL_JOB_STATUS),
3509
3549
  )
3510
3550
  .order_by(cls._created.desc())
3511
3551
  .first()
3512
3552
  )
3513
3553
 
3554
@classmethod
def get_jobs_for_user(cls, user: Users, job_type: str = None) -> list["IngestionJobs"]:
    """Dispatch to the assay or clinical job listing for `user`.

    Only the exact value "assay" selects the assay listing; every other
    `job_type`, including None, selects the clinical listing.
    """
    if job_type == "assay":
        return cls.get_assay_jobs_for_user(user)
    return cls.get_clinical_jobs_for_user(user)
3557
+
3514
3558
  @classmethod
3515
3559
  @with_default_session
3516
- def get_open_jobs_for_user(cls, user: Users, session: Session = None) -> list["IngestionJobs"]:
3560
+ def get_clinical_jobs_for_user(cls, user: Users, session: Session = None) -> list["IngestionJobs"]:
3517
3561
  if user.role not in [CIDCRole.ADMIN.value, CIDCRole.CLINICAL_TRIAL_USER.value]:
3518
3562
  return []
3519
- job_query = session.query(cls).filter(cls.status.notin_(["DRAFT"]))
3563
+ job_query = session.query(cls).filter(cls.status.notin_(["DRAFT"]), cls.job_type == "clinical")
3520
3564
  if (
3521
3565
  user.role != CIDCRole.ADMIN.value
3522
3566
  and not session.query(Permissions)
@@ -3539,6 +3583,81 @@ class IngestionJobs(CommonColumns):
3539
3583
  job_query = job_query.filter(cls.trial_id.in_(map(lambda x: x.trial_id, authorized_trials)))
3540
3584
  return job_query.order_by(cls._created.desc()).all()
3541
3585
 
3586
@classmethod
@with_default_session
def get_assay_jobs_for_user(cls, user: Users, session: Session = None) -> list["IngestionJobs"]:
    """Return every assay job, newest `_created` first, when `user` is an admin; otherwise an empty list."""
    # TODO allow more than just Admin role and get authorized trials based on permissions
    permitted_roles = [CIDCRole.ADMIN.value]
    if user.role in permitted_roles:
        assay_jobs = session.query(cls).filter(cls.job_type == "assay")
        return assay_jobs.order_by(cls._created.desc()).all()
    return []
3593
+
3594
@classmethod
@with_default_session
def get_unique_assay_job(
    cls,
    trial_id: str,
    assay_type: str,
    batch_id: str,
    session: Session = None,
) -> Optional["IngestionJobs"]:
    """Find an assay job whose trial_id, assay_type and batch_id all equal the given values.

    Returns the first matching row, or None when there is no match.
    """
    criteria = (
        cls.job_type == "assay",
        cls.trial_id == trial_id,
        cls.assay_type == assay_type,
        cls.batch_id == batch_id,
    )
    return session.query(cls).filter(*criteria).first()
3614
+
3615
@classmethod
@with_default_session
def next_assay_submission_id(cls, trial_id: str, assay_type: str, session: Session = None) -> str:
    """
    Generate the next CIDC Submission ID for an assay job.

    Format:
        <trial_id>-<assay_type>-<yyyymmdd>        (first submission of the day)
        <trial_id>-<assay_type>-<yyyymmdd>-<#>    (subsequent submissions on same day)

    Uses only the most recently created matching submission_id to determine the
    next suffix. The date component comes from `datetime.now()`.

    Returns:
        The bare prefix when no submission with that prefix exists yet, otherwise
        the prefix followed by "-<n>" where n is one more than the latest suffix
        (the second submission of a day gets "-2").
    """
    today_str = datetime.now().strftime("%Y%m%d")
    base_submission_id = f"{trial_id}-{assay_type}-{today_str}"

    # Get the most recent submission_id starting with this prefix. autoescape
    # keeps "%" and "_" inside trial_id/assay_type from acting as LIKE wildcards.
    latest = (
        session.query(cls.submission_id)
        .filter(
            cls.trial_id == trial_id,
            cls.assay_type == assay_type,
            cls.submission_id.startswith(base_submission_id, autoescape=True),
        )
        .order_by(cls._created.desc())
        .first()
    )

    # No existing submission for this prefix -> use the bare prefix
    if not latest or not latest[0]:
        return base_submission_id

    last_id = latest[0]
    # Case 1: the latest is exactly the prefix (i.e., first submission today)
    if last_id == base_submission_id:
        return f"{base_submission_id}-2"

    # Case 2: latest already has a suffix
    try:
        _, last_suffix = last_id.rsplit("-", 1)
        next_number = int(last_suffix) + 1
    except ValueError as e:
        # Malformed suffix (missing "-" or non-numeric tail): restart numbering.
        # NOTE(review): "-2" may already be taken in this situation; uniqueness
        # of the result is not re-checked here.
        logger.error("Unexpected error parsing Submission ID in next_assay_submission_id: %s", e)
        return f"{base_submission_id}-2"
    return f"{base_submission_id}-{next_number}"
3660
+
3542
3661
 
3543
3662
  class JobFileCategories(CommonColumns):
3544
3663
  __tablename__ = "job_file_categories"
@@ -3613,6 +3732,7 @@ class CategoryDataElements(CommonColumns):
3613
3732
  name = Column(String, nullable=False)
3614
3733
  is_custom = Column(Boolean, nullable=False, default=False, server_default="false")
3615
3734
  element_type = Column(String, nullable=False)
3735
+ data_type = Column(String, nullable=True)
3616
3736
  cardinality = Column(String, nullable=True)
3617
3737
 
3618
3738
  @classmethod
@@ -0,0 +1,68 @@
1
+ from datetime import datetime
2
+ from urllib.parse import quote
3
+
4
+ from werkzeug.exceptions import BadRequest
5
+
6
+ from cidc_api.models import IngestionJobs
7
+ from . import gcloud_client
8
+ from ..shared.auth import get_current_user
9
+
10
# job_type values recognised for ingestion jobs.
JOB_TYPE_ASSAY = "assay"
JOB_TYPE_CLINICAL = "clinical"
# frozenset: this is a module-level constant and must not be mutable at runtime.
ALLOWED_JOB_TYPES = frozenset({JOB_TYPE_CLINICAL, JOB_TYPE_ASSAY})
13
+
14
+
15
+ def resolve_job_type_and_assay_fields(data: dict) -> tuple[str, str | None, str | None]:
16
+ """Decide job_type and gather assay_type/batch_id from request JSON."""
17
+ assay_type = data.get("assay_type")
18
+ # If job_type is assay or assay_type is present, treat this as an assay job.
19
+ job_type = data.get("job_type") or (JOB_TYPE_ASSAY if assay_type else JOB_TYPE_CLINICAL)
20
+
21
+ if job_type not in ALLOWED_JOB_TYPES:
22
+ raise BadRequest("Invalid job_type. Allowed values are 'clinical' or 'assay'.")
23
+
24
+ if job_type == JOB_TYPE_ASSAY and (not assay_type or not isinstance(assay_type, str)):
25
+ raise BadRequest("assay_type must be provided for job_type='assay'.")
26
+
27
+ assay_type = assay_type.strip() if assay_type else None
28
+ batch_id = data.get("batch_id").strip() if isinstance(data.get("batch_id"), str) else None
29
+
30
+ return job_type, assay_type, batch_id
31
+
32
+
33
+ def prepare_assay_job(trial_id: str, assay_type: str, batch_id: str) -> tuple[str, str, str, datetime, int, str]:
34
+ """
35
+ Validate assay job uniqueness and generate submission_id, start_date, version, and the trial’s GCS intake path.
36
+ """
37
+ if not assay_type:
38
+ raise BadRequest("assay_type must be provided for job_type='assay'.")
39
+
40
+ # Enforce uniqueness of (trial_id, assay_type, batch_id) when batch_id is present.
41
+ if batch_id:
42
+ existing_job = IngestionJobs.get_unique_assay_job(trial_id, assay_type, batch_id)
43
+ if existing_job:
44
+ raise BadRequest(
45
+ f"Assay job {existing_job.id} already exists for this exact trial_id/assay_type/batch_id combination."
46
+ )
47
+
48
+ submission_id = IngestionJobs.next_assay_submission_id(trial_id, assay_type)
49
+ job_status = "INITIAL SUBMISSION"
50
+ error_status = "Upload Incomplete" # job starts with 'Incomplete' notifier
51
+ start_date = datetime.now()
52
+ version = 1
53
+
54
+ # Create or retrieve intake bucket corresponding to the trial
55
+ intake_bucket = gcloud_client.create_intake_bucket(get_current_user().email, trial_id=trial_id)
56
+ gcs_path = f"{intake_bucket.name}/{trial_id}/{assay_type}"
57
+
58
+ return submission_id, job_status, error_status, start_date, version, gcs_path
59
+
60
+
61
+ def get_google_links(intake_path: str) -> tuple[str, str]:
62
+ """Build the GCS URI and GCS Console URL corresponding to the intake path."""
63
+ gcs_uri = f"gs://{intake_path}"
64
+ # Encode path to ensure link opens correctly
65
+ encoded_path = quote(intake_path)
66
+ console_url = f"https://console.cloud.google.com/storage/browser/{encoded_path}"
67
+
68
+ return gcs_uri, console_url
@@ -8,6 +8,7 @@ import hashlib
8
8
  import io
9
9
  import json
10
10
  import os
11
+ import re
11
12
  import warnings
12
13
  from collections import namedtuple
13
14
  from concurrent.futures import Future
@@ -361,15 +362,34 @@ def get_intake_bucket_name(user_email: str) -> str:
361
362
  return bucket_name
362
363
 
363
364
 
364
- def create_intake_bucket(user_email: str) -> storage.Bucket:
365
def get_trial_intake_bucket_name(trial_id: str) -> str:
    """
    Return a sanitized GCS bucket name for a given trial_id.

    Produces: <GOOGLE_INTAKE_BUCKET>-<sanitized_trial_id>
    where the trial_id segment is lowercased and restricted to [a-z0-9-].

    Raises:
        ValueError: if no characters of trial_id survive sanitizing, since the
            result would otherwise end in "-".
    """
    # Replace non-allowed bucket chars with "-"
    sanitized_id = re.sub(r"[^a-z0-9-]", "-", trial_id.lower())
    # Collapse repeated "-" and trim from both ends
    sanitized_id = re.sub(r"-+", "-", sanitized_id).strip("-")

    if not sanitized_id:
        raise ValueError(f"trial_id {trial_id!r} contains no characters usable in a bucket name")

    # NOTE(review): different trial_ids can sanitize to the same segment (e.g.
    # "A_1" and "a-1"), and the overall length is not checked here — confirm
    # both against the bucket naming rules and the expected trial_id format.
    return f"{GOOGLE_INTAKE_BUCKET}-{sanitized_id}"
378
+
379
+
380
+ def create_intake_bucket(user_email: str, trial_id: str = None) -> storage.Bucket:
381
+ """
382
+ Create (or retrieve) the appropriate data intake bucket.
383
+ If a trial_id is provided, a trial-specific bucket is used;
384
+ otherwise a user-specific intake bucket is used.
385
+
367
386
  Grant the user GCS object admin permissions on the bucket, or refresh those
368
387
  permissions if they've already been granted.
369
388
  Created with uniform bucket-level IAM access, so expiring permission.
370
389
  """
371
390
  storage_client = _get_storage_client()
372
- bucket_name = get_intake_bucket_name(user_email)
391
+ # Get trial-specific bucket name if trial_id is given, otherwise a user-specific bucket name.
392
+ bucket_name = get_trial_intake_bucket_name(trial_id) if trial_id else get_intake_bucket_name(user_email)
373
393
  bucket = storage_client.bucket(bucket_name)
374
394
 
375
395
  if not bucket.exists():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nci_cidc_api_modules
3
- Version: 1.2.33
3
+ Version: 1.2.35
4
4
  Summary: SQLAlchemy data models and configuration tools used in the NCI CIDC API
5
5
  Home-page: https://github.com/NCI-CIDC/cidc-api-gae
6
6
  License: MIT license
@@ -14,24 +14,25 @@ Requires-Dist: flask-migrate>=4.1.0
14
14
  Requires-Dist: flask-sqlalchemy>=3.1.1
15
15
  Requires-Dist: flask-talisman>=0.7.0
16
16
  Requires-Dist: google-auth==2.43.0
17
- Requires-Dist: google-api-python-client>=2.185.0
17
+ Requires-Dist: google-api-python-client>=2.187.0
18
18
  Requires-Dist: google-cloud-bigquery>=3.38.0
19
19
  Requires-Dist: google-cloud-pubsub>=2.33.0
20
20
  Requires-Dist: google-cloud-secret-manager>=2.25.0
21
21
  Requires-Dist: google-cloud-storage>=3.6.0
22
22
  Requires-Dist: jinja2>=3.1.6
23
+ Requires-Dist: joserfc>=1.5.0
23
24
  Requires-Dist: marshmallow>=4.1.0
24
25
  Requires-Dist: marshmallow-sqlalchemy>=1.4.2
25
26
  Requires-Dist: numpy>=2.3.5
26
27
  Requires-Dist: packaging>=25.0
27
28
  Requires-Dist: pandas>=2.3.3
28
29
  Requires-Dist: pyarrow>=22.0.0
29
- Requires-Dist: pydantic~=2.12.4
30
+ Requires-Dist: pydantic~=2.12.5
30
31
  Requires-Dist: python-dotenv>=1.2.1
31
32
  Requires-Dist: requests>=2.32.5
32
33
  Requires-Dist: sqlalchemy>=2.0.44
33
34
  Requires-Dist: sqlalchemy-mixins~=2.0.5
34
- Requires-Dist: werkzeug>=3.1.3
35
+ Requires-Dist: werkzeug>=3.1.4
35
36
  Requires-Dist: nci-cidc-schemas==0.28.10
36
37
  Dynamic: description
37
38
  Dynamic: description-content-type
@@ -6,7 +6,7 @@ cidc_api/config/settings.py,sha256=ttOGvk_6zVMn4dtxIZ2-0w3wF2fpAUVfGpVZbKJ2b6s,4
6
6
  cidc_api/models/__init__.py,sha256=cTyK0Z1ttLo9itwZVRFr-d6aX-zX633YhqipqPgoGfE,115
7
7
  cidc_api/models/data.py,sha256=uLkgAJ6tCtsi3bOGt8I9esYrZqgbsTYM1rGfL2gx5sY,837
8
8
  cidc_api/models/migrations.py,sha256=UlS5How3J4ryaRuZT6F5VQtAKikkl0LTv9MgMO_ltiQ,11161
9
- cidc_api/models/models.py,sha256=zGOvQMe86kLqC_bbB9ezrBYL9QNA_4GrtYhRGGeqOcU,148525
9
+ cidc_api/models/models.py,sha256=HFy3_CheDe_tfVCQkC88BMdi9C27tnd_H-MGvYqUv44,152709
10
10
  cidc_api/models/schemas.py,sha256=6IE2dJoEMcMbi0Vr1V3cYKnPKU0hv9vRKBixOZHe88s,2766
11
11
  cidc_api/models/types.py,sha256=nq_PvzDz67HzceOI_ve9AxReExAZ-XFE7T3G2bbpqJ4,29667
12
12
  cidc_api/models/db/base_orm.py,sha256=EV78qFBcOR7spK_PhjbkpsGcmcP33AQItX61SidDa_8,813
@@ -94,16 +94,17 @@ cidc_api/reference/icd10cm.py,sha256=K1vbTQB75uAQeKgj0U9izhtMKVb2vqp69_hyx3z_jro
94
94
  cidc_api/reference/icdo3.py,sha256=A19yNX5-9Gs3X83kXcTlGgsXDTTJ9yR2dxEIoBVmpmU,296
95
95
  cidc_api/reference/uberon.py,sha256=BO2mYNDvPzZyLdhL_ZjCyEgHSLHuVifnTJvfktUfVWA,148
96
96
  cidc_api/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
+ cidc_api/shared/assay_handling.py,sha256=kjxPGt4Dw-EkHGHfce32EXAPvWoPfPNFQCTso4Z0ez4,2892
97
98
  cidc_api/shared/auth.py,sha256=PHqmVGkqDjbmUofytVFwD_9ssgUomESl3fFtFHPwZYQ,9062
98
99
  cidc_api/shared/email_layout.html,sha256=pBoTNw3ACHH-ncZFaNvcy5bXMqPwizR78usb0uCYtIc,7670
99
100
  cidc_api/shared/emails.py,sha256=8kNFEaSnKpY-GX_iE59QUhSp3c4_uzy3SpHYt2QjuqI,6121
100
101
  cidc_api/shared/file_handling.py,sha256=UP3KZ61Km5RNmNfk126_upKv-jGkDzpc5Pvk1s7cU2Q,5585
101
- cidc_api/shared/gcloud_client.py,sha256=ovXGS2ynaBgB_23prj23H10GNN4fectiVF7Hj4LJXQk,37302
102
+ cidc_api/shared/gcloud_client.py,sha256=SI8fcKAavLlIsU_5XMKe4ebjYkl-qantX07zihsgkaU,38140
102
103
  cidc_api/shared/jose.py,sha256=-qzGzEDAlokEp9E7WtBtQkXyyfPWTYXlwYpCqVJWmqM,1830
103
104
  cidc_api/shared/rest_utils.py,sha256=RwR30WOUAYCxL7V-i2totEyeriG30GbBDvBcpLXhM9w,6594
104
105
  cidc_api/shared/utils.py,sha256=FdZJiynmh6BIzWyCTcUAcUMKCAtzEyp_HLdXEticNcI,237
105
- nci_cidc_api_modules-1.2.33.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
106
- nci_cidc_api_modules-1.2.33.dist-info/METADATA,sha256=jEyZSAAhFgbCPLP85liz3W48ZZDIj4Ni3AIEeBBMBog,39762
107
- nci_cidc_api_modules-1.2.33.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
108
- nci_cidc_api_modules-1.2.33.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
109
- nci_cidc_api_modules-1.2.33.dist-info/RECORD,,
106
+ nci_cidc_api_modules-1.2.35.dist-info/licenses/LICENSE,sha256=pNYWVTHaYonnmJyplmeAp7tQAjosmDpAWjb34jjv7Xs,1102
107
+ nci_cidc_api_modules-1.2.35.dist-info/METADATA,sha256=r7t33YqkP9xxzOGvJ_9auttusdTNOU3gdsre1qpl7t4,39792
108
+ nci_cidc_api_modules-1.2.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
109
+ nci_cidc_api_modules-1.2.35.dist-info/top_level.txt,sha256=rNiRzL0lJGi5Q9tY9uSoMdTbJ-7u5c_D2E86KA94yRA,9
110
+ nci_cidc_api_modules-1.2.35.dist-info/RECORD,,