nci-cidc-api-modules 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
cidc_api/config/db.py CHANGED
@@ -52,7 +52,7 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
52
52
  "host": f'{socket_dir}{environ.get("CLOUD_SQL_INSTANCE_NAME")}'
53
53
  }
54
54
  else:
55
- raise Exception(
55
+ raise RuntimeError(
56
56
  "Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect "
57
57
  + "to a database."
58
58
  )
@@ -11,8 +11,8 @@ def get_secrets_manager(is_testing=False):
11
11
 
12
12
  # If we're testing, we shouldn't need access to secrets in Secret Manager
13
13
  return MagicMock()
14
- else:
15
- return GoogleSecretManager()
14
+
15
+ return GoogleSecretManager()
16
16
 
17
17
 
18
18
  class GoogleSecretManager:
@@ -21,10 +21,9 @@ DEV_USE_GCS = environ.get("DEV_USE_GCS") == "True"
21
21
  assert ENV in (
22
22
  "dev",
23
23
  "dev-int",
24
- "featuredev",
25
24
  "staging",
26
25
  "prod",
27
- ), "ENV environment variable must be set to 'dev', 'dev-int', 'featuredev', 'staging', or 'prod'"
26
+ ), "ENV environment variable must be set to 'dev', 'dev-int', 'staging', or 'prod'"
28
27
  DEBUG = environ.get("DEBUG") == "True"
29
28
  assert ENV == "dev" if DEBUG else True, "DEBUG mode is only allowed when ENV='dev'"
30
29
  TESTING = environ.get("TESTING") == "True"
cidc_api/csms/auth.py CHANGED
@@ -1,9 +1,9 @@
1
1
  __all__ = ["get_token", "get_with_authorization", "get_with_paging"]
2
- import os
3
2
 
4
- os.environ["TZ"] = "UTC"
3
+ import os
5
4
  from datetime import datetime, timedelta
6
5
  from typing import Any, Dict, Iterator
6
+
7
7
  import requests
8
8
 
9
9
  from ..config.settings import (
@@ -13,7 +13,9 @@ from ..config.settings import (
13
13
  CSMS_TOKEN_URL,
14
14
  )
15
15
 
16
+ os.environ["TZ"] = "UTC"
16
17
 
18
+ TIMEOUT_IN_SECONDS = 20
17
19
  _TOKEN, _TOKEN_EXPIRY = None, datetime.now()
18
20
 
19
21
 
@@ -29,13 +31,14 @@ def get_token():
29
31
  "client_id": CSMS_CLIENT_ID,
30
32
  "client_secret": CSMS_CLIENT_SECRET,
31
33
  },
34
+ timeout=TIMEOUT_IN_SECONDS,
32
35
  ).json(),
33
36
  datetime.now(),
34
37
  )
35
38
 
36
39
  # res definition from https://developer.okta.com/docs/reference/api/oidc/#response-example-error-7
37
40
  if "errorCode" in res:
38
- raise Exception(res["errorCode"] + ": " + res.get("errorSummary"))
41
+ raise RuntimeError(res["errorCode"] + ": " + res.get("errorSummary"))
39
42
 
40
43
  _TOKEN = res["access_token"]
41
44
  _TOKEN_EXPIRY = time + timedelta(seconds=res["expires_in"])
@@ -54,7 +57,11 @@ def get_with_authorization(url: str, **kwargs) -> requests.Response:
54
57
  kwargs["headers"] = headers
55
58
  if not url.startswith(CSMS_BASE_URL):
56
59
  url = CSMS_BASE_URL + url
57
- return requests.get(url, **kwargs)
60
+ return requests.get(
61
+ url,
62
+ **kwargs,
63
+ timeout=TIMEOUT_IN_SECONDS,
64
+ )
58
65
 
59
66
 
60
67
  def get_with_paging(
@@ -88,7 +95,7 @@ def get_with_paging(
88
95
  else:
89
96
  limit = 1
90
97
 
91
- kwargs.update(dict(limit=limit, offset=offset))
98
+ kwargs.update({"limit": limit, "offset": offset})
92
99
 
93
100
  res = get_with_authorization(url, params=kwargs)
94
101
  while res.status_code < 300 and len(res.json().get("data", [])) > 0:
@@ -96,5 +103,5 @@ def get_with_paging(
96
103
  yield from res.json()["data"]
97
104
  kwargs["offset"] += 1 # get the next page
98
105
  res = get_with_authorization(url, params=kwargs)
99
- else:
100
- res.raise_for_status()
106
+
107
+ res.raise_for_status()
@@ -7,11 +7,8 @@ __all__ = [
7
7
 
8
8
  import os
9
9
  import re
10
-
11
- os.environ["TZ"] = "UTC"
12
10
  from collections import defaultdict
13
11
  from datetime import date, datetime, time
14
- from sqlalchemy.orm.session import Session
15
12
  from typing import (
16
13
  Any,
17
14
  Callable,
@@ -25,6 +22,9 @@ from typing import (
25
22
  Union,
26
23
  )
27
24
 
25
+ from sqlalchemy.orm.session import Session
26
+
27
+ from cidc_schemas.prism.merger import merge_clinical_trial_metadata
28
28
  from cidc_schemas.prism.core import (
29
29
  _check_encrypt_init,
30
30
  _encrypt,
@@ -37,13 +37,16 @@ from .models import with_default_session
37
37
  from ..config.logging import get_logger
38
38
  from ..config.settings import PRISM_ENCRYPT_KEY
39
39
 
40
+
41
+ os.environ["TZ"] = "UTC"
40
42
  logger = get_logger(__name__)
41
43
 
42
- cimac_id_to_cimac_participant_id = lambda cimac_id, _: cimac_id[:7]
43
- cimac_id_regex_pattern = "^C[A-Z0-9]{3}[A-Z0-9]{3}[A-Z0-9]{2}.[0-9]{2}$"
44
- cimac_id_regex = re.compile(cimac_id_regex_pattern)
44
+
45
+ def cimac_id_to_cimac_participant_id(cimac_id, _):
46
+ return cimac_id[:7]
45
47
 
46
48
 
49
+ CIMAC_ID_REGEX = re.compile("^C[A-Z0-9]{3}[A-Z0-9]{3}[A-Z0-9]{2}.[0-9]{2}$")
47
50
  SAMPLE_SCHEMA: dict = load_and_validate_schema("sample.json")
48
51
  PARTICIPANT_SCHEMA: dict = load_and_validate_schema("participant.json")
49
52
  SHIPMENT_SCHEMA: dict = load_and_validate_schema("shipping_core.json")
@@ -54,13 +57,14 @@ TARGET_PROPERTIES_MAP: Dict[str, dict] = {
54
57
  }
55
58
 
56
59
  # make sure that the encryption key is set
60
+ # NOTE: Exception is raised in external core module
57
61
  try:
58
62
  _check_encrypt_init()
59
- except:
63
+ except Exception:
60
64
  set_prism_encrypt_key(PRISM_ENCRYPT_KEY)
61
65
 
62
66
 
63
- def _get_all_values(target: str, old: dict, drop: List[str] = []) -> Dict[str, Any]:
67
+ def _get_all_values(target: str, old: dict, drop: List[str] = None) -> Dict[str, Any]:
64
68
  """
65
69
  Parameters
66
70
  ----------
@@ -74,6 +78,9 @@ def _get_all_values(target: str, old: dict, drop: List[str] = []) -> Dict[str, A
74
78
  all of the values from `old` that are in `target` excepting anything keys in `drop`
75
79
  """
76
80
 
81
+ if drop is None:
82
+ drop = []
83
+
77
84
  ret = {
78
85
  p: old[p]
79
86
  for p in TARGET_PROPERTIES_MAP[target].keys()
@@ -83,7 +90,7 @@ def _get_all_values(target: str, old: dict, drop: List[str] = []) -> Dict[str, A
83
90
  return ret
84
91
 
85
92
 
86
- class NewManifestError(Exception):
93
+ class NewManifestError(RuntimeError):
87
94
  pass
88
95
 
89
96
 
@@ -94,7 +101,7 @@ def _parse_upload_type(sample: dict, upload_type: Set[str]) -> str:
94
101
  # safety
95
102
  return
96
103
 
97
- elif sample_manifest_type == "biofluid_cellular":
104
+ if sample_manifest_type == "biofluid_cellular":
98
105
  upload_type.add("pbmc")
99
106
  elif sample_manifest_type == "tissue_slides":
100
107
  upload_type.add("tissue_slide")
@@ -162,9 +169,9 @@ def _get_and_check(
162
169
  ret = obj.get(key, default)
163
170
 
164
171
  if not check(ret):
165
- raise Exception(msg)
166
- else:
167
- return ret
172
+ raise RuntimeError(msg)
173
+
174
+ return ret
168
175
 
169
176
 
170
177
  def _extract_info_from_manifest(
@@ -181,7 +188,7 @@ def _extract_info_from_manifest(
181
188
  str : manifest_id
182
189
  List[Dict[str, Any]] : samples
183
190
 
184
- Exceptions Raised
191
+ RuntimeErrors Raised
185
192
  -----------------
186
193
  - "Cannot add a manifest that is not qc_complete"
187
194
  if manifest's status is not qc_complete (or null)
@@ -223,7 +230,7 @@ def _extract_details_from_trial(csms_samples: List[Dict[str, Any]]):
223
230
  str : assay_priority
224
231
  str : assay_type
225
232
 
226
- Exceptions Raised
233
+ RuntimeErrors Raised
227
234
  -----------------
228
235
  - f"No assay_priority defined for manifest_id={manifest_id} for trial {trial_id}"
229
236
  - f"No assay_type defined for manifest_id={manifest_id} for trial {trial_id}"
@@ -246,11 +253,11 @@ def _extract_details_from_trial(csms_samples: List[Dict[str, Any]]):
246
253
  def _process_csms_sample(csms_sample: dict):
247
254
  event_name = csms_sample.get("standardized_collection_event_name")
248
255
  if event_name is None:
249
- raise Exception(
256
+ raise RuntimeError(
250
257
  f"No standardized_collection_event_name defined for sample {csms_sample.get('cimac_id', '')} on manifest {csms_sample['manifest_id']} for trial {csms_sample['protocol_identifier']}"
251
258
  )
252
- else:
253
- csms_sample["collection_event_name"] = event_name
259
+
260
+ csms_sample["collection_event_name"] = event_name
254
261
 
255
262
  # encrypt participant ids if not already encrypted
256
263
  if (
@@ -299,18 +306,18 @@ def _convert_csms_samples(
299
306
  trial_id: str,
300
307
  manifest_id: str,
301
308
  csms_samples: List[Dict[str, Any]],
302
- existing_cimac_ids: List[str] = [],
309
+ existing_cimac_ids: List[str] = None,
303
310
  ) -> Iterator[Tuple[str, Dict[str, Any]]]:
304
311
  """
305
312
  Convert a list of CSMS-style samples into an iterator returning CIMAC IDs and CIDC-style samples
306
- Exceptions are raised during the call for each sample; full validation is NOT done first.
313
+ RuntimeErrors are raised during the call for each sample; full validation is NOT done first.
307
314
 
308
315
  Returns
309
316
  -------
310
317
  iterator yielding (str, dict)
311
318
  cimac_id, converted CSMS sample
312
319
 
313
- Exceptions Raised
320
+ RuntimeErrors Raised
314
321
  -----------------
315
322
  - f"No standardized_collection_event_name defined for sample {sample['cimac_id']} on manifest {sample['manifest_id']} for trial {sample['protocol_identifier']}"
316
323
  - f"No cimac_id defined for samples[{n}] on manifest_id={manifest_id} for trial {trial_id}"
@@ -319,6 +326,10 @@ def _convert_csms_samples(
319
326
  - f"Sample with no local participant_id given:\n{sample}"
320
327
  if participant_id and trial_participant_id are both undefined
321
328
  """
329
+
330
+ if existing_cimac_ids is None:
331
+ existing_cimac_ids = []
332
+
322
333
  for n, sample in enumerate(csms_samples):
323
334
  # process the sample
324
335
  _process_csms_sample(csms_sample=sample)
@@ -329,12 +340,13 @@ def _convert_csms_samples(
329
340
  key="cimac_id",
330
341
  msg=f"No cimac_id defined for samples[{n}] on manifest_id={manifest_id} for trial {trial_id}",
331
342
  )
332
- if not cimac_id_regex.match(cimac_id):
333
- raise Exception(
343
+ if not CIMAC_ID_REGEX.match(cimac_id):
344
+ raise RuntimeError(
334
345
  f"Malformatted cimac_id={cimac_id} on manifest_id={manifest_id} for trial {trial_id}"
335
346
  )
336
- elif cimac_id in existing_cimac_ids:
337
- raise Exception(
347
+
348
+ if cimac_id in existing_cimac_ids:
349
+ raise RuntimeError(
338
350
  f"Sample with cimac_id={cimac_id} already exists for trial {trial_id}\nNew samples: {sample}"
339
351
  )
340
352
 
@@ -354,7 +366,7 @@ def insert_manifest_into_blob(
354
366
  Given a CSMS-style manifest, add it into the JSON metadata blob
355
367
  If `dry_run`, calls `session.rollback` instead of `session.commit`
356
368
 
357
- Exceptions Raised
369
+ RuntimeErrors Raised
358
370
  -----------------
359
371
  - "Cannot add a manifest that is not qc_complete"
360
372
  if manifest's status is not qc_complete (or null)
@@ -376,13 +388,11 @@ def insert_manifest_into_blob(
376
388
 
377
389
  - "prism errors: [{errors from merge_clinical_trial_metadata}]"
378
390
  """
379
- # schemas import here to keep JSON-blob code together
380
- from cidc_schemas.prism.merger import merge_clinical_trial_metadata
381
391
 
382
392
  trial_id, manifest_id, csms_samples = _extract_info_from_manifest(manifest)
383
393
  trial_md = TrialMetadata.select_for_update_by_trial_id(trial_id, session=session)
384
394
  if manifest_id in [s["manifest_id"] for s in trial_md.metadata_json["shipments"]]:
385
- raise Exception(
395
+ raise RuntimeError(
386
396
  f"Manifest with manifest_id={manifest_id} already exists for trial {trial_id}"
387
397
  )
388
398
 
@@ -418,9 +428,9 @@ def insert_manifest_into_blob(
418
428
 
419
429
  # each participant has a list of samples
420
430
  for cimac_participant_id, partic_samples in sample_map.items():
421
- partic = dict(
422
- cimac_participant_id=cimac_participant_id,
423
- participant_id=partic_samples[0]["participant_id"],
431
+ partic = {
432
+ "cimac_participant_id": cimac_participant_id,
433
+ "participant_id": partic_samples[0]["participant_id"],
424
434
  **_get_all_values(
425
435
  target="participant",
426
436
  old=partic_samples[0],
@@ -432,7 +442,7 @@ def insert_manifest_into_blob(
432
442
  "trial_participant_id",
433
443
  ],
434
444
  ),
435
- )
445
+ }
436
446
  partic["samples"] = [
437
447
  _get_all_values(
438
448
  target="sample",
@@ -444,12 +454,12 @@ def insert_manifest_into_blob(
444
454
 
445
455
  patch["participants"].append(partic)
446
456
 
447
- logger.info(f"Patch for {trial_id} manifest {manifest_id}:\n{patch}")
457
+ logger.info("Patch for %s manifest %s:\n%s", trial_id, manifest_id, patch)
448
458
  # merge and validate the data
449
459
  # the existence of the correct cohort and collection_event names are checked here
450
460
  merged, errs = merge_clinical_trial_metadata(patch, trial_md.metadata_json)
451
- if len(errs):
452
- raise Exception({"prism errors": [str(e) for e in errs]})
461
+ if errs:
462
+ raise RuntimeError({"prism errors": [str(e) for e in errs]})
453
463
 
454
464
  # save it
455
465
  trial_md.update(changes={"metadata_json": merged}, commit=False, session=session)
@@ -478,15 +488,17 @@ class Change:
478
488
  trial_id: str,
479
489
  manifest_id: str,
480
490
  cimac_id: str = None,
481
- changes: Dict[str, Tuple[Any, Any]] = [],
491
+ changes: Dict[str, Tuple[Any, Any]] = None,
482
492
  ):
493
+ if changes is None:
494
+ changes = []
495
+
483
496
  if entity_type not in ["sample", "shipment", "upload"]:
484
497
  raise ValueError(
485
498
  f"entity_type must be in: sample, shipment, upload\nnot: {entity_type}"
486
499
  )
487
- else:
488
- self.entity_type = entity_type
489
500
 
501
+ self.entity_type = entity_type
490
502
  self.trial_id = trial_id
491
503
  self.manifest_id = manifest_id
492
504
  self.cimac_id = cimac_id
@@ -514,28 +526,7 @@ def _calc_difference(
514
526
  manifest_id: str,
515
527
  cidc: Dict[str, Any],
516
528
  csms: Dict[str, Any],
517
- ignore=[
518
- "barcode",
519
- "biobank_id",
520
- "cimac_participant_id",
521
- "entry_number",
522
- "event",
523
- "excluded",
524
- "json_data",
525
- "modified_time",
526
- "modified_timestamp",
527
- "protocol_identifier",
528
- "qc_comments",
529
- "reason",
530
- "sample_approved",
531
- "sample_manifest_type",
532
- "samples",
533
- "status",
534
- "status_log",
535
- "study_encoding",
536
- "submitter",
537
- "trial_id",
538
- ],
529
+ ignore=None,
539
530
  ) -> Dict[str, Tuple[Any, Any]]:
540
531
  """
541
532
  The actual comparison function that handles comparing values
@@ -544,11 +535,38 @@ def _calc_difference(
544
535
  Do not perform a comparison for ignored keys
545
536
  Add constant critical fields back to anything that changes
546
537
  """
538
+
539
+ if ignore is None:
540
+ ignore = [
541
+ "barcode",
542
+ "biobank_id",
543
+ "cimac_participant_id",
544
+ "entry_number",
545
+ "event",
546
+ "excluded",
547
+ "json_data",
548
+ "modified_time",
549
+ "modified_timestamp",
550
+ "protocol_identifier",
551
+ "qc_comments",
552
+ "reason",
553
+ "sample_approved",
554
+ "sample_manifest_type",
555
+ "samples",
556
+ "status",
557
+ "status_log",
558
+ "study_encoding",
559
+ "submitter",
560
+ "trial_id",
561
+ ]
562
+
547
563
  # handle formatting and ignore
548
564
  cidc1: Dict[str, Any] = {
549
- k: datetime.strftime(v, "%Y-%m-%d %H:%M:%S")
550
- if isinstance(v, (date, time, datetime))
551
- else v
565
+ k: (
566
+ datetime.strftime(v, "%Y-%m-%d %H:%M:%S")
567
+ if isinstance(v, (date, time, datetime))
568
+ else v
569
+ )
552
570
  for k, v in cidc.items()
553
571
  if k not in ignore
554
572
  }
@@ -607,15 +625,15 @@ def _get_csms_sample_map(
607
625
  ) -> Dict[str, Dict[str, Any]]:
608
626
  """Returns a map of CIMAC IDs to the relevant sample details from CSMS"""
609
627
  return {
610
- csms_cimac_id: dict(
628
+ csms_cimac_id: {
611
629
  # participant-level critical field
612
- cohort_name=csms_sample["cohort_name"],
630
+ "cohort_name": csms_sample["cohort_name"],
613
631
  # name changes
614
- trial_id=csms_sample["protocol_identifier"],
615
- participant_id=csms_sample["participant_id"],
632
+ "trial_id": csms_sample["protocol_identifier"],
633
+ "participant_id": csms_sample["participant_id"],
616
634
  # not in CSMS
617
- cimac_participant_id=cimac_id_to_cimac_participant_id(csms_cimac_id, {}),
618
- sample_manifest_type=csms_sample.get("sample_manifest_type"),
635
+ "cimac_participant_id": cimac_id_to_cimac_participant_id(csms_cimac_id, {}),
636
+ "sample_manifest_type": csms_sample.get("sample_manifest_type"),
619
637
  # the rest of the values
620
638
  **_get_all_values(
621
639
  target="sample",
@@ -628,7 +646,7 @@ def _get_csms_sample_map(
628
646
  "trial_id",
629
647
  ],
630
648
  ),
631
- )
649
+ }
632
650
  for csms_cimac_id, csms_sample in _convert_csms_samples(
633
651
  trial_id, manifest_id, csms_samples
634
652
  )
@@ -651,7 +669,7 @@ def _cross_validate_samples(
651
669
  manifest_id,
652
670
  cidc_sample["cimac_id"],
653
671
  )
654
- raise Exception(
672
+ raise RuntimeError(
655
673
  f"Missing sample: {formatted} on CSMS {(trial_id, manifest_id)}"
656
674
  )
657
675
  # make sure that all of the CSMS samples are in CIDC
@@ -682,9 +700,9 @@ def _cross_validate_samples(
682
700
  cidc_sample["cimac_id"],
683
701
  )
684
702
  if cidc_sample is not None
685
- else f"<no sample found>"
703
+ else "<no sample found>"
686
704
  )
687
- raise Exception(
705
+ raise RuntimeError(
688
706
  f"Change in critical field for: {formatted} to CSMS {(trial_id, manifest_id, cimac_id)}"
689
707
  )
690
708
 
@@ -705,7 +723,7 @@ def _initial_manifest_validation(
705
723
  UploadJobs : cidc_uploadjob
706
724
 
707
725
 
708
- Exceptions Raised
726
+ RuntimeErrors Raised
709
727
  -----------------
710
728
  - "Cannot add a manifest that is not qc_complete"
711
729
  if manifest's status is not qc_complete (or null)
@@ -753,7 +771,7 @@ def _initial_manifest_validation(
753
771
  cidc_sample_map = _get_cidc_sample_map(cidc_shipment.metadata_patch)
754
772
  csms_sample_map = _get_csms_sample_map(trial_id, manifest_id, csms_samples)
755
773
 
756
- # raises Exceptions if something is amiss
774
+ # raises RuntimeErrors if something is amiss
757
775
  _cross_validate_samples(
758
776
  trial_id=trial_id,
759
777
  manifest_id=manifest_id,
@@ -790,8 +808,8 @@ def _handle_shipment_differences(
790
808
  )
791
809
  if change:
792
810
  return change
793
- else:
794
- return None
811
+
812
+ return None
795
813
 
796
814
 
797
815
  def _handle_sample_differences(
@@ -854,8 +872,8 @@ def _handle_upload_differences(
854
872
  )
855
873
  if change:
856
874
  return change
857
- else:
858
- return None
875
+
876
+ return None
859
877
 
860
878
 
861
879
  @with_default_session
@@ -877,7 +895,7 @@ def detect_manifest_changes(
877
895
  ------
878
896
  NewManifestError
879
897
  if the manifest_id doesn't correspond to anything in CIDC
880
- Exception
898
+ RuntimeError
881
899
  if the connections between any critical fields is changed
882
900
  namely trial_id, manifest_id, cimac_id
883
901
  """
@@ -886,12 +904,12 @@ def detect_manifest_changes(
886
904
  obj=csms_manifest,
887
905
  key="excluded",
888
906
  default=False,
889
- msg=f"not called",
907
+ msg="not called",
890
908
  check=lambda _: True,
891
909
  ):
892
910
  return []
893
911
 
894
- # ----- Initial validation, raises Exception if issues -----
912
+ # ----- Initial validation, raises RuntimeError if issues -----
895
913
  ret = []
896
914
  (
897
915
  trial_id,