nci-cidc-api-modules 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cidc_api/config/db.py CHANGED
@@ -48,13 +48,10 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
48
48
 
49
49
  # If CLOUD_SQL_INSTANCE_NAME is defined, we're connecting
50
50
  # via a unix socket from inside App Engine.
51
- config["query"] = {
52
- "host": f'{socket_dir}{environ.get("CLOUD_SQL_INSTANCE_NAME")}'
53
- }
51
+ config["query"] = {"host": f'{socket_dir}{environ.get("CLOUD_SQL_INSTANCE_NAME")}'}
54
52
  else:
55
53
  raise RuntimeError(
56
- "Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect "
57
- + "to a database."
54
+ "Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect " + "to a database."
58
55
  )
59
56
 
60
57
  db_uri = str(URL(**config))
@@ -19,11 +19,7 @@ def get_logger(name: Optional[str]) -> logging.Logger:
19
19
  logger.setLevel(gunicorn_logger.level)
20
20
  else:
21
21
  handler = logging.StreamHandler(sys.stdout)
22
- handler.setFormatter(
23
- logging.Formatter(
24
- "[%(asctime)s] [%(threadName)s] [%(levelname)s]: %(message)s"
25
- )
26
- )
22
+ handler.setFormatter(logging.Formatter("[%(asctime)s] [%(threadName)s] [%(levelname)s]: %(message)s"))
27
23
  logger.addHandler(handler)
28
24
  logger.setLevel(logging.DEBUG if ENV == "dev" else logging.INFO)
29
25
  return logger
@@ -40,9 +40,7 @@ class GoogleSecretManager:
40
40
  Raises a google.api_core.exceptions.NotFound exception
41
41
  if the secret_id/version_id doesn't exist.
42
42
  """
43
- name = (
44
- f"projects/{self.project_id}/secrets/{secret_id}/versions/{self.version_id}"
45
- )
43
+ name = f"projects/{self.project_id}/secrets/{secret_id}/versions/{self.version_id}"
46
44
  response = self.client.access_secret_version(name=name)
47
45
  secret = response.payload.data.decode("UTF-8")
48
46
 
@@ -55,6 +55,7 @@ else:
55
55
  ### Configure Flask-SQLAlchemy ###
56
56
  SQLALCHEMY_DATABASE_URI = get_sqlalchemy_database_uri(TESTING)
57
57
  SQLALCHEMY_TRACK_MODIFICATIONS = False
58
+ SQLALCHEMY_ECHO = False # Set to True to emit all compiled sql statements
58
59
 
59
60
  ### Configure Dev CFn ###
60
61
  DEV_CFUNCTIONS_SERVER = environ.get("DEV_CFUNCTIONS_SERVER")
@@ -78,9 +79,7 @@ GOOGLE_BIGQUERY_USER_ROLE = "roles/bigquery.jobUser" # same across environments
78
79
  GOOGLE_PATIENT_SAMPLE_TOPIC = environ["GOOGLE_PATIENT_SAMPLE_TOPIC"]
79
80
  GOOGLE_EMAILS_TOPIC = environ["GOOGLE_EMAILS_TOPIC"]
80
81
  GOOGLE_ARTIFACT_UPLOAD_TOPIC = environ["GOOGLE_ARTIFACT_UPLOAD_TOPIC"]
81
- GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC = environ[
82
- "GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC"
83
- ]
82
+ GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC = environ["GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC"]
84
83
  GOOGLE_AND_OPERATOR = " && "
85
84
  GOOGLE_OR_OPERATOR = " || "
86
85
 
cidc_api/csms/auth.py CHANGED
@@ -64,9 +64,7 @@ def get_with_authorization(url: str, **kwargs) -> requests.Response:
64
64
  )
65
65
 
66
66
 
67
- def get_with_paging(
68
- url: str, limit: int = None, offset: int = 0, **kwargs
69
- ) -> Iterator[Dict[str, Any]]:
67
+ def get_with_paging(url: str, limit: int = None, offset: int = 0, **kwargs) -> Iterator[Dict[str, Any]]:
70
68
  """
71
69
  Return an iterator of entries via get_with_authorization with handling for CSMS paging
72
70
 
@@ -81,11 +81,7 @@ def _get_all_values(target: str, old: dict, drop: List[str] = None) -> Dict[str,
81
81
  if drop is None:
82
82
  drop = []
83
83
 
84
- ret = {
85
- p: old[p]
86
- for p in TARGET_PROPERTIES_MAP[target].keys()
87
- if p in old and p not in drop
88
- }
84
+ ret = {p: old[p] for p in TARGET_PROPERTIES_MAP[target].keys() if p in old and p not in drop}
89
85
 
90
86
  return ret
91
87
 
@@ -113,9 +109,7 @@ def _parse_upload_type(sample: dict, upload_type: Set[str]) -> str:
113
109
  elif processed_derivative in ["DNA", "RNA"]:
114
110
  unprocessed_type = sample.get("type_of_sample")
115
111
  new_type = "tumor" if "tumor" in unprocessed_type.lower() else "normal"
116
- new_type += (
117
- "_blood_" if sample_manifest_type.startswith("biofluid") else "_tissue_"
118
- )
112
+ new_type += "_blood_" if sample_manifest_type.startswith("biofluid") else "_tissue_"
119
113
  new_type += processed_derivative.lower()
120
114
 
121
115
  upload_type.add(new_type)
@@ -144,9 +138,7 @@ def _get_upload_type(samples: Iterable[Dict[str, Any]]) -> str:
144
138
  # updates upload_type in-place with the given sample
145
139
  _parse_upload_type(sample=sample, upload_type=upload_type)
146
140
 
147
- assert (
148
- len(upload_type) == 1
149
- ), f"Inconsistent value determined for upload_type:{upload_type}"
141
+ assert len(upload_type) == 1, f"Inconsistent value determined for upload_type:{upload_type}"
150
142
  return list(upload_type)[0]
151
143
 
152
144
 
@@ -174,9 +166,7 @@ def _get_and_check(
174
166
  return ret
175
167
 
176
168
 
177
- def _extract_info_from_manifest(
178
- manifest: Dict[str, Any]
179
- ) -> Tuple[str, str, List[Dict[str, Any]]]:
169
+ def _extract_info_from_manifest(manifest: Dict[str, Any]) -> Tuple[str, str, List[Dict[str, Any]]]:
180
170
  """
181
171
  Given a manifest, do initial validation and return some key values
182
172
 
@@ -195,9 +185,7 @@ def _extract_info_from_manifest(
195
185
  - f"Manifest {manifest_id} contains no samples: {manifest}"
196
186
  - f"No consistent protocol_identifier defined for samples on manifest {manifest_id}"
197
187
  """
198
- manifest_id = _get_and_check(
199
- obj=manifest, key="manifest_id", msg=f"No manifest_id in: {manifest}"
200
- )
188
+ manifest_id = _get_and_check(obj=manifest, key="manifest_id", msg=f"No manifest_id in: {manifest}")
201
189
  _ = _get_and_check( # don't need to keep status
202
190
  obj=manifest,
203
191
  key="status",
@@ -260,10 +248,7 @@ def _process_csms_sample(csms_sample: dict):
260
248
  csms_sample["collection_event_name"] = event_name
261
249
 
262
250
  # encrypt participant ids if not already encrypted
263
- if (
264
- "participant_id" in csms_sample
265
- and len(csms_sample["participant_id"]) != _ENCRYPTED_FIELD_LEN
266
- ):
251
+ if "participant_id" in csms_sample and len(csms_sample["participant_id"]) != _ENCRYPTED_FIELD_LEN:
267
252
  csms_sample["participant_id"] = _encrypt(csms_sample["participant_id"])
268
253
 
269
254
  # differences in naming convention
@@ -276,21 +261,15 @@ def _process_csms_sample(csms_sample: dict):
276
261
  "pbmc": "PBMC",
277
262
  }
278
263
  if csms_sample["processed_sample_type"] in processed_sample_type_map:
279
- csms_sample["processed_sample_type"] = processed_sample_type_map[
280
- csms_sample["processed_sample_type"]
281
- ]
264
+ csms_sample["processed_sample_type"] = processed_sample_type_map[csms_sample["processed_sample_type"]]
282
265
 
283
266
  # differences in keys
284
267
  if "fixation_or_stabilization_type" in csms_sample:
285
- csms_sample["fixation_stabilization_type"] = csms_sample.pop(
286
- "fixation_or_stabilization_type"
287
- )
268
+ csms_sample["fixation_stabilization_type"] = csms_sample.pop("fixation_or_stabilization_type")
288
269
 
289
270
  # typing
290
271
  if "sample_derivative_concentration" in csms_sample:
291
- csms_sample["sample_derivative_concentration"] = float(
292
- csms_sample["sample_derivative_concentration"]
293
- )
272
+ csms_sample["sample_derivative_concentration"] = float(csms_sample["sample_derivative_concentration"])
294
273
 
295
274
  if "parent_sample_id" not in csms_sample:
296
275
  csms_sample["parent_sample_id"] = "Not Reported"
@@ -335,9 +314,7 @@ def _convert_csms_samples(
335
314
  msg=f"No cimac_id defined for samples[{n}] on manifest_id={manifest_id} for trial {trial_id}",
336
315
  )
337
316
  if not CIMAC_ID_REGEX.match(cimac_id):
338
- raise RuntimeError(
339
- f"Malformatted cimac_id={cimac_id} on manifest_id={manifest_id} for trial {trial_id}"
340
- )
317
+ raise RuntimeError(f"Malformatted cimac_id={cimac_id} on manifest_id={manifest_id} for trial {trial_id}")
341
318
 
342
319
  if cimac_id in existing_cimac_ids:
343
320
  raise RuntimeError(
@@ -386,16 +363,10 @@ def insert_manifest_into_blob(
386
363
  trial_id, manifest_id, csms_samples = _extract_info_from_manifest(manifest)
387
364
  trial_md = TrialMetadata.select_for_update_by_trial_id(trial_id, session=session)
388
365
  if manifest_id in [s["manifest_id"] for s in trial_md.metadata_json["shipments"]]:
389
- raise RuntimeError(
390
- f"Manifest with manifest_id={manifest_id} already exists for trial {trial_id}"
391
- )
366
+ raise RuntimeError(f"Manifest with manifest_id={manifest_id} already exists for trial {trial_id}")
392
367
 
393
368
  # pull out some additional values we'll need
394
- existing_cimac_ids = [
395
- s["cimac_id"]
396
- for p in trial_md.metadata_json["participants"]
397
- for s in p["samples"]
398
- ]
369
+ existing_cimac_ids = [s["cimac_id"] for p in trial_md.metadata_json["participants"] for s in p["samples"]]
399
370
  assay_priority, assay_type = _extract_details_from_trial(csms_samples)
400
371
  if assay_priority:
401
372
  manifest["assay_priority"] = assay_priority
@@ -405,19 +376,13 @@ def insert_manifest_into_blob(
405
376
  # a patch is just the parts that are new, equivalent to the return of schemas.prismify
406
377
  patch = {
407
378
  "protocol_identifier": trial_id,
408
- "shipments": [
409
- _get_all_values(
410
- target="shipment", old=manifest, drop=["excluded", "json_data"]
411
- )
412
- ],
379
+ "shipments": [_get_all_values(target="shipment", old=manifest, drop=["excluded", "json_data"])],
413
380
  "participants": [],
414
381
  }
415
382
 
416
383
  # sort samples by participants
417
384
  sample_map: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
418
- for cimac_id, sample in _convert_csms_samples(
419
- trial_id, manifest_id, csms_samples, existing_cimac_ids
420
- ):
385
+ for cimac_id, sample in _convert_csms_samples(trial_id, manifest_id, csms_samples, existing_cimac_ids):
421
386
  sample_map[cimac_id_to_cimac_participant_id(cimac_id, {})].append(sample)
422
387
 
423
388
  # each participant has a list of samples
@@ -487,9 +452,7 @@ class Change:
487
452
  changes = []
488
453
 
489
454
  if entity_type not in ["sample", "shipment", "upload"]:
490
- raise ValueError(
491
- f"entity_type must be in: sample, shipment, upload\nnot: {entity_type}"
492
- )
455
+ raise ValueError(f"entity_type must be in: sample, shipment, upload\nnot: {entity_type}")
493
456
 
494
457
  self.entity_type = entity_type
495
458
  self.trial_id = trial_id
@@ -555,11 +518,7 @@ def _calc_difference(
555
518
 
556
519
  # handle formatting and ignore
557
520
  cidc1: Dict[str, Any] = {
558
- k: (
559
- datetime.strftime(v, "%Y-%m-%d %H:%M:%S")
560
- if isinstance(v, (date, time, datetime))
561
- else v
562
- )
521
+ k: (datetime.strftime(v, "%Y-%m-%d %H:%M:%S") if isinstance(v, (date, time, datetime)) else v)
563
522
  for k, v in cidc.items()
564
523
  if k not in ignore
565
524
  }
@@ -574,9 +533,7 @@ def _calc_difference(
574
533
  if cidc1.get(k) != csms1.get(k)
575
534
  }
576
535
  # then get both values once per key to return
577
- changes: Dict[str, Tuple[Any, Any]] = {
578
- k: (cidc.get(k), csms.get(k)) for k in diff_keys
579
- }
536
+ changes: Dict[str, Tuple[Any, Any]] = {k: (cidc.get(k), csms.get(k)) for k in diff_keys}
580
537
 
581
538
  return Change(
582
539
  entity_type=entity_type,
@@ -589,10 +546,7 @@ def _calc_difference(
589
546
 
590
547
  def _get_cidc_sample_map(metadata: dict) -> Dict[str, Dict[str, Any]]:
591
548
  """Returns a map of CIMAC IDs for this shipment to the relevant sample details from CIDC"""
592
- cidc_partic_map = {
593
- partic["cimac_participant_id"]: partic
594
- for partic in metadata.get("participants", [])
595
- }
549
+ cidc_partic_map = {partic["cimac_participant_id"]: partic for partic in metadata.get("participants", [])}
596
550
 
597
551
  ## make maps from cimac_id to a full dict
598
552
  ## need to add participant-level values
@@ -603,19 +557,13 @@ def _get_cidc_sample_map(metadata: dict) -> Dict[str, Dict[str, Any]]:
603
557
  }
604
558
  for cidc_cimac_id in cidc_sample_map.keys():
605
559
  cimac_participant_id = cimac_id_to_cimac_participant_id(cidc_cimac_id, {})
606
- cidc_sample_map[cidc_cimac_id]["cohort_name"] = cidc_partic_map[
607
- cimac_participant_id
608
- ]["cohort_name"]
609
- cidc_sample_map[cidc_cimac_id]["participant_id"] = cidc_partic_map[
610
- cimac_participant_id
611
- ]["participant_id"]
560
+ cidc_sample_map[cidc_cimac_id]["cohort_name"] = cidc_partic_map[cimac_participant_id]["cohort_name"]
561
+ cidc_sample_map[cidc_cimac_id]["participant_id"] = cidc_partic_map[cimac_participant_id]["participant_id"]
612
562
 
613
563
  return cidc_sample_map
614
564
 
615
565
 
616
- def _get_csms_sample_map(
617
- trial_id, manifest_id, csms_samples
618
- ) -> Dict[str, Dict[str, Any]]:
566
+ def _get_csms_sample_map(trial_id, manifest_id, csms_samples) -> Dict[str, Dict[str, Any]]:
619
567
  """Returns a map of CIMAC IDs to the relevant sample details from CSMS"""
620
568
  return {
621
569
  csms_cimac_id: {
@@ -640,9 +588,7 @@ def _get_csms_sample_map(
640
588
  ],
641
589
  ),
642
590
  }
643
- for csms_cimac_id, csms_sample in _convert_csms_samples(
644
- trial_id, manifest_id, csms_samples
645
- )
591
+ for csms_cimac_id, csms_sample in _convert_csms_samples(trial_id, manifest_id, csms_samples)
646
592
  }
647
593
 
648
594
 
@@ -662,9 +608,7 @@ def _cross_validate_samples(
662
608
  manifest_id,
663
609
  cidc_sample["cimac_id"],
664
610
  )
665
- raise RuntimeError(
666
- f"Missing sample: {formatted} on CSMS {(trial_id, manifest_id)}"
667
- )
611
+ raise RuntimeError(f"Missing sample: {formatted} on CSMS {(trial_id, manifest_id)}")
668
612
  # make sure that all of the CSMS samples are in CIDC
669
613
  all_cidc_sample_map: Dict[str, dict] = {
670
614
  sample["cimac_id"]: {
@@ -672,9 +616,7 @@ def _cross_validate_samples(
672
616
  "trial_id": upload.trial_id,
673
617
  "manifest_id": upload.metadata_patch["shipments"][0]["manifest_id"],
674
618
  }
675
- for upload in session.query(UploadJobs)
676
- .filter(UploadJobs.status == UploadJobStatus.MERGE_COMPLETED.value)
677
- .all()
619
+ for upload in session.query(UploadJobs).filter(UploadJobs.status == UploadJobStatus.MERGE_COMPLETED.value).all()
678
620
  for partic in upload.metadata_patch.get("participants", [])
679
621
  for sample in partic.get("samples", [])
680
622
  if len(upload.metadata_patch.get("shipments", []))
@@ -695,9 +637,7 @@ def _cross_validate_samples(
695
637
  if cidc_sample is not None
696
638
  else "<no sample found>"
697
639
  )
698
- raise RuntimeError(
699
- f"Change in critical field for: {formatted} to CSMS {(trial_id, manifest_id, cimac_id)}"
700
- )
640
+ raise RuntimeError(f"Change in critical field for: {formatted} to CSMS {(trial_id, manifest_id, cimac_id)}")
701
641
 
702
642
 
703
643
  def _initial_manifest_validation(
@@ -788,9 +728,7 @@ def _handle_shipment_differences(
788
728
  cidc_uploadjob: Optional[UploadJobs],
789
729
  ) -> Optional[Change]:
790
730
  """Compare the given CSMS and CIDC shipments, returning None's if no changes or the changes"""
791
- cidc_manifest: Dict[str, Any] = (
792
- {} if cidc_uploadjob is None else cidc_uploadjob.metadata_patch["shipments"][0]
793
- )
731
+ cidc_manifest: Dict[str, Any] = {} if cidc_uploadjob is None else cidc_uploadjob.metadata_patch["shipments"][0]
794
732
  change: Change = _calc_difference(
795
733
  entity_type="shipment",
796
734
  trial_id=cidc_uploadjob.trial_id,
@@ -870,9 +808,7 @@ def _handle_upload_differences(
870
808
 
871
809
 
872
810
  @with_default_session
873
- def detect_manifest_changes(
874
- csms_manifest: Dict[str, Any], uploader_email: str, *, session: Session
875
- ) -> List[Change]:
811
+ def detect_manifest_changes(csms_manifest: Dict[str, Any], uploader_email: str, *, session: Session) -> List[Change]:
876
812
  """
877
813
  Given a CSMS-style manifest, see if it has any differences from the current state of the db
878
814
  If a new manifest, throws a NewManifestError
@@ -914,16 +850,12 @@ def detect_manifest_changes(
914
850
  ) = _initial_manifest_validation(csms_manifest, session=session)
915
851
 
916
852
  # ----- Look for shipment-level differences -----
917
- change: Optional[Change] = _handle_shipment_differences(
918
- manifest_id, csms_manifest, cidc_uploadjob
919
- )
853
+ change: Optional[Change] = _handle_shipment_differences(manifest_id, csms_manifest, cidc_uploadjob)
920
854
  if change:
921
855
  ret.append(change)
922
856
 
923
857
  # ----- Look for sample-level differences -----
924
- ret = _handle_sample_differences(
925
- trial_id, manifest_id, csms_sample_map, cidc_sample_map, ret
926
- )
858
+ ret = _handle_sample_differences(trial_id, manifest_id, csms_sample_map, cidc_sample_map, ret)
927
859
 
928
860
  # ----- Look for differences in the Upload -----
929
861
  change: Optional[Change] = _handle_upload_differences(
@@ -408,12 +408,8 @@ details_dict = {
408
408
  "neoantigen: list of predicted neoantigens",
409
409
  "The combined MHC class I and II predicted neoantigens using the pVACseq software. The column definitions are given here (ref: https://pvactools.readthedocs.io/en/latest/pvacseq/output_files.html)",
410
410
  ),
411
- "/wes_tumor_only/analysis/vcf_gz_tnscope_filter.vcf.gz": FileDetails(
412
- "analysis", "", ""
413
- ),
414
- "/wes_tumor_only/analysis/vcf_gz_tnscope_output.vcf.gz": FileDetails(
415
- "analysis", "", ""
416
- ),
411
+ "/wes_tumor_only/analysis/vcf_gz_tnscope_filter.vcf.gz": FileDetails("analysis", "", ""),
412
+ "/wes_tumor_only/analysis/vcf_gz_tnscope_output.vcf.gz": FileDetails("analysis", "", ""),
417
413
  "/wes_tumor_only/analysis/maf_tnscope_filter.maf": FileDetails("analysis", "", ""),
418
414
  "/wes_tumor_only/analysis/tnscope_exons.vcf.gz": FileDetails("analysis", "", ""),
419
415
  "/wes_tumor_only/analysis/HLA_results.tsv": FileDetails("analysis", "", ""),
@@ -481,9 +477,7 @@ details_dict = {
481
477
  "/wes_tumor_only/analysis/config.yaml": FileDetails("analysis", "", ""),
482
478
  "/wes_tumor_only/analysis/metasheet.csv": FileDetails("analysis", "", ""),
483
479
  "/wes_tumor_only/analysis/wes_sample.json": FileDetails("analysis", "", ""),
484
- "/wes_tumor_only/analysis/tumor/xhla_report_hla.json": FileDetails(
485
- "analysis", "", ""
486
- ),
480
+ "/wes_tumor_only/analysis/tumor/xhla_report_hla.json": FileDetails("analysis", "", ""),
487
481
  # RNA
488
482
  "/rna/r1_.fastq.gz": FileDetails(
489
483
  "source",
@@ -748,9 +742,7 @@ details_dict = {
748
742
  by the appropriate CIMAC participant or sample id.",
749
743
  ),
750
744
  # Miscellaneous Data
751
- "/misc_data/": FileDetails(
752
- "source", "Unharmonized, one-off, or not-yet-supported data.", ""
753
- ),
745
+ "/misc_data/": FileDetails("source", "Unharmonized, one-off, or not-yet-supported data.", ""),
754
746
  # TCR
755
747
  "/tcr/reads.tsv": FileDetails("source", "", ""),
756
748
  "/tcr/controls/reads.tsv": FileDetails("source", "", ""),
@@ -917,9 +909,7 @@ details_dict = {
917
909
  "Corrected depth file generated by ichorCNA",
918
910
  "",
919
911
  ),
920
- "/ctdna/OnPrem.params.txt": FileDetails(
921
- "source", "On premise parameters file generated by ichorCNA", ""
922
- ),
912
+ "/ctdna/OnPrem.params.txt": FileDetails("source", "On premise parameters file generated by ichorCNA", ""),
923
913
  # Microbiome Assay
924
914
  "/microbiome/forward.fastq.gz": FileDetails(
925
915
  "source",
@@ -284,9 +284,7 @@ assay_facets: Facets = {
284
284
  "MSI": FacetConfig(["/wes_tumor_only/analysis/msisensor.txt"]),
285
285
  },
286
286
  "RNA": {
287
- "Source": FacetConfig(
288
- ["/rna/r1_.fastq.gz", "/rna/r2_.fastq.gz", "/rna/reads_.bam"]
289
- ),
287
+ "Source": FacetConfig(["/rna/r1_.fastq.gz", "/rna/r2_.fastq.gz", "/rna/reads_.bam"]),
290
288
  "Alignment": FacetConfig(
291
289
  [
292
290
  "/rna/analysis/star/sorted.bam",
@@ -428,16 +426,12 @@ assay_facets: Facets = {
428
426
  "/tcr/replicate_/i2.fastq.gz",
429
427
  ]
430
428
  ),
431
- "Misc.": FacetConfig(
432
- ["/tcr/SampleSheet.csv", "/tcr_analysis/summary_info.csv"]
433
- ),
429
+ "Misc.": FacetConfig(["/tcr/SampleSheet.csv", "/tcr_analysis/summary_info.csv"]),
434
430
  "Analysis Data": FacetConfig(
435
431
  ["/tcr_analysis/tra_clone.csv", "/tcr_analysis/trb_clone.csv"],
436
432
  "Data files indicating TRA & TRB clones' UMI counts",
437
433
  ),
438
- "Reports": FacetConfig(
439
- ["/tcr_analysis/report_trial.tar.gz"], "Report from TCRseq analysis"
440
- ),
434
+ "Reports": FacetConfig(["/tcr_analysis/report_trial.tar.gz"], "Report from TCRseq analysis"),
441
435
  },
442
436
  "ELISA": {"Data": FacetConfig(["/elisa/assay.xlsx"])},
443
437
  "ctDNA": {
@@ -564,9 +558,7 @@ def _build_facet_groups_to_names():
564
558
  for facet_name, subfacet in facets_dict["Assay Type"].items():
565
559
  for subfacet_name, subsubfacet in subfacet.items():
566
560
  for facet_group in subsubfacet.facet_groups:
567
- facet_names[facet_group] = FACET_NAME_DELIM.join(
568
- [facet_name, subfacet_name]
569
- )
561
+ facet_names[facet_group] = FACET_NAME_DELIM.join([facet_name, subfacet_name])
570
562
 
571
563
  for facet_name, subfacet in facets_dict["Clinical Type"].items():
572
564
  for facet_group in subfacet.facet_groups:
@@ -599,14 +591,9 @@ def build_data_category_facets(facet_group_file_counts: Dict[str, int]):
599
591
  def extract_facet_info(facet_config_entries, _prefix):
600
592
  results = []
601
593
  for label, config in facet_config_entries.items():
602
- count = sum(
603
- facet_group_file_counts.get(facet_group, 0)
604
- for facet_group in config.facet_groups
605
- )
594
+ count = sum(facet_group_file_counts.get(facet_group, 0) for facet_group in config.facet_groups)
606
595
  if count:
607
- results.append(
608
- {"label": label, "description": config.description, "count": count}
609
- )
596
+ results.append({"label": label, "description": config.description, "count": count})
610
597
  return results
611
598
 
612
599
  assay_types = {}
@@ -632,10 +619,7 @@ def build_trial_facets(trial_file_counts: Dict[str, int]):
632
619
  """
633
620
  Convert a mapping from trial ids to file counts into a list of facet specifications.
634
621
  """
635
- return [
636
- {"label": trial_id, "count": count}
637
- for trial_id, count in trial_file_counts.items()
638
- ]
622
+ return [{"label": trial_id, "count": count} for trial_id, count in trial_file_counts.items()]
639
623
 
640
624
 
641
625
  def get_facet_groups_for_paths(paths: List[List[str]]) -> List[str]:
@@ -699,9 +683,7 @@ def get_facet_groups_for_links() -> Dict[str, Dict[str, List[str]]]:
699
683
  for first, first_config in facets_dict[category].items():
700
684
  assay = _translate_assay(first)
701
685
  for facet, facet_config in first_config.items():
702
- _process_facet(
703
- assay, [category, first, facet], facet_config, facets_to_return
704
- )
686
+ _process_facet(assay, [category, first, facet], facet_config, facets_to_return)
705
687
 
706
688
  # run through all analysis facets and put them in the return
707
689
  category: str = "Analysis Ready"
@@ -714,9 +696,7 @@ def get_facet_groups_for_links() -> Dict[str, Dict[str, List[str]]]:
714
696
  category: str = "Clinical Type"
715
697
  for facet, facet_config in facets_dict[category].items():
716
698
  # these will all go into received, as none contain "analysis"
717
- _process_facet(
718
- "clinical_participants", [category, facet], facet_config, facets_to_return
719
- )
699
+ _process_facet("clinical_participants", [category, facet], facet_config, facets_to_return)
720
700
 
721
701
  # wes specific, use same values for wes_tumor received as for wes_normal received
722
702
  # because facets refer to the WHOLE of WES assay, not broken up by sample type