nci-cidc-api-modules 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cidc_api/config/db.py +2 -5
- cidc_api/config/logging.py +1 -5
- cidc_api/config/secrets.py +1 -3
- cidc_api/config/settings.py +2 -3
- cidc_api/csms/auth.py +1 -3
- cidc_api/models/csms_api.py +29 -97
- cidc_api/models/files/details.py +5 -15
- cidc_api/models/files/facets.py +9 -29
- cidc_api/models/models.py +215 -375
- cidc_api/shared/auth.py +3 -9
- cidc_api/shared/emails.py +8 -16
- cidc_api/shared/gcloud_client.py +33 -98
- cidc_api/shared/jose.py +1 -3
- cidc_api/shared/rest_utils.py +2 -6
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/METADATA +2 -2
- nci_cidc_api_modules-1.1.14.dist-info/RECORD +26 -0
- nci_cidc_api_modules-1.1.12.dist-info/RECORD +0 -26
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/WHEEL +0 -0
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/licenses/LICENSE +0 -0
- {nci_cidc_api_modules-1.1.12.dist-info → nci_cidc_api_modules-1.1.14.dist-info}/top_level.txt +0 -0
cidc_api/config/db.py
CHANGED
@@ -48,13 +48,10 @@ def get_sqlalchemy_database_uri(testing: bool = False) -> str:
|
|
48
48
|
|
49
49
|
# If CLOUD_SQL_INSTANCE_NAME is defined, we're connecting
|
50
50
|
# via a unix socket from inside App Engine.
|
51
|
-
config["query"] = {
|
52
|
-
"host": f'{socket_dir}{environ.get("CLOUD_SQL_INSTANCE_NAME")}'
|
53
|
-
}
|
51
|
+
config["query"] = {"host": f'{socket_dir}{environ.get("CLOUD_SQL_INSTANCE_NAME")}'}
|
54
52
|
else:
|
55
53
|
raise RuntimeError(
|
56
|
-
"Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect "
|
57
|
-
+ "to a database."
|
54
|
+
"Either POSTGRES_URI or CLOUD_SQL_INSTANCE_NAME must be defined to connect " + "to a database."
|
58
55
|
)
|
59
56
|
|
60
57
|
db_uri = str(URL(**config))
|
cidc_api/config/logging.py
CHANGED
@@ -19,11 +19,7 @@ def get_logger(name: Optional[str]) -> logging.Logger:
|
|
19
19
|
logger.setLevel(gunicorn_logger.level)
|
20
20
|
else:
|
21
21
|
handler = logging.StreamHandler(sys.stdout)
|
22
|
-
handler.setFormatter(
|
23
|
-
logging.Formatter(
|
24
|
-
"[%(asctime)s] [%(threadName)s] [%(levelname)s]: %(message)s"
|
25
|
-
)
|
26
|
-
)
|
22
|
+
handler.setFormatter(logging.Formatter("[%(asctime)s] [%(threadName)s] [%(levelname)s]: %(message)s"))
|
27
23
|
logger.addHandler(handler)
|
28
24
|
logger.setLevel(logging.DEBUG if ENV == "dev" else logging.INFO)
|
29
25
|
return logger
|
cidc_api/config/secrets.py
CHANGED
@@ -40,9 +40,7 @@ class GoogleSecretManager:
|
|
40
40
|
Raises a google.api_core.exceptions.NotFound exception
|
41
41
|
if the secret_id/version_id doesn't exist.
|
42
42
|
"""
|
43
|
-
name = (
|
44
|
-
f"projects/{self.project_id}/secrets/{secret_id}/versions/{self.version_id}"
|
45
|
-
)
|
43
|
+
name = f"projects/{self.project_id}/secrets/{secret_id}/versions/{self.version_id}"
|
46
44
|
response = self.client.access_secret_version(name=name)
|
47
45
|
secret = response.payload.data.decode("UTF-8")
|
48
46
|
|
cidc_api/config/settings.py
CHANGED
@@ -55,6 +55,7 @@ else:
|
|
55
55
|
### Configure Flask-SQLAlchemy ###
|
56
56
|
SQLALCHEMY_DATABASE_URI = get_sqlalchemy_database_uri(TESTING)
|
57
57
|
SQLALCHEMY_TRACK_MODIFICATIONS = False
|
58
|
+
SQLALCHEMY_ECHO = False # Set to True to emit all compiled sql statements
|
58
59
|
|
59
60
|
### Configure Dev CFn ###
|
60
61
|
DEV_CFUNCTIONS_SERVER = environ.get("DEV_CFUNCTIONS_SERVER")
|
@@ -78,9 +79,7 @@ GOOGLE_BIGQUERY_USER_ROLE = "roles/bigquery.jobUser" # same across environments
|
|
78
79
|
GOOGLE_PATIENT_SAMPLE_TOPIC = environ["GOOGLE_PATIENT_SAMPLE_TOPIC"]
|
79
80
|
GOOGLE_EMAILS_TOPIC = environ["GOOGLE_EMAILS_TOPIC"]
|
80
81
|
GOOGLE_ARTIFACT_UPLOAD_TOPIC = environ["GOOGLE_ARTIFACT_UPLOAD_TOPIC"]
|
81
|
-
GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC = environ[
|
82
|
-
"GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC"
|
83
|
-
]
|
82
|
+
GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC = environ["GOOGLE_GRANT_DOWNLOAD_PERMISSIONS_TOPIC"]
|
84
83
|
GOOGLE_AND_OPERATOR = " && "
|
85
84
|
GOOGLE_OR_OPERATOR = " || "
|
86
85
|
|
cidc_api/csms/auth.py
CHANGED
@@ -64,9 +64,7 @@ def get_with_authorization(url: str, **kwargs) -> requests.Response:
|
|
64
64
|
)
|
65
65
|
|
66
66
|
|
67
|
-
def get_with_paging(
|
68
|
-
url: str, limit: int = None, offset: int = 0, **kwargs
|
69
|
-
) -> Iterator[Dict[str, Any]]:
|
67
|
+
def get_with_paging(url: str, limit: int = None, offset: int = 0, **kwargs) -> Iterator[Dict[str, Any]]:
|
70
68
|
"""
|
71
69
|
Return an iterator of entries via get_with_authorization with handling for CSMS paging
|
72
70
|
|
cidc_api/models/csms_api.py
CHANGED
@@ -81,11 +81,7 @@ def _get_all_values(target: str, old: dict, drop: List[str] = None) -> Dict[str,
|
|
81
81
|
if drop is None:
|
82
82
|
drop = []
|
83
83
|
|
84
|
-
ret = {
|
85
|
-
p: old[p]
|
86
|
-
for p in TARGET_PROPERTIES_MAP[target].keys()
|
87
|
-
if p in old and p not in drop
|
88
|
-
}
|
84
|
+
ret = {p: old[p] for p in TARGET_PROPERTIES_MAP[target].keys() if p in old and p not in drop}
|
89
85
|
|
90
86
|
return ret
|
91
87
|
|
@@ -113,9 +109,7 @@ def _parse_upload_type(sample: dict, upload_type: Set[str]) -> str:
|
|
113
109
|
elif processed_derivative in ["DNA", "RNA"]:
|
114
110
|
unprocessed_type = sample.get("type_of_sample")
|
115
111
|
new_type = "tumor" if "tumor" in unprocessed_type.lower() else "normal"
|
116
|
-
new_type += (
|
117
|
-
"_blood_" if sample_manifest_type.startswith("biofluid") else "_tissue_"
|
118
|
-
)
|
112
|
+
new_type += "_blood_" if sample_manifest_type.startswith("biofluid") else "_tissue_"
|
119
113
|
new_type += processed_derivative.lower()
|
120
114
|
|
121
115
|
upload_type.add(new_type)
|
@@ -144,9 +138,7 @@ def _get_upload_type(samples: Iterable[Dict[str, Any]]) -> str:
|
|
144
138
|
# updates upload_type in-place with the given sample
|
145
139
|
_parse_upload_type(sample=sample, upload_type=upload_type)
|
146
140
|
|
147
|
-
assert (
|
148
|
-
len(upload_type) == 1
|
149
|
-
), f"Inconsistent value determined for upload_type:{upload_type}"
|
141
|
+
assert len(upload_type) == 1, f"Inconsistent value determined for upload_type:{upload_type}"
|
150
142
|
return list(upload_type)[0]
|
151
143
|
|
152
144
|
|
@@ -174,9 +166,7 @@ def _get_and_check(
|
|
174
166
|
return ret
|
175
167
|
|
176
168
|
|
177
|
-
def _extract_info_from_manifest(
|
178
|
-
manifest: Dict[str, Any]
|
179
|
-
) -> Tuple[str, str, List[Dict[str, Any]]]:
|
169
|
+
def _extract_info_from_manifest(manifest: Dict[str, Any]) -> Tuple[str, str, List[Dict[str, Any]]]:
|
180
170
|
"""
|
181
171
|
Given a manifest, do initial validation and return some key values
|
182
172
|
|
@@ -195,9 +185,7 @@ def _extract_info_from_manifest(
|
|
195
185
|
- f"Manifest {manifest_id} contains no samples: {manifest}"
|
196
186
|
- f"No consistent protocol_identifier defined for samples on manifest {manifest_id}"
|
197
187
|
"""
|
198
|
-
manifest_id = _get_and_check(
|
199
|
-
obj=manifest, key="manifest_id", msg=f"No manifest_id in: {manifest}"
|
200
|
-
)
|
188
|
+
manifest_id = _get_and_check(obj=manifest, key="manifest_id", msg=f"No manifest_id in: {manifest}")
|
201
189
|
_ = _get_and_check( # don't need to keep status
|
202
190
|
obj=manifest,
|
203
191
|
key="status",
|
@@ -260,10 +248,7 @@ def _process_csms_sample(csms_sample: dict):
|
|
260
248
|
csms_sample["collection_event_name"] = event_name
|
261
249
|
|
262
250
|
# encrypt participant ids if not already encrypted
|
263
|
-
if (
|
264
|
-
"participant_id" in csms_sample
|
265
|
-
and len(csms_sample["participant_id"]) != _ENCRYPTED_FIELD_LEN
|
266
|
-
):
|
251
|
+
if "participant_id" in csms_sample and len(csms_sample["participant_id"]) != _ENCRYPTED_FIELD_LEN:
|
267
252
|
csms_sample["participant_id"] = _encrypt(csms_sample["participant_id"])
|
268
253
|
|
269
254
|
# differences in naming convention
|
@@ -276,21 +261,15 @@ def _process_csms_sample(csms_sample: dict):
|
|
276
261
|
"pbmc": "PBMC",
|
277
262
|
}
|
278
263
|
if csms_sample["processed_sample_type"] in processed_sample_type_map:
|
279
|
-
csms_sample["processed_sample_type"] = processed_sample_type_map[
|
280
|
-
csms_sample["processed_sample_type"]
|
281
|
-
]
|
264
|
+
csms_sample["processed_sample_type"] = processed_sample_type_map[csms_sample["processed_sample_type"]]
|
282
265
|
|
283
266
|
# differences in keys
|
284
267
|
if "fixation_or_stabilization_type" in csms_sample:
|
285
|
-
csms_sample["fixation_stabilization_type"] = csms_sample.pop(
|
286
|
-
"fixation_or_stabilization_type"
|
287
|
-
)
|
268
|
+
csms_sample["fixation_stabilization_type"] = csms_sample.pop("fixation_or_stabilization_type")
|
288
269
|
|
289
270
|
# typing
|
290
271
|
if "sample_derivative_concentration" in csms_sample:
|
291
|
-
csms_sample["sample_derivative_concentration"] = float(
|
292
|
-
csms_sample["sample_derivative_concentration"]
|
293
|
-
)
|
272
|
+
csms_sample["sample_derivative_concentration"] = float(csms_sample["sample_derivative_concentration"])
|
294
273
|
|
295
274
|
if "parent_sample_id" not in csms_sample:
|
296
275
|
csms_sample["parent_sample_id"] = "Not Reported"
|
@@ -335,9 +314,7 @@ def _convert_csms_samples(
|
|
335
314
|
msg=f"No cimac_id defined for samples[{n}] on manifest_id={manifest_id} for trial {trial_id}",
|
336
315
|
)
|
337
316
|
if not CIMAC_ID_REGEX.match(cimac_id):
|
338
|
-
raise RuntimeError(
|
339
|
-
f"Malformatted cimac_id={cimac_id} on manifest_id={manifest_id} for trial {trial_id}"
|
340
|
-
)
|
317
|
+
raise RuntimeError(f"Malformatted cimac_id={cimac_id} on manifest_id={manifest_id} for trial {trial_id}")
|
341
318
|
|
342
319
|
if cimac_id in existing_cimac_ids:
|
343
320
|
raise RuntimeError(
|
@@ -386,16 +363,10 @@ def insert_manifest_into_blob(
|
|
386
363
|
trial_id, manifest_id, csms_samples = _extract_info_from_manifest(manifest)
|
387
364
|
trial_md = TrialMetadata.select_for_update_by_trial_id(trial_id, session=session)
|
388
365
|
if manifest_id in [s["manifest_id"] for s in trial_md.metadata_json["shipments"]]:
|
389
|
-
raise RuntimeError(
|
390
|
-
f"Manifest with manifest_id={manifest_id} already exists for trial {trial_id}"
|
391
|
-
)
|
366
|
+
raise RuntimeError(f"Manifest with manifest_id={manifest_id} already exists for trial {trial_id}")
|
392
367
|
|
393
368
|
# pull out some additional values we'll need
|
394
|
-
existing_cimac_ids = [
|
395
|
-
s["cimac_id"]
|
396
|
-
for p in trial_md.metadata_json["participants"]
|
397
|
-
for s in p["samples"]
|
398
|
-
]
|
369
|
+
existing_cimac_ids = [s["cimac_id"] for p in trial_md.metadata_json["participants"] for s in p["samples"]]
|
399
370
|
assay_priority, assay_type = _extract_details_from_trial(csms_samples)
|
400
371
|
if assay_priority:
|
401
372
|
manifest["assay_priority"] = assay_priority
|
@@ -405,19 +376,13 @@ def insert_manifest_into_blob(
|
|
405
376
|
# a patch is just the parts that are new, equivalent to the return of schemas.prismify
|
406
377
|
patch = {
|
407
378
|
"protocol_identifier": trial_id,
|
408
|
-
"shipments": [
|
409
|
-
_get_all_values(
|
410
|
-
target="shipment", old=manifest, drop=["excluded", "json_data"]
|
411
|
-
)
|
412
|
-
],
|
379
|
+
"shipments": [_get_all_values(target="shipment", old=manifest, drop=["excluded", "json_data"])],
|
413
380
|
"participants": [],
|
414
381
|
}
|
415
382
|
|
416
383
|
# sort samples by participants
|
417
384
|
sample_map: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
|
418
|
-
for cimac_id, sample in _convert_csms_samples(
|
419
|
-
trial_id, manifest_id, csms_samples, existing_cimac_ids
|
420
|
-
):
|
385
|
+
for cimac_id, sample in _convert_csms_samples(trial_id, manifest_id, csms_samples, existing_cimac_ids):
|
421
386
|
sample_map[cimac_id_to_cimac_participant_id(cimac_id, {})].append(sample)
|
422
387
|
|
423
388
|
# each participant has a list of samples
|
@@ -487,9 +452,7 @@ class Change:
|
|
487
452
|
changes = []
|
488
453
|
|
489
454
|
if entity_type not in ["sample", "shipment", "upload"]:
|
490
|
-
raise ValueError(
|
491
|
-
f"entity_type must be in: sample, shipment, upload\nnot: {entity_type}"
|
492
|
-
)
|
455
|
+
raise ValueError(f"entity_type must be in: sample, shipment, upload\nnot: {entity_type}")
|
493
456
|
|
494
457
|
self.entity_type = entity_type
|
495
458
|
self.trial_id = trial_id
|
@@ -555,11 +518,7 @@ def _calc_difference(
|
|
555
518
|
|
556
519
|
# handle formatting and ignore
|
557
520
|
cidc1: Dict[str, Any] = {
|
558
|
-
k: (
|
559
|
-
datetime.strftime(v, "%Y-%m-%d %H:%M:%S")
|
560
|
-
if isinstance(v, (date, time, datetime))
|
561
|
-
else v
|
562
|
-
)
|
521
|
+
k: (datetime.strftime(v, "%Y-%m-%d %H:%M:%S") if isinstance(v, (date, time, datetime)) else v)
|
563
522
|
for k, v in cidc.items()
|
564
523
|
if k not in ignore
|
565
524
|
}
|
@@ -574,9 +533,7 @@ def _calc_difference(
|
|
574
533
|
if cidc1.get(k) != csms1.get(k)
|
575
534
|
}
|
576
535
|
# then get both values once per key to return
|
577
|
-
changes: Dict[str, Tuple[Any, Any]] = {
|
578
|
-
k: (cidc.get(k), csms.get(k)) for k in diff_keys
|
579
|
-
}
|
536
|
+
changes: Dict[str, Tuple[Any, Any]] = {k: (cidc.get(k), csms.get(k)) for k in diff_keys}
|
580
537
|
|
581
538
|
return Change(
|
582
539
|
entity_type=entity_type,
|
@@ -589,10 +546,7 @@ def _calc_difference(
|
|
589
546
|
|
590
547
|
def _get_cidc_sample_map(metadata: dict) -> Dict[str, Dict[str, Any]]:
|
591
548
|
"""Returns a map of CIMAC IDs for this shipment to the relevant sample details from CIDC"""
|
592
|
-
cidc_partic_map = {
|
593
|
-
partic["cimac_participant_id"]: partic
|
594
|
-
for partic in metadata.get("participants", [])
|
595
|
-
}
|
549
|
+
cidc_partic_map = {partic["cimac_participant_id"]: partic for partic in metadata.get("participants", [])}
|
596
550
|
|
597
551
|
## make maps from cimac_id to a full dict
|
598
552
|
## need to add participant-level values
|
@@ -603,19 +557,13 @@ def _get_cidc_sample_map(metadata: dict) -> Dict[str, Dict[str, Any]]:
|
|
603
557
|
}
|
604
558
|
for cidc_cimac_id in cidc_sample_map.keys():
|
605
559
|
cimac_participant_id = cimac_id_to_cimac_participant_id(cidc_cimac_id, {})
|
606
|
-
cidc_sample_map[cidc_cimac_id]["cohort_name"] = cidc_partic_map[
|
607
|
-
cimac_participant_id
|
608
|
-
]["cohort_name"]
|
609
|
-
cidc_sample_map[cidc_cimac_id]["participant_id"] = cidc_partic_map[
|
610
|
-
cimac_participant_id
|
611
|
-
]["participant_id"]
|
560
|
+
cidc_sample_map[cidc_cimac_id]["cohort_name"] = cidc_partic_map[cimac_participant_id]["cohort_name"]
|
561
|
+
cidc_sample_map[cidc_cimac_id]["participant_id"] = cidc_partic_map[cimac_participant_id]["participant_id"]
|
612
562
|
|
613
563
|
return cidc_sample_map
|
614
564
|
|
615
565
|
|
616
|
-
def _get_csms_sample_map(
|
617
|
-
trial_id, manifest_id, csms_samples
|
618
|
-
) -> Dict[str, Dict[str, Any]]:
|
566
|
+
def _get_csms_sample_map(trial_id, manifest_id, csms_samples) -> Dict[str, Dict[str, Any]]:
|
619
567
|
"""Returns a map of CIMAC IDs to the relevant sample details from CSMS"""
|
620
568
|
return {
|
621
569
|
csms_cimac_id: {
|
@@ -640,9 +588,7 @@ def _get_csms_sample_map(
|
|
640
588
|
],
|
641
589
|
),
|
642
590
|
}
|
643
|
-
for csms_cimac_id, csms_sample in _convert_csms_samples(
|
644
|
-
trial_id, manifest_id, csms_samples
|
645
|
-
)
|
591
|
+
for csms_cimac_id, csms_sample in _convert_csms_samples(trial_id, manifest_id, csms_samples)
|
646
592
|
}
|
647
593
|
|
648
594
|
|
@@ -662,9 +608,7 @@ def _cross_validate_samples(
|
|
662
608
|
manifest_id,
|
663
609
|
cidc_sample["cimac_id"],
|
664
610
|
)
|
665
|
-
raise RuntimeError(
|
666
|
-
f"Missing sample: {formatted} on CSMS {(trial_id, manifest_id)}"
|
667
|
-
)
|
611
|
+
raise RuntimeError(f"Missing sample: {formatted} on CSMS {(trial_id, manifest_id)}")
|
668
612
|
# make sure that all of the CSMS samples are in CIDC
|
669
613
|
all_cidc_sample_map: Dict[str, dict] = {
|
670
614
|
sample["cimac_id"]: {
|
@@ -672,9 +616,7 @@ def _cross_validate_samples(
|
|
672
616
|
"trial_id": upload.trial_id,
|
673
617
|
"manifest_id": upload.metadata_patch["shipments"][0]["manifest_id"],
|
674
618
|
}
|
675
|
-
for upload in session.query(UploadJobs)
|
676
|
-
.filter(UploadJobs.status == UploadJobStatus.MERGE_COMPLETED.value)
|
677
|
-
.all()
|
619
|
+
for upload in session.query(UploadJobs).filter(UploadJobs.status == UploadJobStatus.MERGE_COMPLETED.value).all()
|
678
620
|
for partic in upload.metadata_patch.get("participants", [])
|
679
621
|
for sample in partic.get("samples", [])
|
680
622
|
if len(upload.metadata_patch.get("shipments", []))
|
@@ -695,9 +637,7 @@ def _cross_validate_samples(
|
|
695
637
|
if cidc_sample is not None
|
696
638
|
else "<no sample found>"
|
697
639
|
)
|
698
|
-
raise RuntimeError(
|
699
|
-
f"Change in critical field for: {formatted} to CSMS {(trial_id, manifest_id, cimac_id)}"
|
700
|
-
)
|
640
|
+
raise RuntimeError(f"Change in critical field for: {formatted} to CSMS {(trial_id, manifest_id, cimac_id)}")
|
701
641
|
|
702
642
|
|
703
643
|
def _initial_manifest_validation(
|
@@ -788,9 +728,7 @@ def _handle_shipment_differences(
|
|
788
728
|
cidc_uploadjob: Optional[UploadJobs],
|
789
729
|
) -> Optional[Change]:
|
790
730
|
"""Compare the given CSMS and CIDC shipments, returning None's if no changes or the changes"""
|
791
|
-
cidc_manifest: Dict[str, Any] = (
|
792
|
-
{} if cidc_uploadjob is None else cidc_uploadjob.metadata_patch["shipments"][0]
|
793
|
-
)
|
731
|
+
cidc_manifest: Dict[str, Any] = {} if cidc_uploadjob is None else cidc_uploadjob.metadata_patch["shipments"][0]
|
794
732
|
change: Change = _calc_difference(
|
795
733
|
entity_type="shipment",
|
796
734
|
trial_id=cidc_uploadjob.trial_id,
|
@@ -870,9 +808,7 @@ def _handle_upload_differences(
|
|
870
808
|
|
871
809
|
|
872
810
|
@with_default_session
|
873
|
-
def detect_manifest_changes(
|
874
|
-
csms_manifest: Dict[str, Any], uploader_email: str, *, session: Session
|
875
|
-
) -> List[Change]:
|
811
|
+
def detect_manifest_changes(csms_manifest: Dict[str, Any], uploader_email: str, *, session: Session) -> List[Change]:
|
876
812
|
"""
|
877
813
|
Given a CSMS-style manifest, see if it has any differences from the current state of the db
|
878
814
|
If a new manifest, throws a NewManifestError
|
@@ -914,16 +850,12 @@ def detect_manifest_changes(
|
|
914
850
|
) = _initial_manifest_validation(csms_manifest, session=session)
|
915
851
|
|
916
852
|
# ----- Look for shipment-level differences -----
|
917
|
-
change: Optional[Change] = _handle_shipment_differences(
|
918
|
-
manifest_id, csms_manifest, cidc_uploadjob
|
919
|
-
)
|
853
|
+
change: Optional[Change] = _handle_shipment_differences(manifest_id, csms_manifest, cidc_uploadjob)
|
920
854
|
if change:
|
921
855
|
ret.append(change)
|
922
856
|
|
923
857
|
# ----- Look for sample-level differences -----
|
924
|
-
ret = _handle_sample_differences(
|
925
|
-
trial_id, manifest_id, csms_sample_map, cidc_sample_map, ret
|
926
|
-
)
|
858
|
+
ret = _handle_sample_differences(trial_id, manifest_id, csms_sample_map, cidc_sample_map, ret)
|
927
859
|
|
928
860
|
# ----- Look for differences in the Upload -----
|
929
861
|
change: Optional[Change] = _handle_upload_differences(
|
cidc_api/models/files/details.py
CHANGED
@@ -408,12 +408,8 @@ details_dict = {
|
|
408
408
|
"neoantigen: list of predicted neoantigens",
|
409
409
|
"The combined MHC class I and II predicted neoantigens using the pVACseq software. The column definitions are given here (ref: https://pvactools.readthedocs.io/en/latest/pvacseq/output_files.html)",
|
410
410
|
),
|
411
|
-
"/wes_tumor_only/analysis/vcf_gz_tnscope_filter.vcf.gz": FileDetails(
|
412
|
-
"analysis", "", ""
|
413
|
-
),
|
414
|
-
"/wes_tumor_only/analysis/vcf_gz_tnscope_output.vcf.gz": FileDetails(
|
415
|
-
"analysis", "", ""
|
416
|
-
),
|
411
|
+
"/wes_tumor_only/analysis/vcf_gz_tnscope_filter.vcf.gz": FileDetails("analysis", "", ""),
|
412
|
+
"/wes_tumor_only/analysis/vcf_gz_tnscope_output.vcf.gz": FileDetails("analysis", "", ""),
|
417
413
|
"/wes_tumor_only/analysis/maf_tnscope_filter.maf": FileDetails("analysis", "", ""),
|
418
414
|
"/wes_tumor_only/analysis/tnscope_exons.vcf.gz": FileDetails("analysis", "", ""),
|
419
415
|
"/wes_tumor_only/analysis/HLA_results.tsv": FileDetails("analysis", "", ""),
|
@@ -481,9 +477,7 @@ details_dict = {
|
|
481
477
|
"/wes_tumor_only/analysis/config.yaml": FileDetails("analysis", "", ""),
|
482
478
|
"/wes_tumor_only/analysis/metasheet.csv": FileDetails("analysis", "", ""),
|
483
479
|
"/wes_tumor_only/analysis/wes_sample.json": FileDetails("analysis", "", ""),
|
484
|
-
"/wes_tumor_only/analysis/tumor/xhla_report_hla.json": FileDetails(
|
485
|
-
"analysis", "", ""
|
486
|
-
),
|
480
|
+
"/wes_tumor_only/analysis/tumor/xhla_report_hla.json": FileDetails("analysis", "", ""),
|
487
481
|
# RNA
|
488
482
|
"/rna/r1_.fastq.gz": FileDetails(
|
489
483
|
"source",
|
@@ -748,9 +742,7 @@ details_dict = {
|
|
748
742
|
by the appropriate CIMAC participant or sample id.",
|
749
743
|
),
|
750
744
|
# Miscellaneous Data
|
751
|
-
"/misc_data/": FileDetails(
|
752
|
-
"source", "Unharmonized, one-off, or not-yet-supported data.", ""
|
753
|
-
),
|
745
|
+
"/misc_data/": FileDetails("source", "Unharmonized, one-off, or not-yet-supported data.", ""),
|
754
746
|
# TCR
|
755
747
|
"/tcr/reads.tsv": FileDetails("source", "", ""),
|
756
748
|
"/tcr/controls/reads.tsv": FileDetails("source", "", ""),
|
@@ -917,9 +909,7 @@ details_dict = {
|
|
917
909
|
"Corrected depth file generated by ichorCNA",
|
918
910
|
"",
|
919
911
|
),
|
920
|
-
"/ctdna/OnPrem.params.txt": FileDetails(
|
921
|
-
"source", "On premise parameters file generated by ichorCNA", ""
|
922
|
-
),
|
912
|
+
"/ctdna/OnPrem.params.txt": FileDetails("source", "On premise parameters file generated by ichorCNA", ""),
|
923
913
|
# Microbiome Assay
|
924
914
|
"/microbiome/forward.fastq.gz": FileDetails(
|
925
915
|
"source",
|
cidc_api/models/files/facets.py
CHANGED
@@ -284,9 +284,7 @@ assay_facets: Facets = {
|
|
284
284
|
"MSI": FacetConfig(["/wes_tumor_only/analysis/msisensor.txt"]),
|
285
285
|
},
|
286
286
|
"RNA": {
|
287
|
-
"Source": FacetConfig(
|
288
|
-
["/rna/r1_.fastq.gz", "/rna/r2_.fastq.gz", "/rna/reads_.bam"]
|
289
|
-
),
|
287
|
+
"Source": FacetConfig(["/rna/r1_.fastq.gz", "/rna/r2_.fastq.gz", "/rna/reads_.bam"]),
|
290
288
|
"Alignment": FacetConfig(
|
291
289
|
[
|
292
290
|
"/rna/analysis/star/sorted.bam",
|
@@ -428,16 +426,12 @@ assay_facets: Facets = {
|
|
428
426
|
"/tcr/replicate_/i2.fastq.gz",
|
429
427
|
]
|
430
428
|
),
|
431
|
-
"Misc.": FacetConfig(
|
432
|
-
["/tcr/SampleSheet.csv", "/tcr_analysis/summary_info.csv"]
|
433
|
-
),
|
429
|
+
"Misc.": FacetConfig(["/tcr/SampleSheet.csv", "/tcr_analysis/summary_info.csv"]),
|
434
430
|
"Analysis Data": FacetConfig(
|
435
431
|
["/tcr_analysis/tra_clone.csv", "/tcr_analysis/trb_clone.csv"],
|
436
432
|
"Data files indicating TRA & TRB clones' UMI counts",
|
437
433
|
),
|
438
|
-
"Reports": FacetConfig(
|
439
|
-
["/tcr_analysis/report_trial.tar.gz"], "Report from TCRseq analysis"
|
440
|
-
),
|
434
|
+
"Reports": FacetConfig(["/tcr_analysis/report_trial.tar.gz"], "Report from TCRseq analysis"),
|
441
435
|
},
|
442
436
|
"ELISA": {"Data": FacetConfig(["/elisa/assay.xlsx"])},
|
443
437
|
"ctDNA": {
|
@@ -564,9 +558,7 @@ def _build_facet_groups_to_names():
|
|
564
558
|
for facet_name, subfacet in facets_dict["Assay Type"].items():
|
565
559
|
for subfacet_name, subsubfacet in subfacet.items():
|
566
560
|
for facet_group in subsubfacet.facet_groups:
|
567
|
-
facet_names[facet_group] = FACET_NAME_DELIM.join(
|
568
|
-
[facet_name, subfacet_name]
|
569
|
-
)
|
561
|
+
facet_names[facet_group] = FACET_NAME_DELIM.join([facet_name, subfacet_name])
|
570
562
|
|
571
563
|
for facet_name, subfacet in facets_dict["Clinical Type"].items():
|
572
564
|
for facet_group in subfacet.facet_groups:
|
@@ -599,14 +591,9 @@ def build_data_category_facets(facet_group_file_counts: Dict[str, int]):
|
|
599
591
|
def extract_facet_info(facet_config_entries, _prefix):
|
600
592
|
results = []
|
601
593
|
for label, config in facet_config_entries.items():
|
602
|
-
count = sum(
|
603
|
-
facet_group_file_counts.get(facet_group, 0)
|
604
|
-
for facet_group in config.facet_groups
|
605
|
-
)
|
594
|
+
count = sum(facet_group_file_counts.get(facet_group, 0) for facet_group in config.facet_groups)
|
606
595
|
if count:
|
607
|
-
results.append(
|
608
|
-
{"label": label, "description": config.description, "count": count}
|
609
|
-
)
|
596
|
+
results.append({"label": label, "description": config.description, "count": count})
|
610
597
|
return results
|
611
598
|
|
612
599
|
assay_types = {}
|
@@ -632,10 +619,7 @@ def build_trial_facets(trial_file_counts: Dict[str, int]):
|
|
632
619
|
"""
|
633
620
|
Convert a mapping from trial ids to file counts into a list of facet specifications.
|
634
621
|
"""
|
635
|
-
return [
|
636
|
-
{"label": trial_id, "count": count}
|
637
|
-
for trial_id, count in trial_file_counts.items()
|
638
|
-
]
|
622
|
+
return [{"label": trial_id, "count": count} for trial_id, count in trial_file_counts.items()]
|
639
623
|
|
640
624
|
|
641
625
|
def get_facet_groups_for_paths(paths: List[List[str]]) -> List[str]:
|
@@ -699,9 +683,7 @@ def get_facet_groups_for_links() -> Dict[str, Dict[str, List[str]]]:
|
|
699
683
|
for first, first_config in facets_dict[category].items():
|
700
684
|
assay = _translate_assay(first)
|
701
685
|
for facet, facet_config in first_config.items():
|
702
|
-
_process_facet(
|
703
|
-
assay, [category, first, facet], facet_config, facets_to_return
|
704
|
-
)
|
686
|
+
_process_facet(assay, [category, first, facet], facet_config, facets_to_return)
|
705
687
|
|
706
688
|
# run through all analysis facets and put them in the return
|
707
689
|
category: str = "Analysis Ready"
|
@@ -714,9 +696,7 @@ def get_facet_groups_for_links() -> Dict[str, Dict[str, List[str]]]:
|
|
714
696
|
category: str = "Clinical Type"
|
715
697
|
for facet, facet_config in facets_dict[category].items():
|
716
698
|
# these will all go into received, as none contain "analysis"
|
717
|
-
_process_facet(
|
718
|
-
"clinical_participants", [category, facet], facet_config, facets_to_return
|
719
|
-
)
|
699
|
+
_process_facet("clinical_participants", [category, facet], facet_config, facets_to_return)
|
720
700
|
|
721
701
|
# wes specific, use same values for wes_tumor received as for wes_normal received
|
722
702
|
# because facets refer to the WHOLE of WES assay, not broken up by sample type
|