nmdc-runtime 2.6.0__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/config.py +4 -0
- nmdc_runtime/mongo_util.py +90 -0
- nmdc_runtime/site/export/ncbi_xml.py +98 -27
- nmdc_runtime/site/export/ncbi_xml_utils.py +27 -25
- nmdc_runtime/site/graphs.py +42 -5
- nmdc_runtime/site/ops.py +405 -14
- nmdc_runtime/site/repair/database_updater.py +202 -1
- nmdc_runtime/site/repository.py +100 -1
- nmdc_runtime/site/resources.py +13 -0
- nmdc_runtime/site/translation/neon_benthic_translator.py +1 -0
- nmdc_runtime/site/translation/neon_soil_translator.py +1 -0
- nmdc_runtime/site/translation/neon_surface_water_translator.py +1 -0
- nmdc_runtime/util.py +56 -2
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/METADATA +18 -3
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/RECORD +19 -18
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/WHEEL +1 -1
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/licenses/LICENSE +0 -0
- {nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/top_level.txt +0 -0
|
nmdc_runtime/site/repair/database_updater.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from functools import lru_cache
|
|
2
|
-
from typing import Any, Dict, List
|
|
2
|
+
from typing import Any, Dict, List, Union
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from nmdc_runtime.site.resources import (
|
|
5
5
|
RuntimeApiUserClient,
|
|
@@ -240,3 +240,204 @@ class DatabaseUpdater:
|
|
|
240
240
|
]
|
|
241
241
|
|
|
242
242
|
return database
|
|
243
|
+
|
|
244
|
+
def queries_run_script_to_update_insdc_identifiers(
|
|
245
|
+
self,
|
|
246
|
+
) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
|
|
247
|
+
"""This method creates a `/queries:run` API endpoint compatible update script that can be run
|
|
248
|
+
using that API endpoint to update/add information on the `insdc_biosample_identifiers` field
|
|
249
|
+
of biosample_set records and the `insdc_bioproject_identifiers` field on data_generation_set records.
|
|
250
|
+
|
|
251
|
+
The information to be asserted is retrieved from the `ncbiBioSampleAccession` and
|
|
252
|
+
`ncbiBioProjectAccession` fields on the GOLD `/projects` API endpoint.
|
|
253
|
+
|
|
254
|
+
:return: A `/queries:run` update query compatible script serialized as a dictionary/JSON.
|
|
255
|
+
"""
|
|
256
|
+
# Fetch all biosamples associated with the study
|
|
257
|
+
biosample_set = self.runtime_api_user_client.get_biosamples_for_study(
|
|
258
|
+
self.study_id
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
# Fetch all data_generation records associated with the study
|
|
262
|
+
data_generation_set = (
|
|
263
|
+
self.runtime_api_user_client.get_data_generation_records_for_study(
|
|
264
|
+
self.study_id
|
|
265
|
+
)
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
biosample_updates = []
|
|
269
|
+
data_generation_updates = []
|
|
270
|
+
|
|
271
|
+
# Dictionary to store gold_project_id -> ncbi_bioproject_accession mapping
|
|
272
|
+
gold_project_to_bioproject = {}
|
|
273
|
+
|
|
274
|
+
# Dictionary to store all project data we gather during biosample processing
|
|
275
|
+
all_processed_projects = {}
|
|
276
|
+
|
|
277
|
+
# Process biosamples for insdc_biosample_identifiers
|
|
278
|
+
for biosample in biosample_set:
|
|
279
|
+
# get the list (usually one) of GOLD biosample identifiers on the gold_biosample_identifiers slot
|
|
280
|
+
gold_biosample_identifiers = biosample.get("gold_biosample_identifiers", [])
|
|
281
|
+
if not gold_biosample_identifiers:
|
|
282
|
+
continue
|
|
283
|
+
|
|
284
|
+
biosample_id = biosample.get("id")
|
|
285
|
+
if not biosample_id:
|
|
286
|
+
continue
|
|
287
|
+
|
|
288
|
+
insdc_biosample_identifiers = []
|
|
289
|
+
|
|
290
|
+
for gold_biosample_id in gold_biosample_identifiers:
|
|
291
|
+
normalized_id = gold_biosample_id.replace("gold:", "")
|
|
292
|
+
|
|
293
|
+
# fetch projects associated with a GOLD biosample from the GOLD `/projects` API endpoint
|
|
294
|
+
gold_projects = self.gold_api_client.fetch_projects_by_biosample(
|
|
295
|
+
normalized_id
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
for project in gold_projects:
|
|
299
|
+
# Store each project for later use
|
|
300
|
+
project_gold_id = project.get("projectGoldId")
|
|
301
|
+
if project_gold_id:
|
|
302
|
+
all_processed_projects[project_gold_id] = project
|
|
303
|
+
|
|
304
|
+
# Collect ncbi_biosample_accession for biosample updates
|
|
305
|
+
ncbi_biosample_accession = project.get("ncbiBioSampleAccession")
|
|
306
|
+
if ncbi_biosample_accession and ncbi_biosample_accession.strip():
|
|
307
|
+
insdc_biosample_identifiers.append(ncbi_biosample_accession)
|
|
308
|
+
|
|
309
|
+
# Collect ncbi_bioproject_accession for data_generation records
|
|
310
|
+
ncbi_bioproject_accession = project.get("ncbiBioProjectAccession")
|
|
311
|
+
if (
|
|
312
|
+
project_gold_id
|
|
313
|
+
and ncbi_bioproject_accession
|
|
314
|
+
and ncbi_bioproject_accession.strip()
|
|
315
|
+
):
|
|
316
|
+
gold_project_to_bioproject[project_gold_id] = (
|
|
317
|
+
ncbi_bioproject_accession
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
if insdc_biosample_identifiers:
|
|
321
|
+
existing_insdc_biosample_identifiers = biosample.get(
|
|
322
|
+
"insdc_biosample_identifiers", []
|
|
323
|
+
)
|
|
324
|
+
new_insdc_biosample_identifiers = list(
|
|
325
|
+
set(insdc_biosample_identifiers)
|
|
326
|
+
- set(existing_insdc_biosample_identifiers)
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
if new_insdc_biosample_identifiers:
|
|
330
|
+
prefixed_new_biosample_identifiers = [
|
|
331
|
+
f"biosample:{id}" for id in new_insdc_biosample_identifiers
|
|
332
|
+
]
|
|
333
|
+
|
|
334
|
+
if existing_insdc_biosample_identifiers:
|
|
335
|
+
all_biosample_identifiers = list(
|
|
336
|
+
set(
|
|
337
|
+
existing_insdc_biosample_identifiers
|
|
338
|
+
+ prefixed_new_biosample_identifiers
|
|
339
|
+
)
|
|
340
|
+
)
|
|
341
|
+
biosample_updates.append(
|
|
342
|
+
{
|
|
343
|
+
"q": {"id": biosample_id},
|
|
344
|
+
"u": {
|
|
345
|
+
"$set": {
|
|
346
|
+
"insdc_biosample_identifiers": all_biosample_identifiers
|
|
347
|
+
}
|
|
348
|
+
},
|
|
349
|
+
}
|
|
350
|
+
)
|
|
351
|
+
else:
|
|
352
|
+
biosample_updates.append(
|
|
353
|
+
{
|
|
354
|
+
"q": {"id": biosample_id},
|
|
355
|
+
"u": {
|
|
356
|
+
"$set": {
|
|
357
|
+
"insdc_biosample_identifiers": prefixed_new_biosample_identifiers
|
|
358
|
+
}
|
|
359
|
+
},
|
|
360
|
+
}
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
# Process data_generation records for insdc_bioproject_identifiers
|
|
364
|
+
for data_generation in data_generation_set:
|
|
365
|
+
data_generation_id = data_generation.get("id")
|
|
366
|
+
if not data_generation_id:
|
|
367
|
+
continue
|
|
368
|
+
|
|
369
|
+
# Extract existing insdc_bioproject_identifiers
|
|
370
|
+
existing_insdc_bioproject_identifiers = data_generation.get(
|
|
371
|
+
"insdc_bioproject_identifiers", []
|
|
372
|
+
)
|
|
373
|
+
|
|
374
|
+
collected_insdc_bioproject_identifiers = set()
|
|
375
|
+
|
|
376
|
+
# Add any project identifiers already on the record
|
|
377
|
+
if "insdc_bioproject_identifiers" in data_generation:
|
|
378
|
+
for identifier in data_generation["insdc_bioproject_identifiers"]:
|
|
379
|
+
collected_insdc_bioproject_identifiers.add(identifier)
|
|
380
|
+
|
|
381
|
+
# If there are gold_sequencing_project_identifiers, use our pre-collected mapping
|
|
382
|
+
gold_project_identifiers = data_generation.get(
|
|
383
|
+
"gold_sequencing_project_identifiers", []
|
|
384
|
+
)
|
|
385
|
+
for gold_project_id in gold_project_identifiers:
|
|
386
|
+
normalized_id = gold_project_id.replace("gold:", "")
|
|
387
|
+
|
|
388
|
+
# Check if we have a bioproject ID for this GOLD project ID
|
|
389
|
+
if normalized_id in gold_project_to_bioproject:
|
|
390
|
+
ncbi_bioproject_accession = gold_project_to_bioproject[
|
|
391
|
+
normalized_id
|
|
392
|
+
]
|
|
393
|
+
collected_insdc_bioproject_identifiers.add(
|
|
394
|
+
f"bioproject:{ncbi_bioproject_accession}"
|
|
395
|
+
)
|
|
396
|
+
else:
|
|
397
|
+
# Only if we don't have it in our mapping, try to fetch it
|
|
398
|
+
# Instead of making a direct API request, check if we've already seen this project
|
|
399
|
+
if normalized_id in all_processed_projects:
|
|
400
|
+
project_data = all_processed_projects[normalized_id]
|
|
401
|
+
ncbi_bioproject_accession = project_data.get(
|
|
402
|
+
"ncbiBioProjectAccession"
|
|
403
|
+
)
|
|
404
|
+
if (
|
|
405
|
+
ncbi_bioproject_accession
|
|
406
|
+
and ncbi_bioproject_accession.strip()
|
|
407
|
+
):
|
|
408
|
+
collected_insdc_bioproject_identifiers.add(
|
|
409
|
+
f"bioproject:{ncbi_bioproject_accession}"
|
|
410
|
+
)
|
|
411
|
+
# Add to our mapping for future reference
|
|
412
|
+
gold_project_to_bioproject[normalized_id] = (
|
|
413
|
+
ncbi_bioproject_accession
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# Create a list from the set of collected identifiers
|
|
417
|
+
collected_insdc_bioproject_identifiers = list(
|
|
418
|
+
collected_insdc_bioproject_identifiers
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# Only update if there are identifiers to add
|
|
422
|
+
if collected_insdc_bioproject_identifiers and set(
|
|
423
|
+
collected_insdc_bioproject_identifiers
|
|
424
|
+
) != set(existing_insdc_bioproject_identifiers):
|
|
425
|
+
data_generation_updates.append(
|
|
426
|
+
{
|
|
427
|
+
"q": {"id": data_generation_id},
|
|
428
|
+
"u": {
|
|
429
|
+
"$set": {
|
|
430
|
+
"insdc_bioproject_identifiers": collected_insdc_bioproject_identifiers
|
|
431
|
+
}
|
|
432
|
+
},
|
|
433
|
+
}
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
# Return updates for both collections
|
|
437
|
+
if data_generation_updates:
|
|
438
|
+
return [
|
|
439
|
+
{"update": "biosample_set", "updates": biosample_updates},
|
|
440
|
+
{"update": "data_generation_set", "updates": data_generation_updates},
|
|
441
|
+
]
|
|
442
|
+
else:
|
|
443
|
+
return {"update": "biosample_set", "updates": biosample_updates}
|
nmdc_runtime/site/repository.py
CHANGED
|
@@ -14,6 +14,7 @@ from dagster import (
|
|
|
14
14
|
DagsterRunStatus,
|
|
15
15
|
RunStatusSensorContext,
|
|
16
16
|
DefaultSensorStatus,
|
|
17
|
+
in_process_executor,
|
|
17
18
|
)
|
|
18
19
|
from starlette import status
|
|
19
20
|
from toolz import merge, get_in
|
|
@@ -44,8 +45,10 @@ from nmdc_runtime.site.graphs import (
|
|
|
44
45
|
ingest_neon_benthic_metadata,
|
|
45
46
|
ingest_neon_surface_water_metadata,
|
|
46
47
|
ensure_alldocs,
|
|
48
|
+
run_ontology_load,
|
|
47
49
|
nmdc_study_to_ncbi_submission_export,
|
|
48
50
|
generate_data_generation_set_for_biosamples_in_nmdc_study,
|
|
51
|
+
generate_update_script_for_insdc_biosample_identifiers,
|
|
49
52
|
)
|
|
50
53
|
from nmdc_runtime.site.resources import (
|
|
51
54
|
get_mongo,
|
|
@@ -123,6 +126,55 @@ ensure_alldocs_daily = ScheduleDefinition(
|
|
|
123
126
|
)
|
|
124
127
|
|
|
125
128
|
|
|
129
|
+
load_envo_ontology_weekly = ScheduleDefinition(
|
|
130
|
+
name="weekly_load_envo_ontology",
|
|
131
|
+
cron_schedule="0 7 * * 1",
|
|
132
|
+
execution_timezone="America/New_York",
|
|
133
|
+
job=run_ontology_load.to_job(
|
|
134
|
+
name="scheduled_envo_ontology_load",
|
|
135
|
+
config=unfreeze(
|
|
136
|
+
merge(
|
|
137
|
+
run_config_frozen__normal_env,
|
|
138
|
+
{"ops": {"load_ontology": {"config": {"source_ontology": "envo"}}}},
|
|
139
|
+
)
|
|
140
|
+
),
|
|
141
|
+
resource_defs=resource_defs,
|
|
142
|
+
),
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
load_uberon_ontology_weekly = ScheduleDefinition(
|
|
146
|
+
name="weekly_load_uberon_ontology",
|
|
147
|
+
cron_schedule="0 8 * * 1",
|
|
148
|
+
execution_timezone="America/New_York",
|
|
149
|
+
job=run_ontology_load.to_job(
|
|
150
|
+
name="scheduled_uberon_ontology_load",
|
|
151
|
+
config=unfreeze(
|
|
152
|
+
merge(
|
|
153
|
+
run_config_frozen__normal_env,
|
|
154
|
+
{"ops": {"load_ontology": {"config": {"source_ontology": "uberon"}}}},
|
|
155
|
+
)
|
|
156
|
+
),
|
|
157
|
+
resource_defs=resource_defs,
|
|
158
|
+
),
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
load_po_ontology_weekly = ScheduleDefinition(
|
|
162
|
+
name="weekly_load_po_ontology",
|
|
163
|
+
cron_schedule="0 9 * * 1",
|
|
164
|
+
execution_timezone="America/New_York",
|
|
165
|
+
job=run_ontology_load.to_job(
|
|
166
|
+
name="scheduled_po_ontology_load",
|
|
167
|
+
config=unfreeze(
|
|
168
|
+
merge(
|
|
169
|
+
run_config_frozen__normal_env,
|
|
170
|
+
{"ops": {"load_ontology": {"config": {"source_ontology": "po"}}}},
|
|
171
|
+
)
|
|
172
|
+
),
|
|
173
|
+
resource_defs=resource_defs,
|
|
174
|
+
),
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
|
|
126
178
|
def asset_materialization_metadata(asset_event, key):
|
|
127
179
|
"""Get metadata from an asset materialization event.
|
|
128
180
|
|
|
@@ -462,7 +514,13 @@ def repo():
|
|
|
462
514
|
export_study_biosamples_metadata.to_job(**preset_normal),
|
|
463
515
|
ensure_alldocs.to_job(**preset_normal),
|
|
464
516
|
]
|
|
465
|
-
schedules = [
|
|
517
|
+
schedules = [
|
|
518
|
+
housekeeping_weekly,
|
|
519
|
+
ensure_alldocs_daily,
|
|
520
|
+
load_envo_ontology_weekly,
|
|
521
|
+
load_uberon_ontology_weekly,
|
|
522
|
+
load_po_ontology_weekly,
|
|
523
|
+
]
|
|
466
524
|
sensors = [
|
|
467
525
|
done_object_put_ops,
|
|
468
526
|
ensure_gold_translation_job,
|
|
@@ -1008,6 +1066,47 @@ def database_records_stitching():
|
|
|
1008
1066
|
},
|
|
1009
1067
|
},
|
|
1010
1068
|
),
|
|
1069
|
+
generate_update_script_for_insdc_biosample_identifiers.to_job(
|
|
1070
|
+
description="This job generates a MongoDB update script to add INSDC biosample identifiers to biosamples based on GOLD data.",
|
|
1071
|
+
resource_defs=resource_defs,
|
|
1072
|
+
config={
|
|
1073
|
+
"resources": merge(
|
|
1074
|
+
unfreeze(normal_resources),
|
|
1075
|
+
{
|
|
1076
|
+
"runtime_api_user_client": {
|
|
1077
|
+
"config": {
|
|
1078
|
+
"base_url": {"env": "API_HOST"},
|
|
1079
|
+
"username": {"env": "API_ADMIN_USER"},
|
|
1080
|
+
"password": {"env": "API_ADMIN_PASS"},
|
|
1081
|
+
},
|
|
1082
|
+
},
|
|
1083
|
+
"runtime_api_site_client": {
|
|
1084
|
+
"config": {
|
|
1085
|
+
"base_url": {"env": "API_HOST"},
|
|
1086
|
+
"client_id": {"env": "API_SITE_CLIENT_ID"},
|
|
1087
|
+
"client_secret": {"env": "API_SITE_CLIENT_SECRET"},
|
|
1088
|
+
"site_id": {"env": "API_SITE_ID"},
|
|
1089
|
+
},
|
|
1090
|
+
},
|
|
1091
|
+
"gold_api_client": {
|
|
1092
|
+
"config": {
|
|
1093
|
+
"base_url": {"env": "GOLD_API_BASE_URL"},
|
|
1094
|
+
"username": {"env": "GOLD_API_USERNAME"},
|
|
1095
|
+
"password": {"env": "GOLD_API_PASSWORD"},
|
|
1096
|
+
},
|
|
1097
|
+
},
|
|
1098
|
+
},
|
|
1099
|
+
),
|
|
1100
|
+
"ops": {
|
|
1101
|
+
"get_database_updater_inputs": {
|
|
1102
|
+
"config": {
|
|
1103
|
+
"nmdc_study_id": "",
|
|
1104
|
+
"gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
|
|
1105
|
+
}
|
|
1106
|
+
},
|
|
1107
|
+
},
|
|
1108
|
+
},
|
|
1109
|
+
),
|
|
1011
1110
|
]
|
|
1012
1111
|
|
|
1013
1112
|
|
nmdc_runtime/site/resources.py
CHANGED
|
@@ -147,6 +147,19 @@ class RuntimeApiUserClient(RuntimeApiClient):
|
|
|
147
147
|
response.raise_for_status()
|
|
148
148
|
return response.json()["resources"]
|
|
149
149
|
|
|
150
|
+
def get_data_generation_records_for_study(self, study_id: str):
|
|
151
|
+
# TODO: same as above, we are using a large max_page_size to avoid pagination.
|
|
152
|
+
response = self.request(
|
|
153
|
+
"GET",
|
|
154
|
+
f"/nmdcschema/data_generation_set",
|
|
155
|
+
{
|
|
156
|
+
"filter": json.dumps({"associated_studies": study_id}),
|
|
157
|
+
"max_page_size": 10000,
|
|
158
|
+
},
|
|
159
|
+
)
|
|
160
|
+
response.raise_for_status()
|
|
161
|
+
return response.json()["resources"]
|
|
162
|
+
|
|
150
163
|
def get_omics_processing_by_name(self, name: str):
|
|
151
164
|
response = self.request(
|
|
152
165
|
"POST",
|
|
nmdc_runtime/site/translation/neon_benthic_translator.py
CHANGED
|
@@ -349,6 +349,7 @@ class NeonBenthicDataTranslator(Translator):
|
|
|
349
349
|
description=f"sequencing results for {basename}",
|
|
350
350
|
type="nmdc:DataObject",
|
|
351
351
|
data_object_type=do_type,
|
|
352
|
+
data_category=nmdc.DataCategoryEnum.instrument_data.text,
|
|
352
353
|
in_manifest=manifest_id,
|
|
353
354
|
)
|
|
354
355
|
|
|
nmdc_runtime/site/translation/neon_surface_water_translator.py
CHANGED
|
@@ -397,6 +397,7 @@ class NeonSurfaceWaterDataTranslator(Translator):
|
|
|
397
397
|
description=f"sequencing results for {basename}",
|
|
398
398
|
type="nmdc:DataObject",
|
|
399
399
|
data_object_type=do_type,
|
|
400
|
+
data_category=nmdc.DataCategoryEnum.instrument_data.text,
|
|
400
401
|
in_manifest=manifest_id,
|
|
401
402
|
)
|
|
402
403
|
|
nmdc_runtime/util.py
CHANGED
|
@@ -11,7 +11,7 @@ from io import BytesIO
|
|
|
11
11
|
from itertools import chain
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
from uuid import uuid4
|
|
14
|
-
from typing import List, Optional, Set, Dict
|
|
14
|
+
from typing import Callable, List, Optional, Set, Dict
|
|
15
15
|
|
|
16
16
|
import fastjsonschema
|
|
17
17
|
import requests
|
|
@@ -510,7 +510,27 @@ def ensure_unique_id_indexes(mdb: MongoDatabase):
|
|
|
510
510
|
collection_name in schema_collection_names_with_id_field()
|
|
511
511
|
or all_docs_have_unique_id(mdb[collection_name])
|
|
512
512
|
):
|
|
513
|
-
|
|
513
|
+
# Check if index already exists, and if so, drop it if not unique
|
|
514
|
+
try:
|
|
515
|
+
existing_indexes = list(mdb[collection_name].list_indexes())
|
|
516
|
+
id_index = next(
|
|
517
|
+
(idx for idx in existing_indexes if idx["name"] == "id_1"), None
|
|
518
|
+
)
|
|
519
|
+
|
|
520
|
+
if id_index:
|
|
521
|
+
# If index exists but isn't unique, drop it so we can recreate
|
|
522
|
+
if not id_index.get("unique", False):
|
|
523
|
+
mdb[collection_name].drop_index("id_1")
|
|
524
|
+
|
|
525
|
+
# Create index with unique constraint
|
|
526
|
+
mdb[collection_name].create_index("id", unique=True)
|
|
527
|
+
except OperationFailure as e:
|
|
528
|
+
# If error is about index with same name, just continue
|
|
529
|
+
if "An existing index has the same name" in str(e):
|
|
530
|
+
continue
|
|
531
|
+
else:
|
|
532
|
+
# Re-raise other errors
|
|
533
|
+
raise
|
|
514
534
|
|
|
515
535
|
|
|
516
536
|
class UpdateStatement(BaseModel):
|
|
@@ -776,3 +796,37 @@ def validate_json(
|
|
|
776
796
|
return {"result": "All Okay!"}
|
|
777
797
|
else:
|
|
778
798
|
return {"result": "errors", "detail": validation_errors}
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
def decorate_if(condition: bool = False) -> Callable:
|
|
802
|
+
r"""
|
|
803
|
+
Decorator that applies another decorator only when `condition` is `True`.
|
|
804
|
+
|
|
805
|
+
Note: We implemented this so we could conditionally register
|
|
806
|
+
endpoints with FastAPI's `@router`.
|
|
807
|
+
|
|
808
|
+
Example usages:
|
|
809
|
+
A. Apply the `@router.get` decorator:
|
|
810
|
+
```python
|
|
811
|
+
@decorate_if(True)(router.get("/me"))
|
|
812
|
+
def get_me(...):
|
|
813
|
+
...
|
|
814
|
+
```
|
|
815
|
+
B. Bypass the `@router.get` decorator:
|
|
816
|
+
```python
|
|
817
|
+
@decorate_if(False)(router.get("/me"))
|
|
818
|
+
def get_me(...):
|
|
819
|
+
...
|
|
820
|
+
```
|
|
821
|
+
"""
|
|
822
|
+
|
|
823
|
+
def apply_original_decorator(original_decorator: Callable) -> Callable:
|
|
824
|
+
def check_condition(original_function: Callable) -> Callable:
|
|
825
|
+
if condition:
|
|
826
|
+
return original_decorator(original_function)
|
|
827
|
+
else:
|
|
828
|
+
return original_function
|
|
829
|
+
|
|
830
|
+
return check_condition
|
|
831
|
+
|
|
832
|
+
return apply_original_decorator
|
|
{nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nmdc_runtime
|
|
3
|
-
Version: 2.6.0
|
|
3
|
+
Version: 2.8.0
|
|
4
4
|
Summary: A runtime system for NMDC data management and orchestration
|
|
5
5
|
Home-page: https://github.com/microbiomedata/nmdc-runtime
|
|
6
6
|
Author: Donny Winston
|
|
@@ -127,7 +127,8 @@ source .env
|
|
|
127
127
|
set +a
|
|
128
128
|
```
|
|
129
129
|
|
|
130
|
-
If you are connecting to resources that require an SSH tunnel—for example, a MongoDB server that is only accessible on
|
|
130
|
+
If you are connecting to resources that require an SSH tunnel—for example, a MongoDB server that is only accessible on
|
|
131
|
+
the NERSC network—set up the SSH tunnel.
|
|
131
132
|
|
|
132
133
|
The following command could be useful to you, either directly or as a template (see `Makefile`).
|
|
133
134
|
|
|
@@ -149,6 +150,19 @@ The Dagit web server is viewable at http://127.0.0.1:3000/.
|
|
|
149
150
|
The FastAPI service is viewable at http://127.0.0.1:8000/ -- e.g., rendered documentation at
|
|
150
151
|
http://127.0.0.1:8000/redoc/.
|
|
151
152
|
|
|
153
|
+
|
|
154
|
+
* NOTE: Any time you add or change requirements in requirements/main.in or requirements/dev.in, you must run:
|
|
155
|
+
```
|
|
156
|
+
pip-compile --build-isolation --allow-unsafe --resolver=backtracking --strip-extras --output-file requirements/[main|dev].txt requirements/[main|dev].in
|
|
157
|
+
```
|
|
158
|
+
to generate main.txt and dev.txt files respectively. main.in is kind of like a poetry dependency stanza, dev.in is kind
|
|
159
|
+
of like poetry dev.dependencies stanza. main.txt and dev.txt are kind of like poetry.lock files to specify the exact
|
|
160
|
+
versions of dependencies to use. main.txt and dev.txt are combined in the docker compose build process to create the
|
|
161
|
+
final requirements.txt file and import the dependencies into the Docker image.
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
|
|
152
166
|
## Local Testing
|
|
153
167
|
|
|
154
168
|
Tests can be found in `tests` and are run with the following commands:
|
|
@@ -160,12 +174,13 @@ make test
|
|
|
160
174
|
# Run a Specific test file eg. tests/test_api/test_endpoints.py
|
|
161
175
|
make test ARGS="tests/test_api/test_endpoints.py"
|
|
162
176
|
```
|
|
177
|
+
docker compose --file docker-compose.test.yml run test
|
|
163
178
|
|
|
164
179
|
As you create Dagster solids and pipelines, add tests in `tests/` to check that your code behaves as
|
|
165
180
|
desired and does not break over time.
|
|
166
181
|
|
|
167
182
|
[For hints on how to write tests for solids and pipelines in Dagster, see their documentation
|
|
168
|
-
tutorial on Testing](https://docs.dagster.io/
|
|
183
|
+
tutorial on Testing](https://docs.dagster.io/guides/test/unit-testing-assets-and-ops).
|
|
169
184
|
|
|
170
185
|
### RAM usage
|
|
171
186
|
|
|
{nmdc_runtime-2.6.0.dist-info → nmdc_runtime-2.8.0.dist-info}/RECORD
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
nmdc_runtime/config.py,sha256=
|
|
2
|
+
nmdc_runtime/config.py,sha256=GKmovwYD3tIiUQX-mAOcHI8NMEMLhogjHDB9I8azA4c,195
|
|
3
3
|
nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
|
|
4
4
|
nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
nmdc_runtime/
|
|
5
|
+
nmdc_runtime/mongo_util.py,sha256=7NRvqFE8W2CUcpcXAA4KElUACIdAkBehZ9TBG4k7zNE,3000
|
|
6
|
+
nmdc_runtime/util.py,sha256=FfGNfcnHKS6Yzuwbdj0FtCcL-ks9HUjwWUfsPs1H2ao,33285
|
|
6
7
|
nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
8
|
nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
9
|
nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
|
|
@@ -36,10 +37,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
|
|
|
36
37
|
nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
38
|
nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
|
|
38
39
|
nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
nmdc_runtime/site/graphs.py,sha256=
|
|
40
|
-
nmdc_runtime/site/ops.py,sha256=
|
|
41
|
-
nmdc_runtime/site/repository.py,sha256=
|
|
42
|
-
nmdc_runtime/site/resources.py,sha256=
|
|
40
|
+
nmdc_runtime/site/graphs.py,sha256=cJfLCRYH6l3SW-0MYIOihORit6Fe_gziwQ6BJaph55c,17713
|
|
41
|
+
nmdc_runtime/site/ops.py,sha256=m9p8dlfNVpdEyu0o06cT9jMLkjZh0GGFxEQxDuDPUaA,65917
|
|
42
|
+
nmdc_runtime/site/repository.py,sha256=ZkIykDDaFTxB4QW1Eo_w-9IywQrXXTV7Ugogf8vQ604,47439
|
|
43
|
+
nmdc_runtime/site/resources.py,sha256=2R9X-06f9ZpDWYKltOkl_IIAScQGEEbsZF-URm4O6dM,20164
|
|
43
44
|
nmdc_runtime/site/util.py,sha256=h70UJCT9g-I63EJn0drZjv1iaQ8LHJTbG29R9kqJ04c,1821
|
|
44
45
|
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
46
|
nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
|
|
@@ -51,21 +52,21 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
51
52
|
nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
|
|
52
53
|
nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
|
|
53
54
|
nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
-
nmdc_runtime/site/export/ncbi_xml.py,sha256=
|
|
55
|
-
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=
|
|
55
|
+
nmdc_runtime/site/export/ncbi_xml.py,sha256=iZQHBr3LL5Q32I2L_Xpfp9n4ZtgAz_MwrlxIF5do7Pw,29715
|
|
56
|
+
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=RnoAW0HQwBG6JR63d9muI18RIC114wnX3iYPqOllw44,10700
|
|
56
57
|
nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ0Rut4AtYc0tXA,4844
|
|
57
58
|
nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
59
|
nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
|
|
59
60
|
nmdc_runtime/site/repair/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
|
-
nmdc_runtime/site/repair/database_updater.py,sha256=
|
|
61
|
+
nmdc_runtime/site/repair/database_updater.py,sha256=gRZ-NxZzXNd-vTIuygabEUqUSiF9eL4hL2rI9Qdf2WI,20764
|
|
61
62
|
nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
63
|
nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
|
|
63
64
|
nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
|
|
64
65
|
nmdc_runtime/site/translation/gold_translator.py,sha256=HGbWeuxppqlVfU8F5oKTYIDoC6qaftugJeWFIALB9XE,32720
|
|
65
66
|
nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
|
|
66
|
-
nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=
|
|
67
|
-
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=
|
|
68
|
-
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=
|
|
67
|
+
nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=8_QF75Gf-dc2xVeO6jzTmdDrlGdh1-QrLJKG2SwUhCA,23797
|
|
68
|
+
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=IMeq4ABgWaSUbB_gmG8vBCMeynQSlbCUw9p2be6o8kE,38620
|
|
69
|
+
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=Js8_r6vHBW8b-_BpFySTUuYOFe7r51k8HwaNCQ7nAAg,30587
|
|
69
70
|
nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
|
|
70
71
|
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=UEeqlkz_YGqcnx8vomFysetOlXxDu23q0Ryr93SZy78,41684
|
|
71
72
|
nmdc_runtime/site/translation/translator.py,sha256=V6Aq0y03LoQ4LTL2iHDHxGTh_eMjOmDJJSwNHSrp2wo,837
|
|
@@ -75,9 +76,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
|
|
|
75
76
|
nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
|
|
76
77
|
nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
|
|
77
78
|
nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
|
|
78
|
-
nmdc_runtime-2.
|
|
79
|
-
nmdc_runtime-2.
|
|
80
|
-
nmdc_runtime-2.
|
|
81
|
-
nmdc_runtime-2.
|
|
82
|
-
nmdc_runtime-2.
|
|
83
|
-
nmdc_runtime-2.
|
|
79
|
+
nmdc_runtime-2.8.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
80
|
+
nmdc_runtime-2.8.0.dist-info/METADATA,sha256=B8Vhde36JVAAwdCqKkcFaTyF13D0uWL8KEQnsyJUajc,8953
|
|
81
|
+
nmdc_runtime-2.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
82
|
+
nmdc_runtime-2.8.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
|
|
83
|
+
nmdc_runtime-2.8.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
|
|
84
|
+
nmdc_runtime-2.8.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|