nmdc-runtime 2.9.0__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/Dockerfile +167 -0
- nmdc_runtime/api/analytics.py +90 -0
- nmdc_runtime/api/boot/capabilities.py +9 -0
- nmdc_runtime/api/boot/object_types.py +126 -0
- nmdc_runtime/api/boot/triggers.py +84 -0
- nmdc_runtime/api/boot/workflows.py +116 -0
- nmdc_runtime/api/core/auth.py +208 -0
- nmdc_runtime/api/core/idgen.py +200 -0
- nmdc_runtime/api/core/metadata.py +788 -0
- nmdc_runtime/api/core/util.py +109 -0
- nmdc_runtime/api/db/mongo.py +435 -0
- nmdc_runtime/api/db/s3.py +37 -0
- nmdc_runtime/api/endpoints/capabilities.py +25 -0
- nmdc_runtime/api/endpoints/find.py +634 -0
- nmdc_runtime/api/endpoints/jobs.py +143 -0
- nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
- nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
- nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
- nmdc_runtime/api/endpoints/metadata.py +260 -0
- nmdc_runtime/api/endpoints/nmdcschema.py +502 -0
- nmdc_runtime/api/endpoints/object_types.py +38 -0
- nmdc_runtime/api/endpoints/objects.py +270 -0
- nmdc_runtime/api/endpoints/operations.py +78 -0
- nmdc_runtime/api/endpoints/queries.py +701 -0
- nmdc_runtime/api/endpoints/runs.py +98 -0
- nmdc_runtime/api/endpoints/search.py +38 -0
- nmdc_runtime/api/endpoints/sites.py +205 -0
- nmdc_runtime/api/endpoints/triggers.py +25 -0
- nmdc_runtime/api/endpoints/users.py +214 -0
- nmdc_runtime/api/endpoints/util.py +796 -0
- nmdc_runtime/api/endpoints/workflows.py +353 -0
- nmdc_runtime/api/entrypoint.sh +7 -0
- nmdc_runtime/api/main.py +425 -0
- nmdc_runtime/api/middleware.py +43 -0
- nmdc_runtime/api/models/capability.py +14 -0
- nmdc_runtime/api/models/id.py +92 -0
- nmdc_runtime/api/models/job.py +37 -0
- nmdc_runtime/api/models/lib/helpers.py +78 -0
- nmdc_runtime/api/models/metadata.py +11 -0
- nmdc_runtime/api/models/nmdc_schema.py +146 -0
- nmdc_runtime/api/models/object.py +180 -0
- nmdc_runtime/api/models/object_type.py +20 -0
- nmdc_runtime/api/models/operation.py +66 -0
- nmdc_runtime/api/models/query.py +246 -0
- nmdc_runtime/api/models/query_continuation.py +111 -0
- nmdc_runtime/api/models/run.py +161 -0
- nmdc_runtime/api/models/site.py +87 -0
- nmdc_runtime/api/models/trigger.py +13 -0
- nmdc_runtime/api/models/user.py +140 -0
- nmdc_runtime/api/models/util.py +260 -0
- nmdc_runtime/api/models/workflow.py +15 -0
- nmdc_runtime/api/openapi.py +178 -0
- nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
- nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
- nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
- nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
- nmdc_runtime/config.py +7 -8
- nmdc_runtime/minter/adapters/repository.py +22 -2
- nmdc_runtime/minter/config.py +2 -0
- nmdc_runtime/minter/domain/model.py +55 -1
- nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
- nmdc_runtime/mongo_util.py +1 -2
- nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
- nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
- nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
- nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
- nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
- nmdc_runtime/site/dagster.yaml +53 -0
- nmdc_runtime/site/entrypoint-daemon.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
- nmdc_runtime/site/entrypoint-dagit.sh +26 -0
- nmdc_runtime/site/export/ncbi_xml.py +633 -13
- nmdc_runtime/site/export/ncbi_xml_utils.py +115 -1
- nmdc_runtime/site/graphs.py +8 -22
- nmdc_runtime/site/ops.py +147 -181
- nmdc_runtime/site/repository.py +2 -112
- nmdc_runtime/site/resources.py +16 -3
- nmdc_runtime/site/translation/gold_translator.py +4 -12
- nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
- nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
- nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
- nmdc_runtime/site/translation/submission_portal_translator.py +84 -68
- nmdc_runtime/site/translation/translator.py +63 -1
- nmdc_runtime/site/util.py +8 -3
- nmdc_runtime/site/validation/util.py +10 -5
- nmdc_runtime/site/workspace.yaml +13 -0
- nmdc_runtime/static/NMDC_logo.svg +1073 -0
- nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
- nmdc_runtime/static/README.md +5 -0
- nmdc_runtime/static/favicon.ico +0 -0
- nmdc_runtime/util.py +90 -48
- nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
- nmdc_runtime-2.11.0.dist-info/RECORD +128 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
- nmdc_runtime/containers.py +0 -14
- nmdc_runtime/core/db/Database.py +0 -15
- nmdc_runtime/core/exceptions/__init__.py +0 -23
- nmdc_runtime/core/exceptions/base.py +0 -47
- nmdc_runtime/core/exceptions/token.py +0 -13
- nmdc_runtime/domain/users/queriesInterface.py +0 -18
- nmdc_runtime/domain/users/userSchema.py +0 -37
- nmdc_runtime/domain/users/userService.py +0 -14
- nmdc_runtime/infrastructure/database/db.py +0 -3
- nmdc_runtime/infrastructure/database/models/user.py +0 -10
- nmdc_runtime/lib/__init__.py +0 -1
- nmdc_runtime/lib/extract_nmdc_data.py +0 -41
- nmdc_runtime/lib/load_nmdc_data.py +0 -121
- nmdc_runtime/lib/nmdc_dataframes.py +0 -829
- nmdc_runtime/lib/nmdc_etl_class.py +0 -402
- nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
- nmdc_runtime/site/drsobjects/ingest.py +0 -93
- nmdc_runtime/site/drsobjects/registration.py +0 -131
- nmdc_runtime/site/translation/emsl.py +0 -43
- nmdc_runtime/site/translation/gold.py +0 -53
- nmdc_runtime/site/translation/jgi.py +0 -32
- nmdc_runtime/site/translation/util.py +0 -132
- nmdc_runtime/site/validation/jgi.py +0 -43
- nmdc_runtime-2.9.0.dist-info/METADATA +0 -214
- nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
- nmdc_runtime-2.9.0.dist-info/top_level.txt +0 -1
- /nmdc_runtime/{client → api}/__init__.py +0 -0
- /nmdc_runtime/{core → api/boot}/__init__.py +0 -0
- /nmdc_runtime/{core/db → api/core}/__init__.py +0 -0
- /nmdc_runtime/{domain → api/db}/__init__.py +0 -0
- /nmdc_runtime/{domain/users → api/endpoints}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure → api/endpoints/lib}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database → api/models}/__init__.py +0 -0
- /nmdc_runtime/{infrastructure/database/models → api/models/lib}/__init__.py +0 -0
- /nmdc_runtime/{site/drsobjects/__init__.py → api/models/minter.py} +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from io import BytesIO, StringIO
|
|
2
|
-
from typing import Any, Dict, List
|
|
2
|
+
from typing import Any, Dict, List
|
|
3
3
|
|
|
4
4
|
from nmdc_runtime.api.endpoints.util import strip_oid
|
|
5
5
|
from nmdc_runtime.minter.config import typecodes
|
|
@@ -275,6 +275,120 @@ def load_mappings(url):
|
|
|
275
275
|
return attribute_mappings, slot_range_mappings
|
|
276
276
|
|
|
277
277
|
|
|
278
|
+
def _collection_date_values(biosample: Dict[str, Any]) -> List[Any]:
    """Return the biosample's raw `collection_date` as a 0- or 1-element list."""
    collection_date = biosample.get("collection_date")
    if isinstance(collection_date, dict):
        raw_value = collection_date.get("has_raw_value")
        # A missing or null raw value carries no information; skip it so it
        # cannot break the later min()/max() comparison.
        return [] if raw_value is None else [raw_value]
    if isinstance(collection_date, str):
        return [collection_date]
    return []


def _depth_values(biosample: Dict[str, Any]) -> List[Any]:
    """Return every numeric `depth` value asserted on the biosample."""
    depth = biosample.get("depth")
    values: List[Any] = []
    if isinstance(depth, dict):
        # A single numeric value takes precedence over a min/max range.
        if "has_numeric_value" in depth:
            values = [depth["has_numeric_value"]]
        elif (
            "has_minimum_numeric_value" in depth
            and "has_maximum_numeric_value" in depth
        ):
            values = [
                depth["has_minimum_numeric_value"],
                depth["has_maximum_numeric_value"],
            ]
    elif isinstance(depth, (int, float)):
        values = [depth]
    # Null values cannot be ordered against numbers; drop them.
    return [value for value in values if value is not None]


def _aggregate_collection_dates(collection_dates: List[Any]) -> Any:
    """Collapse dates into `earliest/latest`, a single date, or None if empty."""
    if not collection_dates:
        return None
    if len(collection_dates) > 1:
        # example: 2017-06-05T16:50Z/2017-06-05T17:47Z
        return f"{min(collection_dates)}/{max(collection_dates)}"
    return collection_dates[0]


def _aggregate_depths(depths: List[Any]) -> Any:
    """Collapse depths into `min-max m`, a single `value m`, or None if empty."""
    if not depths:
        return None
    min_depth = min(depths)
    max_depth = max(depths)
    # NOTE(review): the unit is hard-coded to "m"; any unit recorded on the
    # depth value itself is not consulted -- confirm inputs are always metres.
    if min_depth != max_depth:
        return f"{min_depth}-{max_depth} m"  # example: 0-10 m
    return f"{min_depth} m"


def check_pooling_for_biosamples(
    material_processing_set: Collection, biosamples_list: List[Dict[str, Any]]
) -> Dict[str, Dict[str, Any]]:
    """Check which biosamples are part of pooling processes and return pooling information.

    A biosample is considered part of a Pooling process when its id has been
    asserted on the `has_input` slot/key of an `nmdc:Pooling` process instance.
    If a biosample is an input of several Pooling processes, the information of
    the last one returned by the query is kept.

    :param material_processing_set: reference to the material_processing_set collection
    :param biosamples_list: list of all biosamples to check; each must have an `id` key
    :return: dictionary mapping biosample_id to pooling information (empty dict if
        not pooled). Pooling information has the keys `processed_sample_id`,
        `pooling_process_id`, `pooled_biosample_ids`, `aggregated_collection_date`
        and `aggregated_depth`. Every biosample receives its own dictionary, so
        mutating one entry never affects another.
    """
    biosample_lookup = {bs["id"]: bs for bs in biosamples_list}

    # initialize all biosamples as not pooled
    result: Dict[str, Dict[str, Any]] = {bs_id: {} for bs_id in biosample_lookup}

    for pooling_process in material_processing_set.find({"type": "nmdc:Pooling"}):
        # `or []` also covers a slot that is present but explicitly null
        pooled_biosample_ids = pooling_process.get("has_input") or []

        # only biosamples supplied by the caller contribute to the aggregates
        pooled_biosamples = [
            biosample_lookup[bs_id]
            for bs_id in pooled_biosample_ids
            if bs_id in biosample_lookup
        ]
        if not pooled_biosamples:
            # this pooling process does not involve any requested biosample
            continue

        # get the (first) processed sample output from the pooling process
        processed_sample_id = next(
            (
                output_id
                for output_id in pooling_process.get("has_output") or []
                if get_classname_from_typecode(output_id) == "ProcessedSample"
            ),
            None,
        )

        # aggregate the values asserted on the `collection_date` and `depth`
        # slots of each biosample that is part of this pooling process
        collection_dates: List[Any] = []
        depths: List[Any] = []
        for biosample in pooled_biosamples:
            collection_dates.extend(_collection_date_values(biosample))
            depths.extend(_depth_values(biosample))

        aggregated_collection_date = _aggregate_collection_dates(collection_dates)
        aggregated_depth = _aggregate_depths(depths)

        # update all biosamples that are part of this pooling process; build a
        # fresh dict (and id list) per biosample so entries are not aliased
        for biosample in pooled_biosamples:
            result[biosample["id"]] = {
                "processed_sample_id": processed_sample_id,
                "pooling_process_id": pooling_process.get("id"),
                "pooled_biosample_ids": list(pooled_biosample_ids),
                "aggregated_collection_date": aggregated_collection_date,
                "aggregated_depth": aggregated_depth,
            }

    return result
|
|
390
|
+
|
|
391
|
+
|
|
278
392
|
def validate_xml(xml, xsd_url):
|
|
279
393
|
response = requests.get(xsd_url)
|
|
280
394
|
response.raise_for_status()
|
nmdc_runtime/site/graphs.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from dagster import graph
|
|
1
|
+
from dagster import graph
|
|
2
2
|
|
|
3
3
|
from nmdc_runtime.site.ops import (
|
|
4
|
-
build_merged_db,
|
|
5
4
|
generate_biosample_set_for_nmdc_study_from_gold,
|
|
6
5
|
nmdc_schema_database_export_filename,
|
|
7
6
|
nmdc_schema_database_from_gold_study,
|
|
@@ -12,8 +11,6 @@ from nmdc_runtime.site.ops import (
|
|
|
12
11
|
gold_projects_by_study,
|
|
13
12
|
gold_study,
|
|
14
13
|
poll_for_run_completion,
|
|
15
|
-
run_etl,
|
|
16
|
-
local_file_to_api_object,
|
|
17
14
|
get_operation,
|
|
18
15
|
produce_curated_db,
|
|
19
16
|
delete_operations,
|
|
@@ -56,6 +53,7 @@ from nmdc_runtime.site.ops import (
|
|
|
56
53
|
get_data_objects_from_biosamples,
|
|
57
54
|
get_nucleotide_sequencing_from_biosamples,
|
|
58
55
|
get_library_preparation_from_biosamples,
|
|
56
|
+
get_aggregated_pooled_biosamples,
|
|
59
57
|
get_all_instruments,
|
|
60
58
|
get_ncbi_export_pipeline_inputs,
|
|
61
59
|
ncbi_submission_xml_from_nmdc_study,
|
|
@@ -70,24 +68,6 @@ from nmdc_runtime.site.ops import (
|
|
|
70
68
|
from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
|
|
71
69
|
|
|
72
70
|
|
|
73
|
-
@graph
|
|
74
|
-
def gold_translation():
|
|
75
|
-
"""
|
|
76
|
-
Translating an export of the JGI GOLD [1] SQL database to the NMDC database JSON schema.
|
|
77
|
-
|
|
78
|
-
[1] Genomes OnLine Database (GOLD) <https://gold.jgi.doe.gov/>.
|
|
79
|
-
"""
|
|
80
|
-
local_file_to_api_object(run_etl(build_merged_db()))
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
@graph()
|
|
84
|
-
def gold_translation_curation():
|
|
85
|
-
# TODO
|
|
86
|
-
# - have produce_curated_db do actual curation (see notebook), persisting to db.
|
|
87
|
-
# - more steps in pipeline? Or handoff via run_status_sensor on DagsterRunStatus.SUCCESS.
|
|
88
|
-
produce_curated_db(get_operation())
|
|
89
|
-
|
|
90
|
-
|
|
91
71
|
@graph()
|
|
92
72
|
def create_objects_from_site_object_puts():
|
|
93
73
|
delete_operations(
|
|
@@ -194,6 +174,7 @@ def translate_metadata_submission_to_nmdc_schema_database():
|
|
|
194
174
|
data_object_mapping_file_url,
|
|
195
175
|
biosample_extras_file_url,
|
|
196
176
|
biosample_extras_slot_mapping_file_url,
|
|
177
|
+
study_id,
|
|
197
178
|
) = get_submission_portal_pipeline_inputs()
|
|
198
179
|
|
|
199
180
|
metadata_submission = fetch_nmdc_portal_submission_by_id(submission_id)
|
|
@@ -214,6 +195,7 @@ def translate_metadata_submission_to_nmdc_schema_database():
|
|
|
214
195
|
biosample_extras=biosample_extras,
|
|
215
196
|
biosample_extras_slot_mapping=biosample_extras_slot_mapping,
|
|
216
197
|
instrument_mapping=instrument_mapping,
|
|
198
|
+
study_id=study_id,
|
|
217
199
|
)
|
|
218
200
|
|
|
219
201
|
validate_metadata(database)
|
|
@@ -234,6 +216,7 @@ def ingest_metadata_submission():
|
|
|
234
216
|
data_object_mapping_file_url,
|
|
235
217
|
biosample_extras_file_url,
|
|
236
218
|
biosample_extras_slot_mapping_file_url,
|
|
219
|
+
study_id,
|
|
237
220
|
) = get_submission_portal_pipeline_inputs()
|
|
238
221
|
|
|
239
222
|
metadata_submission = fetch_nmdc_portal_submission_by_id(submission_id)
|
|
@@ -254,6 +237,7 @@ def ingest_metadata_submission():
|
|
|
254
237
|
biosample_extras=biosample_extras,
|
|
255
238
|
biosample_extras_slot_mapping=biosample_extras_slot_mapping,
|
|
256
239
|
instrument_mapping=instrument_mapping,
|
|
240
|
+
study_id=study_id,
|
|
257
241
|
)
|
|
258
242
|
|
|
259
243
|
log_database_ids(database)
|
|
@@ -493,6 +477,7 @@ def nmdc_study_to_ncbi_submission_export():
|
|
|
493
477
|
)
|
|
494
478
|
data_object_records = get_data_objects_from_biosamples(biosamples)
|
|
495
479
|
library_preparation_records = get_library_preparation_from_biosamples(biosamples)
|
|
480
|
+
pooled_biosamples_data = get_aggregated_pooled_biosamples(biosamples)
|
|
496
481
|
all_instruments = get_all_instruments()
|
|
497
482
|
xml_data = ncbi_submission_xml_from_nmdc_study(
|
|
498
483
|
nmdc_study,
|
|
@@ -502,6 +487,7 @@ def nmdc_study_to_ncbi_submission_export():
|
|
|
502
487
|
data_object_records,
|
|
503
488
|
library_preparation_records,
|
|
504
489
|
all_instruments,
|
|
490
|
+
pooled_biosamples_data,
|
|
505
491
|
)
|
|
506
492
|
ncbi_submission_xml_asset(xml_data)
|
|
507
493
|
|