nmdc-runtime 1.9.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/minter/config.py +59 -3
- nmdc_runtime/site/export/ncbi_xml.py +29 -25
- nmdc_runtime/site/export/ncbi_xml_utils.py +5 -5
- nmdc_runtime/site/export/study_metadata.py +3 -1
- nmdc_runtime/site/graphs.py +71 -15
- nmdc_runtime/site/ops.py +135 -42
- nmdc_runtime/site/repository.py +16 -4
- nmdc_runtime/site/translation/gold_translator.py +112 -43
- nmdc_runtime/site/translation/neon_benthic_translator.py +59 -34
- nmdc_runtime/site/translation/neon_soil_translator.py +72 -48
- nmdc_runtime/site/translation/neon_surface_water_translator.py +61 -32
- nmdc_runtime/site/translation/neon_utils.py +19 -6
- nmdc_runtime/site/translation/submission_portal_translator.py +67 -36
- {nmdc_runtime-1.9.0.dist-info → nmdc_runtime-2.0.0.dist-info}/METADATA +1 -1
- {nmdc_runtime-1.9.0.dist-info → nmdc_runtime-2.0.0.dist-info}/RECORD +19 -19
- {nmdc_runtime-1.9.0.dist-info → nmdc_runtime-2.0.0.dist-info}/WHEEL +1 -1
- {nmdc_runtime-1.9.0.dist-info → nmdc_runtime-2.0.0.dist-info}/LICENSE +0 -0
- {nmdc_runtime-1.9.0.dist-info → nmdc_runtime-2.0.0.dist-info}/entry_points.txt +0 -0
- {nmdc_runtime-1.9.0.dist-info → nmdc_runtime-2.0.0.dist-info}/top_level.txt +0 -0
nmdc_runtime/site/ops.py
CHANGED
|
@@ -9,6 +9,7 @@ from datetime import datetime, timezone
|
|
|
9
9
|
from io import BytesIO, StringIO
|
|
10
10
|
from typing import Tuple
|
|
11
11
|
from zipfile import ZipFile
|
|
12
|
+
from itertools import chain
|
|
12
13
|
|
|
13
14
|
import pandas as pd
|
|
14
15
|
import requests
|
|
@@ -65,7 +66,7 @@ from nmdc_runtime.api.models.util import ResultT
|
|
|
65
66
|
from nmdc_runtime.site.export.ncbi_xml import NCBISubmissionXML
|
|
66
67
|
from nmdc_runtime.site.export.ncbi_xml_utils import (
|
|
67
68
|
fetch_data_objects_from_biosamples,
|
|
68
|
-
|
|
69
|
+
fetch_nucleotide_sequencing_from_biosamples,
|
|
69
70
|
fetch_library_preparation_from_biosamples,
|
|
70
71
|
)
|
|
71
72
|
from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
|
|
@@ -582,9 +583,24 @@ def add_output_run_event(context: OpExecutionContext, outputs: List[str]):
|
|
|
582
583
|
context.log.info(f"No NMDC RunEvent doc for Dagster Run {context.run_id}")
|
|
583
584
|
|
|
584
585
|
|
|
585
|
-
@op(
|
|
586
|
-
|
|
587
|
-
|
|
586
|
+
@op(
|
|
587
|
+
config_schema={
|
|
588
|
+
"study_id": str,
|
|
589
|
+
"study_type": str,
|
|
590
|
+
"gold_nmdc_instrument_mapping_file_url": str,
|
|
591
|
+
},
|
|
592
|
+
out={
|
|
593
|
+
"study_id": Out(str),
|
|
594
|
+
"study_type": Out(str),
|
|
595
|
+
"gold_nmdc_instrument_mapping_file_url": Out(str),
|
|
596
|
+
},
|
|
597
|
+
)
|
|
598
|
+
def get_gold_study_pipeline_inputs(context: OpExecutionContext) -> Tuple[str, str, str]:
|
|
599
|
+
return (
|
|
600
|
+
context.op_config["study_id"],
|
|
601
|
+
context.op_config["study_type"],
|
|
602
|
+
context.op_config["gold_nmdc_instrument_mapping_file_url"],
|
|
603
|
+
)
|
|
588
604
|
|
|
589
605
|
|
|
590
606
|
@op(required_resource_keys={"gold_api_client"})
|
|
@@ -621,9 +637,11 @@ def gold_study(context: OpExecutionContext, study_id: str) -> Dict[str, Any]:
|
|
|
621
637
|
def nmdc_schema_database_from_gold_study(
|
|
622
638
|
context: OpExecutionContext,
|
|
623
639
|
study: Dict[str, Any],
|
|
640
|
+
study_type: str,
|
|
624
641
|
projects: List[Dict[str, Any]],
|
|
625
642
|
biosamples: List[Dict[str, Any]],
|
|
626
643
|
analysis_projects: List[Dict[str, Any]],
|
|
644
|
+
gold_nmdc_instrument_map_df: pd.DataFrame,
|
|
627
645
|
) -> nmdc.Database:
|
|
628
646
|
client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
|
|
629
647
|
|
|
@@ -632,7 +650,13 @@ def nmdc_schema_database_from_gold_study(
|
|
|
632
650
|
return response.json()
|
|
633
651
|
|
|
634
652
|
translator = GoldStudyTranslator(
|
|
635
|
-
study,
|
|
653
|
+
study,
|
|
654
|
+
study_type,
|
|
655
|
+
biosamples,
|
|
656
|
+
projects,
|
|
657
|
+
analysis_projects,
|
|
658
|
+
gold_nmdc_instrument_map_df,
|
|
659
|
+
id_minter=id_minter,
|
|
636
660
|
)
|
|
637
661
|
database = translator.get_database()
|
|
638
662
|
return database
|
|
@@ -641,7 +665,7 @@ def nmdc_schema_database_from_gold_study(
|
|
|
641
665
|
@op(
|
|
642
666
|
out={
|
|
643
667
|
"submission_id": Out(),
|
|
644
|
-
"
|
|
668
|
+
"nucleotide_sequencing_mapping_file_url": Out(Optional[str]),
|
|
645
669
|
"data_object_mapping_file_url": Out(Optional[str]),
|
|
646
670
|
"biosample_extras_file_url": Out(Optional[str]),
|
|
647
671
|
"biosample_extras_slot_mapping_file_url": Out(Optional[str]),
|
|
@@ -649,14 +673,14 @@ def nmdc_schema_database_from_gold_study(
|
|
|
649
673
|
)
|
|
650
674
|
def get_submission_portal_pipeline_inputs(
|
|
651
675
|
submission_id: str,
|
|
652
|
-
|
|
676
|
+
nucleotide_sequencing_mapping_file_url: Optional[str],
|
|
653
677
|
data_object_mapping_file_url: Optional[str],
|
|
654
678
|
biosample_extras_file_url: Optional[str],
|
|
655
679
|
biosample_extras_slot_mapping_file_url: Optional[str],
|
|
656
680
|
) -> Tuple[str, str | None, str | None, str | None, str | None]:
|
|
657
681
|
return (
|
|
658
682
|
submission_id,
|
|
659
|
-
|
|
683
|
+
nucleotide_sequencing_mapping_file_url,
|
|
660
684
|
data_object_mapping_file_url,
|
|
661
685
|
biosample_extras_file_url,
|
|
662
686
|
biosample_extras_slot_mapping_file_url,
|
|
@@ -677,7 +701,7 @@ def fetch_nmdc_portal_submission_by_id(
|
|
|
677
701
|
def translate_portal_submission_to_nmdc_schema_database(
|
|
678
702
|
context: OpExecutionContext,
|
|
679
703
|
metadata_submission: Dict[str, Any],
|
|
680
|
-
|
|
704
|
+
nucleotide_sequencing_mapping: List,
|
|
681
705
|
data_object_mapping: List,
|
|
682
706
|
study_category: Optional[str],
|
|
683
707
|
study_doi_category: Optional[str],
|
|
@@ -694,8 +718,8 @@ def translate_portal_submission_to_nmdc_schema_database(
|
|
|
694
718
|
|
|
695
719
|
translator = SubmissionPortalTranslator(
|
|
696
720
|
metadata_submission,
|
|
697
|
-
|
|
698
|
-
data_object_mapping,
|
|
721
|
+
nucleotide_sequencing_mapping=nucleotide_sequencing_mapping,
|
|
722
|
+
data_object_mapping=data_object_mapping,
|
|
699
723
|
id_minter=id_minter,
|
|
700
724
|
study_category=study_category,
|
|
701
725
|
study_doi_category=study_doi_category,
|
|
@@ -840,6 +864,7 @@ def nmdc_schema_database_from_neon_soil_data(
|
|
|
840
864
|
sls_data: Dict[str, pd.DataFrame],
|
|
841
865
|
neon_envo_mappings_file: pd.DataFrame,
|
|
842
866
|
neon_raw_data_file_mappings_file: pd.DataFrame,
|
|
867
|
+
neon_nmdc_instrument_mapping_file: pd.DataFrame,
|
|
843
868
|
) -> nmdc.Database:
|
|
844
869
|
client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
|
|
845
870
|
|
|
@@ -852,6 +877,7 @@ def nmdc_schema_database_from_neon_soil_data(
|
|
|
852
877
|
sls_data,
|
|
853
878
|
neon_envo_mappings_file,
|
|
854
879
|
neon_raw_data_file_mappings_file,
|
|
880
|
+
neon_nmdc_instrument_mapping_file,
|
|
855
881
|
id_minter=id_minter,
|
|
856
882
|
)
|
|
857
883
|
|
|
@@ -866,6 +892,7 @@ def nmdc_schema_database_from_neon_benthic_data(
|
|
|
866
892
|
site_code_mapping: Dict[str, str],
|
|
867
893
|
neon_envo_mappings_file: pd.DataFrame,
|
|
868
894
|
neon_raw_data_file_mappings_file: pd.DataFrame,
|
|
895
|
+
neon_nmdc_instrument_mapping_file: pd.DataFrame,
|
|
869
896
|
) -> nmdc.Database:
|
|
870
897
|
client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
|
|
871
898
|
|
|
@@ -878,6 +905,7 @@ def nmdc_schema_database_from_neon_benthic_data(
|
|
|
878
905
|
site_code_mapping,
|
|
879
906
|
neon_envo_mappings_file,
|
|
880
907
|
neon_raw_data_file_mappings_file,
|
|
908
|
+
neon_nmdc_instrument_mapping_file,
|
|
881
909
|
id_minter=id_minter,
|
|
882
910
|
)
|
|
883
911
|
|
|
@@ -892,6 +920,7 @@ def nmdc_schema_database_from_neon_surface_water_data(
|
|
|
892
920
|
site_code_mapping: Dict[str, str],
|
|
893
921
|
neon_envo_mappings_file: pd.DataFrame,
|
|
894
922
|
neon_raw_data_file_mappings_file: pd.DataFrame,
|
|
923
|
+
neon_nmdc_instrument_mapping_file: pd.DataFrame,
|
|
895
924
|
) -> nmdc.Database:
|
|
896
925
|
client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
|
|
897
926
|
|
|
@@ -904,6 +933,7 @@ def nmdc_schema_database_from_neon_surface_water_data(
|
|
|
904
933
|
site_code_mapping,
|
|
905
934
|
neon_envo_mappings_file,
|
|
906
935
|
neon_raw_data_file_mappings_file,
|
|
936
|
+
neon_nmdc_instrument_mapping_file,
|
|
907
937
|
id_minter=id_minter,
|
|
908
938
|
)
|
|
909
939
|
|
|
@@ -915,15 +945,18 @@ def nmdc_schema_database_from_neon_surface_water_data(
|
|
|
915
945
|
out={
|
|
916
946
|
"neon_envo_mappings_file_url": Out(),
|
|
917
947
|
"neon_raw_data_file_mappings_file_url": Out(),
|
|
948
|
+
"neon_nmdc_instrument_mapping_file_url": Out(),
|
|
918
949
|
}
|
|
919
950
|
)
|
|
920
951
|
def get_neon_pipeline_inputs(
|
|
921
952
|
neon_envo_mappings_file_url: str,
|
|
922
953
|
neon_raw_data_file_mappings_file_url: str,
|
|
923
|
-
|
|
954
|
+
neon_nmdc_instrument_mapping_file_url: str,
|
|
955
|
+
) -> Tuple[str, str, str]:
|
|
924
956
|
return (
|
|
925
957
|
neon_envo_mappings_file_url,
|
|
926
958
|
neon_raw_data_file_mappings_file_url,
|
|
959
|
+
neon_nmdc_instrument_mapping_file_url,
|
|
927
960
|
)
|
|
928
961
|
|
|
929
962
|
|
|
@@ -999,50 +1032,108 @@ def materialize_alldocs(context) -> int:
|
|
|
999
1032
|
mdb = context.resources.mongo.db
|
|
1000
1033
|
collection_names = populated_schema_collection_names_with_id_field(mdb)
|
|
1001
1034
|
|
|
1002
|
-
for
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1035
|
+
# Insert a no-op as an anchor point for this comment.
|
|
1036
|
+
#
|
|
1037
|
+
# Note: There used to be code here that `assert`-ed that each collection could only contain documents of a single
|
|
1038
|
+
# type. With the legacy schema, that assertion was true. With the Berkeley schema, it is false. That code was
|
|
1039
|
+
# in place because subsequent code (further below) used a single document in a collection as the source of the
|
|
1040
|
+
# class ancestry information of _all_ documents in that collection; an optimization that spared us from
|
|
1041
|
+
# having to do the same for every single document in that collection. With the Berkeley schema, we have
|
|
1042
|
+
# eliminated that optimization (since it is inadequate; it would produce some incorrect class ancestries
|
|
1043
|
+
# for descendants of `PlannedProcess`, for example).
|
|
1044
|
+
#
|
|
1045
|
+
pass
|
|
1006
1046
|
|
|
1007
1047
|
context.log.info(f"{collection_names=}")
|
|
1008
1048
|
|
|
1009
1049
|
# Drop any existing `alldocs` collection (e.g. from previous use of this op).
|
|
1050
|
+
#
|
|
1051
|
+
# FIXME: This "nuke and pave" approach introduces a race condition.
|
|
1052
|
+
# For example, if someone were to visit an API endpoint that uses the "alldocs" collection,
|
|
1053
|
+
# the endpoint would fail to perform its job since the "alldocs" collection is temporarily missing.
|
|
1054
|
+
#
|
|
1010
1055
|
mdb.alldocs.drop()
|
|
1011
1056
|
|
|
1012
1057
|
# Build alldocs
|
|
1013
1058
|
context.log.info("constructing `alldocs` collection")
|
|
1014
1059
|
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
try:
|
|
1018
|
-
nmdcdb = NMDCDatabase(
|
|
1019
|
-
**{collection: [dissoc(mdb[collection].find_one(), "_id")]}
|
|
1020
|
-
)
|
|
1021
|
-
exemplar = getattr(nmdcdb, collection)[0]
|
|
1022
|
-
newdoc_type: list[str] = class_hierarchy_as_list(exemplar)
|
|
1023
|
-
except ValueError as e:
|
|
1024
|
-
context.log.info(f"Collection {collection} does not exist.")
|
|
1025
|
-
raise e
|
|
1026
|
-
|
|
1060
|
+
# For each collection, group its documents by their `type` value, transform them, and load them into `alldocs`.
|
|
1061
|
+
for collection_name in collection_names:
|
|
1027
1062
|
context.log.info(
|
|
1028
|
-
f"Found {mdb[
|
|
1029
|
-
)
|
|
1030
|
-
# For each document in this collection, replace the value of the `type` field with
|
|
1031
|
-
# a _list_ of the document's own class and ancestor classes, remove the `_id` field,
|
|
1032
|
-
# and insert the resulting document into the `alldocs` collection.
|
|
1033
|
-
|
|
1034
|
-
inserted_many_result = mdb.alldocs.insert_many(
|
|
1035
|
-
[
|
|
1036
|
-
assoc(dissoc(doc, "type", "_id"), "type", newdoc_type)
|
|
1037
|
-
for doc in mdb[collection].find()
|
|
1038
|
-
]
|
|
1063
|
+
f"Found {mdb[collection_name].estimated_document_count()} estimated documents for {collection_name=}."
|
|
1039
1064
|
)
|
|
1065
|
+
|
|
1066
|
+
# Process all the distinct `type` values (i.e. value in the `type` field) of the documents in this collection.
|
|
1067
|
+
#
|
|
1068
|
+
# References:
|
|
1069
|
+
# - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.distinct
|
|
1070
|
+
#
|
|
1071
|
+
distinct_type_values = mdb[collection_name].distinct(key="type")
|
|
1040
1072
|
context.log.info(
|
|
1041
|
-
f"
|
|
1073
|
+
f"Found {len(distinct_type_values)} distinct `type` values in {collection_name=}: {distinct_type_values=}"
|
|
1042
1074
|
)
|
|
1075
|
+
for type_value in distinct_type_values:
|
|
1076
|
+
|
|
1077
|
+
# Process all the documents in this collection that have this value in their `type` field.
|
|
1078
|
+
#
|
|
1079
|
+
# References:
|
|
1080
|
+
# - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.count_documents
|
|
1081
|
+
# - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
|
|
1082
|
+
#
|
|
1083
|
+
filter_ = {"type": type_value}
|
|
1084
|
+
num_docs_having_type = mdb[collection_name].count_documents(filter=filter_)
|
|
1085
|
+
docs_having_type = mdb[collection_name].find(filter=filter_)
|
|
1086
|
+
context.log.info(
|
|
1087
|
+
f"Found {num_docs_having_type} documents having {type_value=} in {collection_name=}."
|
|
1088
|
+
)
|
|
1089
|
+
|
|
1090
|
+
# Get a "representative" document from the result.
|
|
1091
|
+
#
|
|
1092
|
+
# Note: Since all of the documents in this batch have the same class ancestry, we will save time by
|
|
1093
|
+
# determining the class ancestry of only _one_ of them (we call this the "representative") and then
|
|
1094
|
+
# (later) attributing that class ancestry to all of them.
|
|
1095
|
+
#
|
|
1096
|
+
representative_doc = next(docs_having_type)
|
|
1097
|
+
|
|
1098
|
+
# Instantiate the Python class represented by the "representative" document.
|
|
1099
|
+
db_dict = {
|
|
1100
|
+
# Shed the `_id` attribute, since the constructor doesn't allow it.
|
|
1101
|
+
collection_name: [dissoc(representative_doc, "_id")]
|
|
1102
|
+
}
|
|
1103
|
+
nmdc_db = NMDCDatabase(**db_dict)
|
|
1104
|
+
representative_instance = getattr(nmdc_db, collection_name)[0]
|
|
1105
|
+
|
|
1106
|
+
# Get the class ancestry of that instance, as a list of class names (including its own class name).
|
|
1107
|
+
ancestor_class_names = class_hierarchy_as_list(representative_instance)
|
|
1108
|
+
|
|
1109
|
+
# Store the documents belonging to this group, in the `alldocs` collection, setting their `type` field
|
|
1110
|
+
# to the list of class names obtained from the "representative" document above.
|
|
1111
|
+
#
|
|
1112
|
+
# TODO: Document why clobbering the existing contents of the `type` field is OK.
|
|
1113
|
+
#
|
|
1114
|
+
# Note: The reason we `chain()` our "representative" document (in an iterable) with the `docs_having_type`
|
|
1115
|
+
# iterator here is that, when we called `next(docs_having_type)` above, we "consumed" our
|
|
1116
|
+
# "representative" document from that iterator. We use `chain()` here so that that document gets
|
|
1117
|
+
# inserted alongside its cousins (i.e. the documents _still_ accessible via `docs_having_type`).
|
|
1118
|
+
# Reference: https://docs.python.org/3/library/itertools.html#itertools.chain
|
|
1119
|
+
#
|
|
1120
|
+
inserted_many_result = mdb.alldocs.insert_many(
|
|
1121
|
+
[
|
|
1122
|
+
assoc(dissoc(doc, "type", "_id"), "type", ancestor_class_names)
|
|
1123
|
+
for doc in chain([representative_doc], docs_having_type)
|
|
1124
|
+
]
|
|
1125
|
+
)
|
|
1126
|
+
context.log.info(
|
|
1127
|
+
f"Inserted {len(inserted_many_result.inserted_ids)} documents from {collection_name=} "
|
|
1128
|
+
f"originally having {type_value=}."
|
|
1129
|
+
)
|
|
1043
1130
|
|
|
1044
1131
|
# Re-idx for `alldocs` collection
|
|
1045
1132
|
mdb.alldocs.create_index("id", unique=True)
|
|
1133
|
+
# The indexes were added to improve the performance of the
|
|
1134
|
+
# /data_objects/study/{study_id} endpoint
|
|
1135
|
+
mdb.alldocs.create_index("has_input")
|
|
1136
|
+
mdb.alldocs.create_index("has_output")
|
|
1046
1137
|
context.log.info(
|
|
1047
1138
|
f"refreshed {mdb.alldocs} collection with {mdb.alldocs.estimated_document_count()} docs."
|
|
1048
1139
|
)
|
|
@@ -1106,10 +1197,12 @@ def get_data_objects_from_biosamples(context: OpExecutionContext, biosamples: li
|
|
|
1106
1197
|
|
|
1107
1198
|
|
|
1108
1199
|
@op(required_resource_keys={"mongo"})
|
|
1109
|
-
def
|
|
1200
|
+
def get_nucleotide_sequencing_from_biosamples(
|
|
1201
|
+
context: OpExecutionContext, biosamples: list
|
|
1202
|
+
):
|
|
1110
1203
|
mdb = context.resources.mongo.db
|
|
1111
1204
|
alldocs_collection = mdb["alldocs"]
|
|
1112
|
-
biosample_omics_processing =
|
|
1205
|
+
biosample_omics_processing = fetch_nucleotide_sequencing_from_biosamples(
|
|
1113
1206
|
alldocs_collection, biosamples
|
|
1114
1207
|
)
|
|
1115
1208
|
return biosample_omics_processing
|
nmdc_runtime/site/repository.py
CHANGED
|
@@ -501,7 +501,13 @@ def biosample_submission_ingest():
|
|
|
501
501
|
},
|
|
502
502
|
),
|
|
503
503
|
"ops": {
|
|
504
|
-
"get_gold_study_pipeline_inputs": {
|
|
504
|
+
"get_gold_study_pipeline_inputs": {
|
|
505
|
+
"config": {
|
|
506
|
+
"study_id": "",
|
|
507
|
+
"study_type": "research_study",
|
|
508
|
+
"gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
|
|
509
|
+
},
|
|
510
|
+
},
|
|
505
511
|
"export_json_to_drs": {"config": {"username": ""}},
|
|
506
512
|
},
|
|
507
513
|
},
|
|
@@ -528,7 +534,7 @@ def biosample_submission_ingest():
|
|
|
528
534
|
"get_submission_portal_pipeline_inputs": {
|
|
529
535
|
"inputs": {
|
|
530
536
|
"submission_id": "",
|
|
531
|
-
"
|
|
537
|
+
"nucleotide_sequencing_mapping_file_url": None,
|
|
532
538
|
"data_object_mapping_file_url": None,
|
|
533
539
|
"biosample_extras_file_url": None,
|
|
534
540
|
"biosample_extras_slot_mapping_file_url": None,
|
|
@@ -536,7 +542,7 @@ def biosample_submission_ingest():
|
|
|
536
542
|
},
|
|
537
543
|
"translate_portal_submission_to_nmdc_schema_database": {
|
|
538
544
|
"inputs": {
|
|
539
|
-
"study_category":
|
|
545
|
+
"study_category": "research_study",
|
|
540
546
|
"study_doi_category": None,
|
|
541
547
|
"study_doi_provider": None,
|
|
542
548
|
"study_pi_image_url": None,
|
|
@@ -566,7 +572,7 @@ def biosample_submission_ingest():
|
|
|
566
572
|
"get_submission_portal_pipeline_inputs": {
|
|
567
573
|
"inputs": {
|
|
568
574
|
"submission_id": "",
|
|
569
|
-
"
|
|
575
|
+
"nucleotide_sequencing_mapping_file_url": None,
|
|
570
576
|
"data_object_mapping_file_url": None,
|
|
571
577
|
"biosample_extras_file_url": None,
|
|
572
578
|
"biosample_extras_slot_mapping_file_url": None,
|
|
@@ -636,6 +642,7 @@ def biosample_submission_ingest():
|
|
|
636
642
|
"inputs": {
|
|
637
643
|
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
638
644
|
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
645
|
+
"neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv",
|
|
639
646
|
}
|
|
640
647
|
},
|
|
641
648
|
},
|
|
@@ -677,6 +684,7 @@ def biosample_submission_ingest():
|
|
|
677
684
|
"inputs": {
|
|
678
685
|
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
679
686
|
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
687
|
+
"neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv",
|
|
680
688
|
}
|
|
681
689
|
},
|
|
682
690
|
},
|
|
@@ -719,6 +727,7 @@ def biosample_submission_ingest():
|
|
|
719
727
|
"inputs": {
|
|
720
728
|
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
721
729
|
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
730
|
+
"neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv",
|
|
722
731
|
}
|
|
723
732
|
},
|
|
724
733
|
"get_neon_pipeline_benthic_data_product": {
|
|
@@ -760,6 +769,7 @@ def biosample_submission_ingest():
|
|
|
760
769
|
"inputs": {
|
|
761
770
|
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
762
771
|
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
772
|
+
"neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv",
|
|
763
773
|
}
|
|
764
774
|
},
|
|
765
775
|
},
|
|
@@ -802,6 +812,7 @@ def biosample_submission_ingest():
|
|
|
802
812
|
"inputs": {
|
|
803
813
|
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
804
814
|
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
815
|
+
"neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv",
|
|
805
816
|
}
|
|
806
817
|
},
|
|
807
818
|
"get_neon_pipeline_surface_water_data_product": {
|
|
@@ -843,6 +854,7 @@ def biosample_submission_ingest():
|
|
|
843
854
|
"inputs": {
|
|
844
855
|
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
845
856
|
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
857
|
+
"neon_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/refs/heads/main/assets/misc/neon_sequencingMethod_to_nmdc_instrument_set.tsv",
|
|
846
858
|
}
|
|
847
859
|
},
|
|
848
860
|
},
|