nmdc-runtime 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/site/export/ncbi_xml.py +433 -0
- nmdc_runtime/site/export/ncbi_xml_utils.py +206 -0
- nmdc_runtime/site/export/study_metadata.py +24 -4
- nmdc_runtime/site/graphs.py +88 -12
- nmdc_runtime/site/ops.py +154 -44
- nmdc_runtime/site/repository.py +141 -6
- nmdc_runtime/site/resources.py +30 -40
- nmdc_runtime/site/translation/neon_surface_water_translator.py +620 -0
- nmdc_runtime/site/translation/neon_utils.py +5 -1
- nmdc_runtime/site/translation/submission_portal_translator.py +16 -9
- nmdc_runtime/util.py +1 -1
- {nmdc_runtime-1.5.0.dist-info → nmdc_runtime-1.7.0.dist-info}/METADATA +4 -7
- {nmdc_runtime-1.5.0.dist-info → nmdc_runtime-1.7.0.dist-info}/RECORD +17 -18
- {nmdc_runtime-1.5.0.dist-info → nmdc_runtime-1.7.0.dist-info}/WHEEL +1 -1
- {nmdc_runtime-1.5.0.dist-info → nmdc_runtime-1.7.0.dist-info}/entry_points.txt +0 -1
- nmdc_runtime/site/terminusdb/__init__.py +0 -0
- nmdc_runtime/site/terminusdb/generate.py +0 -198
- nmdc_runtime/site/terminusdb/ingest.py +0 -44
- nmdc_runtime/site/terminusdb/schema.py +0 -1671
- {nmdc_runtime-1.5.0.dist-info → nmdc_runtime-1.7.0.dist-info}/LICENSE +0 -0
- {nmdc_runtime-1.5.0.dist-info → nmdc_runtime-1.7.0.dist-info}/top_level.txt +0 -0
nmdc_runtime/site/graphs.py
CHANGED
|
@@ -22,7 +22,6 @@ from nmdc_runtime.site.ops import (
|
|
|
22
22
|
hello,
|
|
23
23
|
mongo_stats,
|
|
24
24
|
submit_metadata_to_db,
|
|
25
|
-
update_schema,
|
|
26
25
|
filter_ops_undone_expired,
|
|
27
26
|
construct_jobs,
|
|
28
27
|
maybe_post_jobs,
|
|
@@ -38,16 +37,25 @@ from nmdc_runtime.site.ops import (
|
|
|
38
37
|
neon_data_by_product,
|
|
39
38
|
nmdc_schema_database_from_neon_soil_data,
|
|
40
39
|
nmdc_schema_database_from_neon_benthic_data,
|
|
40
|
+
nmdc_schema_database_from_neon_surface_water_data,
|
|
41
41
|
nmdc_schema_database_export_filename_neon,
|
|
42
42
|
get_neon_pipeline_mms_data_product,
|
|
43
43
|
get_neon_pipeline_sls_data_product,
|
|
44
|
+
get_neon_pipeline_surface_water_data_product,
|
|
44
45
|
get_submission_portal_pipeline_inputs,
|
|
45
46
|
get_csv_rows_from_url,
|
|
46
47
|
get_neon_pipeline_benthic_data_product,
|
|
47
48
|
get_neon_pipeline_inputs,
|
|
48
49
|
get_df_from_url,
|
|
49
50
|
site_code_mapping,
|
|
51
|
+
get_ncbi_export_pipeline_study,
|
|
52
|
+
get_data_objects_from_biosamples,
|
|
53
|
+
get_omics_processing_from_biosamples,
|
|
54
|
+
get_ncbi_export_pipeline_inputs,
|
|
55
|
+
ncbi_submission_xml_from_nmdc_study,
|
|
56
|
+
ncbi_submission_xml_asset,
|
|
50
57
|
)
|
|
58
|
+
from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
|
|
51
59
|
|
|
52
60
|
|
|
53
61
|
@graph
|
|
@@ -85,17 +93,6 @@ def hello_mongo():
|
|
|
85
93
|
mongo_stats()
|
|
86
94
|
|
|
87
95
|
|
|
88
|
-
@graph
|
|
89
|
-
def update_terminus():
|
|
90
|
-
"""
|
|
91
|
-
A pipeline definition. This example pipeline has a single solid.
|
|
92
|
-
|
|
93
|
-
For more hints on writing Dagster pipelines, see our documentation overview on Pipelines:
|
|
94
|
-
https://docs.dagster.io/overview/solids-pipelines/pipelines
|
|
95
|
-
"""
|
|
96
|
-
update_schema()
|
|
97
|
-
|
|
98
|
-
|
|
99
96
|
@graph
|
|
100
97
|
def housekeeping():
|
|
101
98
|
delete_operations(list_operations(filter_ops_undone_expired()))
|
|
@@ -317,3 +314,82 @@ def ingest_neon_benthic_metadata():
|
|
|
317
314
|
)
|
|
318
315
|
run_id = submit_metadata_to_db(database)
|
|
319
316
|
poll_for_run_completion(run_id)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@graph
def translate_neon_api_surface_water_metadata_to_nmdc_schema_database():
    """Translate NEON surface water metadata and export the result to DRS.

    Fetches the configured surface water data product, translates it into an
    NMDC schema database using the site-code mapping and the two NEON mapping
    files, converts the database to a dict, and exports it as JSON under the
    NEON export filename. The export outputs are recorded as a run event.
    """
    envo_mappings_url, raw_file_mappings_url = get_neon_pipeline_inputs()

    surface_water_tables = neon_data_by_product(
        get_neon_pipeline_surface_water_data_product()
    )

    # get_df_from_url is invoked for the ENVO mappings first and the raw-data
    # file mappings second, matching the original invocation order.
    nmdc_database = nmdc_schema_database_from_neon_surface_water_data(
        surface_water_tables,
        site_code_mapping(),
        get_df_from_url(envo_mappings_url),
        get_df_from_url(raw_file_mappings_url),
    )

    drs_outputs = export_json_to_drs(
        nmdc_schema_object_to_dict(nmdc_database),
        nmdc_schema_database_export_filename_neon(),
    )
    add_output_run_event(drs_outputs)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@graph
def ingest_neon_surface_water_metadata():
    """Translate NEON surface water metadata and submit it to the database.

    Assembles the same translation inputs as
    ``translate_neon_api_surface_water_metadata_to_nmdc_schema_database``
    (surface water data product, site-code mapping, ENVO mappings file and
    raw-data file mappings file), but instead of exporting the translated
    database it submits it with ``submit_metadata_to_db`` and then polls for
    completion of the resulting run.
    """
    mms_surface_water_data_product = get_neon_pipeline_surface_water_data_product()

    mms_surface_water = neon_data_by_product(mms_surface_water_data_product)

    sites_mapping_dict = site_code_mapping()

    (
        neon_envo_mappings_file_url,
        neon_raw_data_file_mappings_file_url,
    ) = get_neon_pipeline_inputs()

    neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url)

    neon_raw_data_file_mappings_file = get_df_from_url(
        neon_raw_data_file_mappings_file_url
    )

    # Use the surface water translation op. The benthic op was wired in here
    # previously, which fed surface water tables to the benthic translator.
    database = nmdc_schema_database_from_neon_surface_water_data(
        mms_surface_water,
        sites_mapping_dict,
        neon_envo_mappings_file,
        neon_raw_data_file_mappings_file,
    )
    run_id = submit_metadata_to_db(database)
    poll_for_run_completion(run_id)
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
@graph
def nmdc_study_to_ncbi_submission_export():
    """Build an NCBI submission XML for an NMDC study and publish it as an asset.

    Looks up the configured study, collects its biosamples together with the
    omics processing records and data objects derived from those biosamples,
    renders the submission XML from all of them plus the export pipeline
    inputs, and hands the XML to ``ncbi_submission_xml_asset``.
    """
    study = get_ncbi_export_pipeline_study()
    study_biosamples = get_biosamples_by_study_id(study)

    submission_xml = ncbi_submission_xml_from_nmdc_study(
        study,
        get_ncbi_export_pipeline_inputs(),
        study_biosamples,
        get_omics_processing_from_biosamples(study_biosamples),
        get_data_objects_from_biosamples(study_biosamples),
    )
    ncbi_submission_xml_asset(submission_xml)
|
nmdc_runtime/site/ops.py
CHANGED
|
@@ -9,6 +9,7 @@ from datetime import datetime, timezone
|
|
|
9
9
|
from io import BytesIO, StringIO
|
|
10
10
|
from typing import Tuple
|
|
11
11
|
from zipfile import ZipFile
|
|
12
|
+
|
|
12
13
|
import pandas as pd
|
|
13
14
|
import requests
|
|
14
15
|
|
|
@@ -29,10 +30,14 @@ from dagster import (
|
|
|
29
30
|
String,
|
|
30
31
|
op,
|
|
31
32
|
Optional,
|
|
33
|
+
Field,
|
|
34
|
+
Permissive,
|
|
35
|
+
Bool,
|
|
32
36
|
)
|
|
33
37
|
from gridfs import GridFS
|
|
34
38
|
from linkml_runtime.dumpers import json_dumper
|
|
35
39
|
from linkml_runtime.utils.yamlutils import YAMLRoot
|
|
40
|
+
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
36
41
|
from nmdc_runtime.api.core.idgen import generate_one_id
|
|
37
42
|
from nmdc_runtime.api.core.metadata import (
|
|
38
43
|
_validate_changesheet,
|
|
@@ -42,6 +47,7 @@ from nmdc_runtime.api.core.metadata import (
|
|
|
42
47
|
)
|
|
43
48
|
from nmdc_runtime.api.core.util import dotted_path_for, hash_from_str, json_clean, now
|
|
44
49
|
from nmdc_runtime.api.endpoints.util import persist_content_and_get_drs_object
|
|
50
|
+
from nmdc_runtime.api.endpoints.find import find_study_by_id
|
|
45
51
|
from nmdc_runtime.api.models.job import Job, JobOperationMetadata
|
|
46
52
|
from nmdc_runtime.api.models.metadata import ChangesheetIn
|
|
47
53
|
from nmdc_runtime.api.models.operation import (
|
|
@@ -55,6 +61,11 @@ from nmdc_runtime.api.models.run import (
|
|
|
55
61
|
_add_run_complete_event,
|
|
56
62
|
)
|
|
57
63
|
from nmdc_runtime.api.models.util import ResultT
|
|
64
|
+
from nmdc_runtime.site.export.ncbi_xml import NCBISubmissionXML
|
|
65
|
+
from nmdc_runtime.site.export.ncbi_xml_utils import (
|
|
66
|
+
fetch_data_objects_from_biosamples,
|
|
67
|
+
fetch_omics_processing_from_biosamples,
|
|
68
|
+
)
|
|
58
69
|
from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
|
|
59
70
|
from nmdc_runtime.site.resources import (
|
|
60
71
|
NmdcPortalApiClient,
|
|
@@ -68,6 +79,9 @@ from nmdc_runtime.site.translation.neon_soil_translator import NeonSoilDataTrans
|
|
|
68
79
|
from nmdc_runtime.site.translation.neon_benthic_translator import (
|
|
69
80
|
NeonBenthicDataTranslator,
|
|
70
81
|
)
|
|
82
|
+
from nmdc_runtime.site.translation.neon_surface_water_translator import (
|
|
83
|
+
NeonSurfaceWaterDataTranslator,
|
|
84
|
+
)
|
|
71
85
|
from nmdc_runtime.site.translation.submission_portal_translator import (
|
|
72
86
|
SubmissionPortalTranslator,
|
|
73
87
|
)
|
|
@@ -83,7 +97,6 @@ from nmdc_schema import nmdc
|
|
|
83
97
|
from pydantic import BaseModel
|
|
84
98
|
from pymongo.database import Database as MongoDatabase
|
|
85
99
|
from starlette import status
|
|
86
|
-
from terminusdb_client.woqlquery import WOQLQuery as WQ
|
|
87
100
|
from toolz import assoc, dissoc, get_in, valfilter, identity
|
|
88
101
|
|
|
89
102
|
|
|
@@ -108,14 +121,6 @@ def log_env(context):
|
|
|
108
121
|
context.log.info("\n".join(out))
|
|
109
122
|
|
|
110
123
|
|
|
111
|
-
@op(required_resource_keys={"terminus"})
|
|
112
|
-
def list_databases(context) -> List[String]:
|
|
113
|
-
client = context.resources.terminus.client
|
|
114
|
-
list_ = client.list_databases()
|
|
115
|
-
context.log.info(f"databases: {list_}")
|
|
116
|
-
return list_
|
|
117
|
-
|
|
118
|
-
|
|
119
124
|
@op(required_resource_keys={"mongo"})
|
|
120
125
|
def mongo_stats(context) -> List[str]:
|
|
121
126
|
db = context.resources.mongo.db
|
|
@@ -124,41 +129,6 @@ def mongo_stats(context) -> List[str]:
|
|
|
124
129
|
return collection_names
|
|
125
130
|
|
|
126
131
|
|
|
127
|
-
@op(required_resource_keys={"terminus"})
|
|
128
|
-
def update_schema(context):
|
|
129
|
-
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
130
|
-
try:
|
|
131
|
-
context.log.info("shallow-cloning nmdc-schema repo")
|
|
132
|
-
subprocess.check_output(
|
|
133
|
-
"git clone https://github.com/microbiomedata/nmdc-schema.git"
|
|
134
|
-
f" --branch main --single-branch {tmpdirname}/nmdc-schema",
|
|
135
|
-
shell=True,
|
|
136
|
-
)
|
|
137
|
-
context.log.info("generating TerminusDB JSON-LD from NMDC LinkML")
|
|
138
|
-
subprocess.check_output(
|
|
139
|
-
f"gen-terminusdb {tmpdirname}/nmdc-schema/src/schema/nmdc.yaml"
|
|
140
|
-
f" > {tmpdirname}/nmdc.terminus.json",
|
|
141
|
-
shell=True,
|
|
142
|
-
)
|
|
143
|
-
except subprocess.CalledProcessError as e:
|
|
144
|
-
if e.stdout:
|
|
145
|
-
context.log.debug(e.stdout.decode())
|
|
146
|
-
if e.stderr:
|
|
147
|
-
context.log.error(e.stderr.decode())
|
|
148
|
-
context.log.debug(str(e.returncode))
|
|
149
|
-
raise e
|
|
150
|
-
|
|
151
|
-
with open(f"{tmpdirname}/nmdc.terminus.json") as f:
|
|
152
|
-
woql_dict = json.load(f)
|
|
153
|
-
|
|
154
|
-
context.log.info("Updating terminus schema via WOQLQuery")
|
|
155
|
-
rv = WQ(query=woql_dict).execute(
|
|
156
|
-
context.resources.terminus.client, "update schema via WOQL"
|
|
157
|
-
)
|
|
158
|
-
context.log.info(str(rv))
|
|
159
|
-
return rv
|
|
160
|
-
|
|
161
|
-
|
|
162
132
|
@op(
|
|
163
133
|
required_resource_keys={"mongo", "runtime_api_site_client"},
|
|
164
134
|
retry_policy=RetryPolicy(max_retries=2),
|
|
@@ -765,6 +735,33 @@ def export_json_to_drs(
|
|
|
765
735
|
return ["/objects/" + drs_object["id"]]
|
|
766
736
|
|
|
767
737
|
|
|
738
|
+
@op(
    description="NCBI Submission XML file rendered in a Dagster Asset",
    out=Out(description="XML content rendered through Dagit UI"),
)
def ncbi_submission_xml_asset(context: OpExecutionContext, data: str):
    """Write the submission XML to instance storage and record it as an asset.

    The XML is saved as ``ncbi_submission.xml`` inside the Dagster instance's
    storage directory. An ``AssetMaterialization`` carrying the file path and
    the XML text is logged, and the XML is passed through as the op output.

    Args:
        context: Dagster op execution context; supplies the instance storage
            directory and the event log.
        data: The XML document as a string.

    Returns:
        An ``Output`` wrapping the unchanged ``data``.
    """
    filename = "ncbi_submission.xml"
    file_path = os.path.join(context.instance.storage_directory(), filename)

    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Pin the encoding: without it, open() falls back to the platform locale,
    # so non-ASCII metadata could fail to write or be stored in an encoding
    # that XML consumers do not expect (UTF-8 is the XML default).
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(data)

    context.log_event(
        AssetMaterialization(
            asset_key="ncbi_submission_xml",
            description="NCBI Submission XML Data",
            metadata={
                "file_path": MetadataValue.path(file_path),
                "xml": MetadataValue.text(data),
            },
        )
    )

    return Output(data)
|
|
763
|
+
|
|
764
|
+
|
|
768
765
|
def unique_field_values(docs: List[Dict[str, Any]], field: str):
    """Return the set of distinct values found under ``field`` across ``docs``.

    Documents that do not contain ``field`` are ignored.
    """
    values = set()
    for record in docs:
        if field in record:
            values.add(record[field])
    return values
|
|
770
767
|
|
|
@@ -784,6 +781,11 @@ def get_neon_pipeline_benthic_data_product(context: OpExecutionContext) -> dict:
|
|
|
784
781
|
return context.op_config["benthic_data_product"]
|
|
785
782
|
|
|
786
783
|
|
|
784
|
+
@op(config_schema={"surface_water_data_product": dict})
def get_neon_pipeline_surface_water_data_product(context: OpExecutionContext) -> dict:
    """Expose the ``surface_water_data_product`` op config entry as the op output."""
    data_product = context.op_config["surface_water_data_product"]
    return data_product
|
|
787
|
+
|
|
788
|
+
|
|
787
789
|
@op(required_resource_keys={"neon_api_client"})
|
|
788
790
|
def neon_data_by_product(
|
|
789
791
|
context: OpExecutionContext, data_product: dict
|
|
@@ -862,6 +864,32 @@ def nmdc_schema_database_from_neon_benthic_data(
|
|
|
862
864
|
return database
|
|
863
865
|
|
|
864
866
|
|
|
867
|
+
@op(required_resource_keys={"runtime_api_site_client"})
def nmdc_schema_database_from_neon_surface_water_data(
    context: OpExecutionContext,
    surface_water_data: Dict[str, pd.DataFrame],
    site_code_mapping: Dict[str, str],
    neon_envo_mappings_file: pd.DataFrame,
    neon_raw_data_file_mappings_file: pd.DataFrame,
) -> nmdc.Database:
    """Translate NEON surface water tables into an ``nmdc.Database``.

    Instantiates ``NeonSurfaceWaterDataTranslator`` with the supplied data and
    mapping inputs, plus an ID minter backed by the ``runtime_api_site_client``
    resource, and returns the translator's database.
    """
    site_client: RuntimeApiSiteClient = context.resources.runtime_api_site_client

    def id_minter(*args, **kwargs):
        # Forward to the runtime API and return the decoded JSON payload.
        return site_client.mint_id(*args, **kwargs).json()

    surface_water_translator = NeonSurfaceWaterDataTranslator(
        surface_water_data,
        site_code_mapping,
        neon_envo_mappings_file,
        neon_raw_data_file_mappings_file,
        id_minter=id_minter,
    )
    return surface_water_translator.get_database()
|
|
891
|
+
|
|
892
|
+
|
|
865
893
|
@op(
|
|
866
894
|
out={
|
|
867
895
|
"neon_envo_mappings_file_url": Out(),
|
|
@@ -943,3 +971,85 @@ def site_code_mapping() -> dict:
|
|
|
943
971
|
raise Exception(
|
|
944
972
|
f"Failed to fetch site data from {endpoint}. Status code: {response.status_code}, Content: {response.content}"
|
|
945
973
|
)
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
@op(config_schema={"nmdc_study_id": str}, required_resource_keys={"mongo"})
def get_ncbi_export_pipeline_study(context: OpExecutionContext) -> Any:
    """Look up the study named by the ``nmdc_study_id`` op config entry.

    Delegates to ``find_study_by_id`` using the database handle of the
    ``mongo`` resource and returns whatever that call returns.
    """
    study_id = context.op_config["nmdc_study_id"]
    mongo_db = context.resources.mongo.db
    return find_study_by_id(study_id, mongo_db)
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
@op(
    config_schema={
        "nmdc_ncbi_attribute_mapping_file_url": str,
        "ncbi_submission_metadata": Field(
            Permissive(
                {
                    "organization": String,
                }
            ),
            is_required=True,
            description="General metadata about the NCBI submission.",
        ),
        "ncbi_biosample_metadata": Field(
            Permissive(
                {
                    "organism_name": String,
                }
            ),
            is_required=True,
            description="Metadata for one or many NCBI BioSample in the Submission.",
        ),
    },
    out=Out(Dict),
)
def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> dict:
    """Collect the NCBI export settings from op config into a single dict.

    Returns:
        A dict with the keys ``nmdc_ncbi_attribute_mapping_file_url``,
        ``ncbi_submission_metadata`` and ``ncbi_biosample_metadata``, each
        taken from the op config entry of the same name. The two metadata
        entries fall back to an empty dict when absent from the config.
    """
    # The return annotation is ``dict`` (it was ``str``) so that it agrees
    # with the value returned below and with the declared ``Out(Dict)``.
    nmdc_ncbi_attribute_mapping_file_url = context.op_config[
        "nmdc_ncbi_attribute_mapping_file_url"
    ]
    ncbi_submission_metadata = context.op_config.get("ncbi_submission_metadata", {})
    ncbi_biosample_metadata = context.op_config.get("ncbi_biosample_metadata", {})

    return {
        "nmdc_ncbi_attribute_mapping_file_url": nmdc_ncbi_attribute_mapping_file_url,
        "ncbi_submission_metadata": ncbi_submission_metadata,
        "ncbi_biosample_metadata": ncbi_biosample_metadata,
    }
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
@op(required_resource_keys={"mongo"})
def get_data_objects_from_biosamples(context: OpExecutionContext, biosamples: list):
    """Fetch the data objects associated with ``biosamples``.

    Passes the ``alldocs`` collection of the ``mongo`` resource's database and
    the biosamples to ``fetch_data_objects_from_biosamples`` and returns its
    result unchanged.
    """
    alldocs = context.resources.mongo.db["alldocs"]
    return fetch_data_objects_from_biosamples(alldocs, biosamples)
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
@op(required_resource_keys={"mongo"})
def get_omics_processing_from_biosamples(context: OpExecutionContext, biosamples: list):
    """Fetch the omics processing records associated with ``biosamples``.

    Passes the ``alldocs`` collection of the ``mongo`` resource's database and
    the biosamples to ``fetch_omics_processing_from_biosamples`` and returns
    its result unchanged.
    """
    alldocs = context.resources.mongo.db["alldocs"]
    return fetch_omics_processing_from_biosamples(alldocs, biosamples)
|
|
1040
|
+
|
|
1041
|
+
|
|
1042
|
+
@op
def ncbi_submission_xml_from_nmdc_study(
    context: OpExecutionContext,
    nmdc_study: Any,
    ncbi_exporter_metadata: dict,
    biosamples: list,
    omics_processing_records: list,
    data_objects: list,
) -> str:
    """Render the NCBI submission XML for a study.

    Creates an ``NCBISubmissionXML`` exporter from the study and the exporter
    metadata, then returns the XML it produces for the given biosamples,
    omics processing records and data objects.
    """
    exporter = NCBISubmissionXML(nmdc_study, ncbi_exporter_metadata)
    return exporter.get_submission_xml(
        biosamples,
        omics_processing_records,
        data_objects,
    )
|
nmdc_runtime/site/repository.py
CHANGED
|
@@ -38,8 +38,11 @@ from nmdc_runtime.site.graphs import (
|
|
|
38
38
|
hello_graph,
|
|
39
39
|
translate_neon_api_soil_metadata_to_nmdc_schema_database,
|
|
40
40
|
translate_neon_api_benthic_metadata_to_nmdc_schema_database,
|
|
41
|
+
translate_neon_api_surface_water_metadata_to_nmdc_schema_database,
|
|
41
42
|
ingest_neon_soil_metadata,
|
|
42
43
|
ingest_neon_benthic_metadata,
|
|
44
|
+
ingest_neon_surface_water_metadata,
|
|
45
|
+
nmdc_study_to_ncbi_submission_export,
|
|
43
46
|
)
|
|
44
47
|
from nmdc_runtime.site.resources import (
|
|
45
48
|
get_mongo,
|
|
@@ -48,7 +51,6 @@ from nmdc_runtime.site.resources import (
|
|
|
48
51
|
nmdc_portal_api_client_resource,
|
|
49
52
|
gold_api_client_resource,
|
|
50
53
|
neon_api_client_resource,
|
|
51
|
-
terminus_resource,
|
|
52
54
|
mongo_resource,
|
|
53
55
|
)
|
|
54
56
|
from nmdc_runtime.site.resources import (
|
|
@@ -66,7 +68,6 @@ resource_defs = {
|
|
|
66
68
|
"nmdc_portal_api_client": nmdc_portal_api_client_resource,
|
|
67
69
|
"gold_api_client": gold_api_client_resource,
|
|
68
70
|
"neon_api_client": neon_api_client_resource,
|
|
69
|
-
"terminus": terminus_resource,
|
|
70
71
|
"mongo": mongo_resource,
|
|
71
72
|
}
|
|
72
73
|
|
|
@@ -513,8 +514,8 @@ def biosample_submission_ingest():
|
|
|
513
514
|
"nmdc_portal_api_client": {
|
|
514
515
|
"config": {
|
|
515
516
|
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
|
|
516
|
-
"
|
|
517
|
-
"env": "
|
|
517
|
+
"refresh_token": {
|
|
518
|
+
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
|
|
518
519
|
},
|
|
519
520
|
}
|
|
520
521
|
}
|
|
@@ -553,8 +554,8 @@ def biosample_submission_ingest():
|
|
|
553
554
|
"nmdc_portal_api_client": {
|
|
554
555
|
"config": {
|
|
555
556
|
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
|
|
556
|
-
"
|
|
557
|
-
"env": "
|
|
557
|
+
"refresh_token": {
|
|
558
|
+
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
|
|
558
559
|
},
|
|
559
560
|
}
|
|
560
561
|
}
|
|
@@ -764,6 +765,140 @@ def biosample_submission_ingest():
|
|
|
764
765
|
},
|
|
765
766
|
},
|
|
766
767
|
),
|
|
768
|
+
translate_neon_api_surface_water_metadata_to_nmdc_schema_database.to_job(
|
|
769
|
+
description="This job fetches the metadata associated with a given NEON data product code and translates it into an equivalent nmdc:Database object. The object is serialized to JSON and stored in DRS. This can be considered a dry-run for the `ingest_neon_metadata` job.",
|
|
770
|
+
resource_defs=resource_defs,
|
|
771
|
+
config={
|
|
772
|
+
"resources": merge(
|
|
773
|
+
unfreeze(normal_resources),
|
|
774
|
+
{
|
|
775
|
+
"neon_api_client": {
|
|
776
|
+
"config": {
|
|
777
|
+
"base_url": {"env": "NEON_API_BASE_URL"},
|
|
778
|
+
"api_token": {"env": "NEON_API_TOKEN"},
|
|
779
|
+
},
|
|
780
|
+
},
|
|
781
|
+
"mongo": {
|
|
782
|
+
"config": {
|
|
783
|
+
"dbname": {"env": "MONGO_DBNAME"},
|
|
784
|
+
"host": {"env": "MONGO_HOST"},
|
|
785
|
+
"password": {"env": "MONGO_PASSWORD"},
|
|
786
|
+
"username": {"env": "MONGO_USERNAME"},
|
|
787
|
+
},
|
|
788
|
+
},
|
|
789
|
+
"runtime_api_site_client": {
|
|
790
|
+
"config": {
|
|
791
|
+
"base_url": {"env": "API_HOST"},
|
|
792
|
+
"client_id": {"env": "API_SITE_CLIENT_ID"},
|
|
793
|
+
"client_secret": {"env": "API_SITE_CLIENT_SECRET"},
|
|
794
|
+
"site_id": {"env": "API_SITE_ID"},
|
|
795
|
+
},
|
|
796
|
+
},
|
|
797
|
+
},
|
|
798
|
+
),
|
|
799
|
+
"ops": {
|
|
800
|
+
"export_json_to_drs": {"config": {"username": "..."}},
|
|
801
|
+
"get_neon_pipeline_inputs": {
|
|
802
|
+
"inputs": {
|
|
803
|
+
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
804
|
+
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
805
|
+
}
|
|
806
|
+
},
|
|
807
|
+
"get_neon_pipeline_surface_water_data_product": {
|
|
808
|
+
"config": {
|
|
809
|
+
"surface_water_data_product": {
|
|
810
|
+
"product_id": "DP1.20281.001",
|
|
811
|
+
"product_tables": "mms_swMetagenomeSequencing, mms_swMetagenomeDnaExtraction, amc_fieldGenetic, amc_fieldSuperParent",
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
},
|
|
815
|
+
},
|
|
816
|
+
},
|
|
817
|
+
),
|
|
818
|
+
ingest_neon_surface_water_metadata.to_job(
|
|
819
|
+
description="",
|
|
820
|
+
resource_defs=resource_defs,
|
|
821
|
+
config={
|
|
822
|
+
"resources": merge(
|
|
823
|
+
unfreeze(normal_resources),
|
|
824
|
+
{
|
|
825
|
+
"neon_api_client": {
|
|
826
|
+
"config": {
|
|
827
|
+
"base_url": {"env": "NEON_API_BASE_URL"},
|
|
828
|
+
"api_token": {"env": "NEON_API_TOKEN"},
|
|
829
|
+
},
|
|
830
|
+
}
|
|
831
|
+
},
|
|
832
|
+
),
|
|
833
|
+
"ops": {
|
|
834
|
+
"get_neon_pipeline_surface_water_data_product": {
|
|
835
|
+
"config": {
|
|
836
|
+
"surface_water_data_product": {
|
|
837
|
+
"product_id": "DP1.20281.001",
|
|
838
|
+
"product_tables": "mms_swMetagenomeSequencing, mms_swMetagenomeDnaExtraction, amc_fieldGenetic, amc_fieldSuperParent",
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
},
|
|
842
|
+
"get_neon_pipeline_inputs": {
|
|
843
|
+
"inputs": {
|
|
844
|
+
"neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
|
|
845
|
+
"neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
|
|
846
|
+
}
|
|
847
|
+
},
|
|
848
|
+
},
|
|
849
|
+
},
|
|
850
|
+
),
|
|
851
|
+
]
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
@repository
def biosample_export():
    """Repository exposing the NMDC-study-to-NCBI-submission export job.

    The job's resource config is the normal-environment resource config merged
    with ``mongo`` and ``runtime_api_site_client`` settings that reference
    environment variables. The op config values are empty strings.
    """
    base_resources = run_config_frozen__normal_env["resources"]

    mongo_config = {
        "host": {"env": "MONGO_HOST"},
        "username": {"env": "MONGO_USERNAME"},
        "password": {"env": "MONGO_PASSWORD"},
        "dbname": {"env": "MONGO_DBNAME"},
    }
    site_client_config = {
        "base_url": {"env": "API_HOST"},
        "client_id": {"env": "API_SITE_CLIENT_ID"},
        "client_secret": {"env": "API_SITE_CLIENT_SECRET"},
        "site_id": {"env": "API_SITE_ID"},
    }

    # NOTE(review): the empty strings look like placeholders to be filled in
    # when a run is launched; confirm with the job's operators.
    ops_config = {
        "get_ncbi_export_pipeline_study": {
            "config": {
                "nmdc_study_id": "",
            }
        },
        "get_ncbi_export_pipeline_inputs": {
            "config": {
                "nmdc_ncbi_attribute_mapping_file_url": "",
                "ncbi_submission_metadata": {
                    "organization": "",
                },
                "ncbi_biosample_metadata": {
                    "organism_name": "",
                },
            }
        },
    }

    export_job = nmdc_study_to_ncbi_submission_export.to_job(
        resource_defs=resource_defs,
        config={
            "resources": merge(
                unfreeze(base_resources),
                {
                    "mongo": {"config": mongo_config},
                    "runtime_api_site_client": {"config": site_client_config},
                },
            ),
            "ops": ops_config,
        },
    )
    return [export_job]
|
|
768
903
|
|
|
769
904
|
|