nmdc-runtime 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nmdc-runtime might be problematic. Click here for more details.
- nmdc_runtime/site/export/ncbi_xml.py +433 -0
- nmdc_runtime/site/export/ncbi_xml_utils.py +206 -0
- nmdc_runtime/site/export/study_metadata.py +24 -4
- nmdc_runtime/site/graphs.py +24 -12
- nmdc_runtime/site/ops.py +120 -44
- nmdc_runtime/site/repository.py +56 -6
- nmdc_runtime/site/resources.py +30 -40
- nmdc_runtime/site/translation/submission_portal_translator.py +16 -9
- nmdc_runtime/util.py +1 -1
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.7.0.dist-info}/METADATA +4 -7
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.7.0.dist-info}/RECORD +15 -17
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.7.0.dist-info}/WHEEL +1 -1
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.7.0.dist-info}/entry_points.txt +0 -1
- nmdc_runtime/site/terminusdb/__init__.py +0 -0
- nmdc_runtime/site/terminusdb/generate.py +0 -198
- nmdc_runtime/site/terminusdb/ingest.py +0 -44
- nmdc_runtime/site/terminusdb/schema.py +0 -1671
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.7.0.dist-info}/LICENSE +0 -0
- {nmdc_runtime-1.6.0.dist-info → nmdc_runtime-1.7.0.dist-info}/top_level.txt +0 -0
nmdc_runtime/site/graphs.py
CHANGED
|
@@ -22,7 +22,6 @@ from nmdc_runtime.site.ops import (
|
|
|
22
22
|
hello,
|
|
23
23
|
mongo_stats,
|
|
24
24
|
submit_metadata_to_db,
|
|
25
|
-
update_schema,
|
|
26
25
|
filter_ops_undone_expired,
|
|
27
26
|
construct_jobs,
|
|
28
27
|
maybe_post_jobs,
|
|
@@ -49,7 +48,14 @@ from nmdc_runtime.site.ops import (
|
|
|
49
48
|
get_neon_pipeline_inputs,
|
|
50
49
|
get_df_from_url,
|
|
51
50
|
site_code_mapping,
|
|
51
|
+
get_ncbi_export_pipeline_study,
|
|
52
|
+
get_data_objects_from_biosamples,
|
|
53
|
+
get_omics_processing_from_biosamples,
|
|
54
|
+
get_ncbi_export_pipeline_inputs,
|
|
55
|
+
ncbi_submission_xml_from_nmdc_study,
|
|
56
|
+
ncbi_submission_xml_asset,
|
|
52
57
|
)
|
|
58
|
+
from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
|
|
53
59
|
|
|
54
60
|
|
|
55
61
|
@graph
|
|
@@ -87,17 +93,6 @@ def hello_mongo():
|
|
|
87
93
|
mongo_stats()
|
|
88
94
|
|
|
89
95
|
|
|
90
|
-
@graph
|
|
91
|
-
def update_terminus():
|
|
92
|
-
"""
|
|
93
|
-
A pipeline definition. This example pipeline has a single solid.
|
|
94
|
-
|
|
95
|
-
For more hints on writing Dagster pipelines, see our documentation overview on Pipelines:
|
|
96
|
-
https://docs.dagster.io/overview/solids-pipelines/pipelines
|
|
97
|
-
"""
|
|
98
|
-
update_schema()
|
|
99
|
-
|
|
100
|
-
|
|
101
96
|
@graph
|
|
102
97
|
def housekeeping():
|
|
103
98
|
delete_operations(list_operations(filter_ops_undone_expired()))
|
|
@@ -381,3 +376,20 @@ def ingest_neon_surface_water_metadata():
|
|
|
381
376
|
)
|
|
382
377
|
run_id = submit_metadata_to_db(database)
|
|
383
378
|
poll_for_run_completion(run_id)
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
@graph
|
|
382
|
+
def nmdc_study_to_ncbi_submission_export():
|
|
383
|
+
nmdc_study = get_ncbi_export_pipeline_study()
|
|
384
|
+
ncbi_submission_metadata = get_ncbi_export_pipeline_inputs()
|
|
385
|
+
biosamples = get_biosamples_by_study_id(nmdc_study)
|
|
386
|
+
omics_processing_records = get_omics_processing_from_biosamples(biosamples)
|
|
387
|
+
data_objects = get_data_objects_from_biosamples(biosamples)
|
|
388
|
+
xml_data = ncbi_submission_xml_from_nmdc_study(
|
|
389
|
+
nmdc_study,
|
|
390
|
+
ncbi_submission_metadata,
|
|
391
|
+
biosamples,
|
|
392
|
+
omics_processing_records,
|
|
393
|
+
data_objects,
|
|
394
|
+
)
|
|
395
|
+
ncbi_submission_xml_asset(xml_data)
|
nmdc_runtime/site/ops.py
CHANGED
|
@@ -9,6 +9,7 @@ from datetime import datetime, timezone
|
|
|
9
9
|
from io import BytesIO, StringIO
|
|
10
10
|
from typing import Tuple
|
|
11
11
|
from zipfile import ZipFile
|
|
12
|
+
|
|
12
13
|
import pandas as pd
|
|
13
14
|
import requests
|
|
14
15
|
|
|
@@ -29,10 +30,14 @@ from dagster import (
|
|
|
29
30
|
String,
|
|
30
31
|
op,
|
|
31
32
|
Optional,
|
|
33
|
+
Field,
|
|
34
|
+
Permissive,
|
|
35
|
+
Bool,
|
|
32
36
|
)
|
|
33
37
|
from gridfs import GridFS
|
|
34
38
|
from linkml_runtime.dumpers import json_dumper
|
|
35
39
|
from linkml_runtime.utils.yamlutils import YAMLRoot
|
|
40
|
+
from nmdc_runtime.api.db.mongo import get_mongo_db
|
|
36
41
|
from nmdc_runtime.api.core.idgen import generate_one_id
|
|
37
42
|
from nmdc_runtime.api.core.metadata import (
|
|
38
43
|
_validate_changesheet,
|
|
@@ -42,6 +47,7 @@ from nmdc_runtime.api.core.metadata import (
|
|
|
42
47
|
)
|
|
43
48
|
from nmdc_runtime.api.core.util import dotted_path_for, hash_from_str, json_clean, now
|
|
44
49
|
from nmdc_runtime.api.endpoints.util import persist_content_and_get_drs_object
|
|
50
|
+
from nmdc_runtime.api.endpoints.find import find_study_by_id
|
|
45
51
|
from nmdc_runtime.api.models.job import Job, JobOperationMetadata
|
|
46
52
|
from nmdc_runtime.api.models.metadata import ChangesheetIn
|
|
47
53
|
from nmdc_runtime.api.models.operation import (
|
|
@@ -55,6 +61,11 @@ from nmdc_runtime.api.models.run import (
|
|
|
55
61
|
_add_run_complete_event,
|
|
56
62
|
)
|
|
57
63
|
from nmdc_runtime.api.models.util import ResultT
|
|
64
|
+
from nmdc_runtime.site.export.ncbi_xml import NCBISubmissionXML
|
|
65
|
+
from nmdc_runtime.site.export.ncbi_xml_utils import (
|
|
66
|
+
fetch_data_objects_from_biosamples,
|
|
67
|
+
fetch_omics_processing_from_biosamples,
|
|
68
|
+
)
|
|
58
69
|
from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
|
|
59
70
|
from nmdc_runtime.site.resources import (
|
|
60
71
|
NmdcPortalApiClient,
|
|
@@ -86,7 +97,6 @@ from nmdc_schema import nmdc
|
|
|
86
97
|
from pydantic import BaseModel
|
|
87
98
|
from pymongo.database import Database as MongoDatabase
|
|
88
99
|
from starlette import status
|
|
89
|
-
from terminusdb_client.woqlquery import WOQLQuery as WQ
|
|
90
100
|
from toolz import assoc, dissoc, get_in, valfilter, identity
|
|
91
101
|
|
|
92
102
|
|
|
@@ -111,14 +121,6 @@ def log_env(context):
|
|
|
111
121
|
context.log.info("\n".join(out))
|
|
112
122
|
|
|
113
123
|
|
|
114
|
-
@op(required_resource_keys={"terminus"})
|
|
115
|
-
def list_databases(context) -> List[String]:
|
|
116
|
-
client = context.resources.terminus.client
|
|
117
|
-
list_ = client.list_databases()
|
|
118
|
-
context.log.info(f"databases: {list_}")
|
|
119
|
-
return list_
|
|
120
|
-
|
|
121
|
-
|
|
122
124
|
@op(required_resource_keys={"mongo"})
|
|
123
125
|
def mongo_stats(context) -> List[str]:
|
|
124
126
|
db = context.resources.mongo.db
|
|
@@ -127,41 +129,6 @@ def mongo_stats(context) -> List[str]:
|
|
|
127
129
|
return collection_names
|
|
128
130
|
|
|
129
131
|
|
|
130
|
-
@op(required_resource_keys={"terminus"})
|
|
131
|
-
def update_schema(context):
|
|
132
|
-
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
133
|
-
try:
|
|
134
|
-
context.log.info("shallow-cloning nmdc-schema repo")
|
|
135
|
-
subprocess.check_output(
|
|
136
|
-
"git clone https://github.com/microbiomedata/nmdc-schema.git"
|
|
137
|
-
f" --branch main --single-branch {tmpdirname}/nmdc-schema",
|
|
138
|
-
shell=True,
|
|
139
|
-
)
|
|
140
|
-
context.log.info("generating TerminusDB JSON-LD from NMDC LinkML")
|
|
141
|
-
subprocess.check_output(
|
|
142
|
-
f"gen-terminusdb {tmpdirname}/nmdc-schema/src/schema/nmdc.yaml"
|
|
143
|
-
f" > {tmpdirname}/nmdc.terminus.json",
|
|
144
|
-
shell=True,
|
|
145
|
-
)
|
|
146
|
-
except subprocess.CalledProcessError as e:
|
|
147
|
-
if e.stdout:
|
|
148
|
-
context.log.debug(e.stdout.decode())
|
|
149
|
-
if e.stderr:
|
|
150
|
-
context.log.error(e.stderr.decode())
|
|
151
|
-
context.log.debug(str(e.returncode))
|
|
152
|
-
raise e
|
|
153
|
-
|
|
154
|
-
with open(f"{tmpdirname}/nmdc.terminus.json") as f:
|
|
155
|
-
woql_dict = json.load(f)
|
|
156
|
-
|
|
157
|
-
context.log.info("Updating terminus schema via WOQLQuery")
|
|
158
|
-
rv = WQ(query=woql_dict).execute(
|
|
159
|
-
context.resources.terminus.client, "update schema via WOQL"
|
|
160
|
-
)
|
|
161
|
-
context.log.info(str(rv))
|
|
162
|
-
return rv
|
|
163
|
-
|
|
164
|
-
|
|
165
132
|
@op(
|
|
166
133
|
required_resource_keys={"mongo", "runtime_api_site_client"},
|
|
167
134
|
retry_policy=RetryPolicy(max_retries=2),
|
|
@@ -768,6 +735,33 @@ def export_json_to_drs(
|
|
|
768
735
|
return ["/objects/" + drs_object["id"]]
|
|
769
736
|
|
|
770
737
|
|
|
738
|
+
@op(
|
|
739
|
+
description="NCBI Submission XML file rendered in a Dagster Asset",
|
|
740
|
+
out=Out(description="XML content rendered through Dagit UI"),
|
|
741
|
+
)
|
|
742
|
+
def ncbi_submission_xml_asset(context: OpExecutionContext, data: str):
|
|
743
|
+
filename = "ncbi_submission.xml"
|
|
744
|
+
file_path = os.path.join(context.instance.storage_directory(), filename)
|
|
745
|
+
|
|
746
|
+
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
|
747
|
+
|
|
748
|
+
with open(file_path, "w") as f:
|
|
749
|
+
f.write(data)
|
|
750
|
+
|
|
751
|
+
context.log_event(
|
|
752
|
+
AssetMaterialization(
|
|
753
|
+
asset_key="ncbi_submission_xml",
|
|
754
|
+
description="NCBI Submission XML Data",
|
|
755
|
+
metadata={
|
|
756
|
+
"file_path": MetadataValue.path(file_path),
|
|
757
|
+
"xml": MetadataValue.text(data),
|
|
758
|
+
},
|
|
759
|
+
)
|
|
760
|
+
)
|
|
761
|
+
|
|
762
|
+
return Output(data)
|
|
763
|
+
|
|
764
|
+
|
|
771
765
|
def unique_field_values(docs: List[Dict[str, Any]], field: str):
|
|
772
766
|
return {doc[field] for doc in docs if field in doc}
|
|
773
767
|
|
|
@@ -977,3 +971,85 @@ def site_code_mapping() -> dict:
|
|
|
977
971
|
raise Exception(
|
|
978
972
|
f"Failed to fetch site data from {endpoint}. Status code: {response.status_code}, Content: {response.content}"
|
|
979
973
|
)
|
|
974
|
+
|
|
975
|
+
|
|
976
|
+
@op(config_schema={"nmdc_study_id": str}, required_resource_keys={"mongo"})
|
|
977
|
+
def get_ncbi_export_pipeline_study(context: OpExecutionContext) -> Any:
|
|
978
|
+
nmdc_study = find_study_by_id(
|
|
979
|
+
context.op_config["nmdc_study_id"], context.resources.mongo.db
|
|
980
|
+
)
|
|
981
|
+
return nmdc_study
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
@op(
|
|
985
|
+
config_schema={
|
|
986
|
+
"nmdc_ncbi_attribute_mapping_file_url": str,
|
|
987
|
+
"ncbi_submission_metadata": Field(
|
|
988
|
+
Permissive(
|
|
989
|
+
{
|
|
990
|
+
"organization": String,
|
|
991
|
+
}
|
|
992
|
+
),
|
|
993
|
+
is_required=True,
|
|
994
|
+
description="General metadata about the NCBI submission.",
|
|
995
|
+
),
|
|
996
|
+
"ncbi_biosample_metadata": Field(
|
|
997
|
+
Permissive(
|
|
998
|
+
{
|
|
999
|
+
"organism_name": String,
|
|
1000
|
+
}
|
|
1001
|
+
),
|
|
1002
|
+
is_required=True,
|
|
1003
|
+
description="Metadata for one or many NCBI BioSample in the Submission.",
|
|
1004
|
+
),
|
|
1005
|
+
},
|
|
1006
|
+
out=Out(Dict),
|
|
1007
|
+
)
|
|
1008
|
+
def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> str:
|
|
1009
|
+
nmdc_ncbi_attribute_mapping_file_url = context.op_config[
|
|
1010
|
+
"nmdc_ncbi_attribute_mapping_file_url"
|
|
1011
|
+
]
|
|
1012
|
+
ncbi_submission_metadata = context.op_config.get("ncbi_submission_metadata", {})
|
|
1013
|
+
ncbi_biosample_metadata = context.op_config.get("ncbi_biosample_metadata", {})
|
|
1014
|
+
|
|
1015
|
+
return {
|
|
1016
|
+
"nmdc_ncbi_attribute_mapping_file_url": nmdc_ncbi_attribute_mapping_file_url,
|
|
1017
|
+
"ncbi_submission_metadata": ncbi_submission_metadata,
|
|
1018
|
+
"ncbi_biosample_metadata": ncbi_biosample_metadata,
|
|
1019
|
+
}
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
@op(required_resource_keys={"mongo"})
|
|
1023
|
+
def get_data_objects_from_biosamples(context: OpExecutionContext, biosamples: list):
|
|
1024
|
+
mdb = context.resources.mongo.db
|
|
1025
|
+
alldocs_collection = mdb["alldocs"]
|
|
1026
|
+
biosample_data_objects = fetch_data_objects_from_biosamples(
|
|
1027
|
+
alldocs_collection, biosamples
|
|
1028
|
+
)
|
|
1029
|
+
return biosample_data_objects
|
|
1030
|
+
|
|
1031
|
+
|
|
1032
|
+
@op(required_resource_keys={"mongo"})
|
|
1033
|
+
def get_omics_processing_from_biosamples(context: OpExecutionContext, biosamples: list):
|
|
1034
|
+
mdb = context.resources.mongo.db
|
|
1035
|
+
alldocs_collection = mdb["alldocs"]
|
|
1036
|
+
biosample_omics_processing = fetch_omics_processing_from_biosamples(
|
|
1037
|
+
alldocs_collection, biosamples
|
|
1038
|
+
)
|
|
1039
|
+
return biosample_omics_processing
|
|
1040
|
+
|
|
1041
|
+
|
|
1042
|
+
@op
|
|
1043
|
+
def ncbi_submission_xml_from_nmdc_study(
|
|
1044
|
+
context: OpExecutionContext,
|
|
1045
|
+
nmdc_study: Any,
|
|
1046
|
+
ncbi_exporter_metadata: dict,
|
|
1047
|
+
biosamples: list,
|
|
1048
|
+
omics_processing_records: list,
|
|
1049
|
+
data_objects: list,
|
|
1050
|
+
) -> str:
|
|
1051
|
+
ncbi_exporter = NCBISubmissionXML(nmdc_study, ncbi_exporter_metadata)
|
|
1052
|
+
ncbi_xml = ncbi_exporter.get_submission_xml(
|
|
1053
|
+
biosamples, omics_processing_records, data_objects
|
|
1054
|
+
)
|
|
1055
|
+
return ncbi_xml
|
nmdc_runtime/site/repository.py
CHANGED
|
@@ -42,6 +42,7 @@ from nmdc_runtime.site.graphs import (
|
|
|
42
42
|
ingest_neon_soil_metadata,
|
|
43
43
|
ingest_neon_benthic_metadata,
|
|
44
44
|
ingest_neon_surface_water_metadata,
|
|
45
|
+
nmdc_study_to_ncbi_submission_export,
|
|
45
46
|
)
|
|
46
47
|
from nmdc_runtime.site.resources import (
|
|
47
48
|
get_mongo,
|
|
@@ -50,7 +51,6 @@ from nmdc_runtime.site.resources import (
|
|
|
50
51
|
nmdc_portal_api_client_resource,
|
|
51
52
|
gold_api_client_resource,
|
|
52
53
|
neon_api_client_resource,
|
|
53
|
-
terminus_resource,
|
|
54
54
|
mongo_resource,
|
|
55
55
|
)
|
|
56
56
|
from nmdc_runtime.site.resources import (
|
|
@@ -68,7 +68,6 @@ resource_defs = {
|
|
|
68
68
|
"nmdc_portal_api_client": nmdc_portal_api_client_resource,
|
|
69
69
|
"gold_api_client": gold_api_client_resource,
|
|
70
70
|
"neon_api_client": neon_api_client_resource,
|
|
71
|
-
"terminus": terminus_resource,
|
|
72
71
|
"mongo": mongo_resource,
|
|
73
72
|
}
|
|
74
73
|
|
|
@@ -515,8 +514,8 @@ def biosample_submission_ingest():
|
|
|
515
514
|
"nmdc_portal_api_client": {
|
|
516
515
|
"config": {
|
|
517
516
|
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
|
|
518
|
-
"
|
|
519
|
-
"env": "
|
|
517
|
+
"refresh_token": {
|
|
518
|
+
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
|
|
520
519
|
},
|
|
521
520
|
}
|
|
522
521
|
}
|
|
@@ -555,8 +554,8 @@ def biosample_submission_ingest():
|
|
|
555
554
|
"nmdc_portal_api_client": {
|
|
556
555
|
"config": {
|
|
557
556
|
"base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
|
|
558
|
-
"
|
|
559
|
-
"env": "
|
|
557
|
+
"refresh_token": {
|
|
558
|
+
"env": "NMDC_PORTAL_API_REFRESH_TOKEN"
|
|
560
559
|
},
|
|
561
560
|
}
|
|
562
561
|
}
|
|
@@ -852,6 +851,57 @@ def biosample_submission_ingest():
|
|
|
852
851
|
]
|
|
853
852
|
|
|
854
853
|
|
|
854
|
+
@repository
|
|
855
|
+
def biosample_export():
|
|
856
|
+
normal_resources = run_config_frozen__normal_env["resources"]
|
|
857
|
+
return [
|
|
858
|
+
nmdc_study_to_ncbi_submission_export.to_job(
|
|
859
|
+
resource_defs=resource_defs,
|
|
860
|
+
config={
|
|
861
|
+
"resources": merge(
|
|
862
|
+
unfreeze(normal_resources),
|
|
863
|
+
{
|
|
864
|
+
"mongo": {
|
|
865
|
+
"config": {
|
|
866
|
+
"host": {"env": "MONGO_HOST"},
|
|
867
|
+
"username": {"env": "MONGO_USERNAME"},
|
|
868
|
+
"password": {"env": "MONGO_PASSWORD"},
|
|
869
|
+
"dbname": {"env": "MONGO_DBNAME"},
|
|
870
|
+
},
|
|
871
|
+
},
|
|
872
|
+
"runtime_api_site_client": {
|
|
873
|
+
"config": {
|
|
874
|
+
"base_url": {"env": "API_HOST"},
|
|
875
|
+
"client_id": {"env": "API_SITE_CLIENT_ID"},
|
|
876
|
+
"client_secret": {"env": "API_SITE_CLIENT_SECRET"},
|
|
877
|
+
"site_id": {"env": "API_SITE_ID"},
|
|
878
|
+
},
|
|
879
|
+
},
|
|
880
|
+
},
|
|
881
|
+
),
|
|
882
|
+
"ops": {
|
|
883
|
+
"get_ncbi_export_pipeline_study": {
|
|
884
|
+
"config": {
|
|
885
|
+
"nmdc_study_id": "",
|
|
886
|
+
}
|
|
887
|
+
},
|
|
888
|
+
"get_ncbi_export_pipeline_inputs": {
|
|
889
|
+
"config": {
|
|
890
|
+
"nmdc_ncbi_attribute_mapping_file_url": "",
|
|
891
|
+
"ncbi_submission_metadata": {
|
|
892
|
+
"organization": "",
|
|
893
|
+
},
|
|
894
|
+
"ncbi_biosample_metadata": {
|
|
895
|
+
"organism_name": "",
|
|
896
|
+
},
|
|
897
|
+
}
|
|
898
|
+
},
|
|
899
|
+
},
|
|
900
|
+
},
|
|
901
|
+
),
|
|
902
|
+
]
|
|
903
|
+
|
|
904
|
+
|
|
855
905
|
# @repository
|
|
856
906
|
# def validation():
|
|
857
907
|
# graph_jobs = [validate_jgi_job, validate_gold_job, validate_emsl_job]
|
nmdc_runtime/site/resources.py
CHANGED
|
@@ -19,7 +19,6 @@ from frozendict import frozendict
|
|
|
19
19
|
from linkml_runtime.dumpers import json_dumper
|
|
20
20
|
from pydantic import BaseModel, AnyUrl
|
|
21
21
|
from pymongo import MongoClient, ReplaceOne, InsertOne
|
|
22
|
-
from terminusdb_client import WOQLClient
|
|
23
22
|
from toolz import get_in
|
|
24
23
|
from toolz import merge
|
|
25
24
|
|
|
@@ -372,16 +371,37 @@ def gold_api_client_resource(context: InitResourceContext):
|
|
|
372
371
|
|
|
373
372
|
@dataclass
|
|
374
373
|
class NmdcPortalApiClient:
|
|
374
|
+
|
|
375
375
|
base_url: str
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
376
|
+
refresh_token: str
|
|
377
|
+
access_token: Optional[str] = None
|
|
378
|
+
access_token_expires_at: Optional[datetime] = None
|
|
379
|
+
|
|
380
|
+
def _request(self, method: str, endpoint: str, **kwargs):
|
|
381
|
+
r"""
|
|
382
|
+
Submits a request to the specified API endpoint;
|
|
383
|
+
after refreshing the access token, if necessary.
|
|
384
|
+
"""
|
|
385
|
+
if self.access_token is None or datetime.now() > self.access_token_expires_at:
|
|
386
|
+
refresh_response = requests.post(
|
|
387
|
+
f"{self.base_url}/auth/refresh",
|
|
388
|
+
json={"refresh_token": self.refresh_token},
|
|
389
|
+
)
|
|
390
|
+
refresh_response.raise_for_status()
|
|
391
|
+
refresh_body = refresh_response.json()
|
|
392
|
+
self.access_token_expires_at = datetime.now() + timedelta(
|
|
393
|
+
seconds=refresh_body["expires_in"]
|
|
394
|
+
)
|
|
395
|
+
self.access_token = refresh_body["access_token"]
|
|
379
396
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
397
|
+
headers = kwargs.get("headers", {})
|
|
398
|
+
headers["Authorization"] = f"Bearer {self.access_token}"
|
|
399
|
+
return requests.request(
|
|
400
|
+
method, f"{self.base_url}{endpoint}", **kwargs, headers=headers
|
|
384
401
|
)
|
|
402
|
+
|
|
403
|
+
def fetch_metadata_submission(self, id: str) -> Dict[str, Any]:
|
|
404
|
+
response = self._request("GET", f"/api/metadata_submission/{id}")
|
|
385
405
|
response.raise_for_status()
|
|
386
406
|
return response.json()
|
|
387
407
|
|
|
@@ -389,13 +409,13 @@ class NmdcPortalApiClient:
|
|
|
389
409
|
@resource(
|
|
390
410
|
config_schema={
|
|
391
411
|
"base_url": StringSource,
|
|
392
|
-
"
|
|
412
|
+
"refresh_token": StringSource,
|
|
393
413
|
}
|
|
394
414
|
)
|
|
395
415
|
def nmdc_portal_api_client_resource(context: InitResourceContext):
|
|
396
416
|
return NmdcPortalApiClient(
|
|
397
417
|
base_url=context.resource_config["base_url"],
|
|
398
|
-
|
|
418
|
+
refresh_token=context.resource_config["refresh_token"],
|
|
399
419
|
)
|
|
400
420
|
|
|
401
421
|
|
|
@@ -512,33 +532,3 @@ def get_mongo(run_config: frozendict):
|
|
|
512
532
|
)
|
|
513
533
|
)
|
|
514
534
|
return mongo_resource(resource_context)
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
class TerminusDB:
|
|
518
|
-
def __init__(self, server_url, user, key, account, dbid):
|
|
519
|
-
self.client = WOQLClient(server_url=server_url)
|
|
520
|
-
self.client.connect(user=user, key=key, account=account)
|
|
521
|
-
db_info = self.client.get_database(dbid=dbid, account=account)
|
|
522
|
-
if db_info is None:
|
|
523
|
-
self.client.create_database(dbid=dbid, accountid=account, label=dbid)
|
|
524
|
-
self.client.create_graph(graph_type="inference", graph_id="main")
|
|
525
|
-
self.client.connect(user=user, key=key, account=account, db=dbid)
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
@resource(
|
|
529
|
-
config_schema={
|
|
530
|
-
"server_url": StringSource,
|
|
531
|
-
"user": StringSource,
|
|
532
|
-
"key": StringSource,
|
|
533
|
-
"account": StringSource,
|
|
534
|
-
"dbid": StringSource,
|
|
535
|
-
}
|
|
536
|
-
)
|
|
537
|
-
def terminus_resource(context):
|
|
538
|
-
return TerminusDB(
|
|
539
|
-
server_url=context.resource_config["server_url"],
|
|
540
|
-
user=context.resource_config["user"],
|
|
541
|
-
key=context.resource_config["key"],
|
|
542
|
-
account=context.resource_config["account"],
|
|
543
|
-
dbid=context.resource_config["dbid"],
|
|
544
|
-
)
|
|
@@ -13,6 +13,9 @@ from toolz import get_in, groupby, concat, valmap, dissoc
|
|
|
13
13
|
from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"
|
|
17
|
+
|
|
18
|
+
|
|
16
19
|
@lru_cache
|
|
17
20
|
def _get_schema_view():
|
|
18
21
|
"""Return a SchemaView instance representing the NMDC schema"""
|
|
@@ -98,7 +101,9 @@ class SubmissionPortalTranslator(Translator):
|
|
|
98
101
|
self.study_pi_image_url = study_pi_image_url
|
|
99
102
|
self.study_funding_sources = study_funding_sources
|
|
100
103
|
|
|
101
|
-
self.biosample_extras = group_dicts_by_key(
|
|
104
|
+
self.biosample_extras = group_dicts_by_key(
|
|
105
|
+
BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
|
|
106
|
+
)
|
|
102
107
|
self.biosample_extras_slot_mapping = group_dicts_by_key(
|
|
103
108
|
"subject_id", biosample_extras_slot_mapping
|
|
104
109
|
)
|
|
@@ -521,7 +526,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
521
526
|
:param default_env_package: Default value for `env_package` slot
|
|
522
527
|
:return: nmdc:Biosample
|
|
523
528
|
"""
|
|
524
|
-
|
|
529
|
+
biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
|
|
525
530
|
slots = {
|
|
526
531
|
"id": nmdc_biosample_id,
|
|
527
532
|
"part_of": nmdc_study_id,
|
|
@@ -533,7 +538,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
533
538
|
slots.update(transformed_tab)
|
|
534
539
|
|
|
535
540
|
if self.biosample_extras:
|
|
536
|
-
raw_extras = self.biosample_extras.get(
|
|
541
|
+
raw_extras = self.biosample_extras.get(biosample_key)
|
|
537
542
|
if raw_extras:
|
|
538
543
|
transformed_extras = self._transform_dict_for_class(
|
|
539
544
|
raw_extras, "Biosample", self.biosample_extras_slot_mapping
|
|
@@ -564,7 +569,9 @@ class SubmissionPortalTranslator(Translator):
|
|
|
564
569
|
|
|
565
570
|
sample_data = metadata_submission_data.get("sampleData", {})
|
|
566
571
|
package_name = metadata_submission_data["packageName"]
|
|
567
|
-
sample_data_by_id = groupby(
|
|
572
|
+
sample_data_by_id = groupby(
|
|
573
|
+
BIOSAMPLE_UNIQUE_KEY_SLOT, concat(sample_data.values())
|
|
574
|
+
)
|
|
568
575
|
nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_data_by_id))
|
|
569
576
|
sample_data_to_nmdc_biosample_ids = dict(
|
|
570
577
|
zip(sample_data_by_id.keys(), nmdc_biosample_ids)
|
|
@@ -583,15 +590,15 @@ class SubmissionPortalTranslator(Translator):
|
|
|
583
590
|
|
|
584
591
|
if self.omics_processing_mapping:
|
|
585
592
|
# If there is data from an OmicsProcessing mapping file, process it now. This part
|
|
586
|
-
# assumes that there is a column in that file with the header
|
|
593
|
+
# assumes that there is a column in that file with the header __biosample_samp_name
|
|
587
594
|
# that can be used to join with the sample data from the submission portal. The
|
|
588
|
-
# biosample identified by that `
|
|
595
|
+
# biosample identified by that `samp_name` will be referenced in the `has_input`
|
|
589
596
|
# slot of the OmicsProcessing object. If a DataObject mapping file was also provided,
|
|
590
597
|
# those objects will also be generated and referenced in the `has_output` slot of the
|
|
591
|
-
# OmicsProcessing object. By keying off of the `
|
|
598
|
+
# OmicsProcessing object. By keying off of the `samp_name` slot of the submission's
|
|
592
599
|
# sample data there is an implicit 1:1 relationship between Biosample objects and
|
|
593
600
|
# OmicsProcessing objects generated here.
|
|
594
|
-
join_key = "
|
|
601
|
+
join_key = f"__biosample_{BIOSAMPLE_UNIQUE_KEY_SLOT}"
|
|
595
602
|
database.omics_processing_set = []
|
|
596
603
|
database.data_object_set = []
|
|
597
604
|
data_objects_by_sample_data_id = {}
|
|
@@ -617,7 +624,7 @@ class SubmissionPortalTranslator(Translator):
|
|
|
617
624
|
or sample_data_id not in sample_data_to_nmdc_biosample_ids
|
|
618
625
|
):
|
|
619
626
|
logging.warning(
|
|
620
|
-
f"Unrecognized biosample
|
|
627
|
+
f"Unrecognized biosample {BIOSAMPLE_UNIQUE_KEY_SLOT}: {sample_data_id}"
|
|
621
628
|
)
|
|
622
629
|
continue
|
|
623
630
|
nmdc_biosample_id = sample_data_to_nmdc_biosample_ids[sample_data_id]
|
nmdc_runtime/util.py
CHANGED
|
@@ -16,7 +16,7 @@ import fastjsonschema
|
|
|
16
16
|
import requests
|
|
17
17
|
from frozendict import frozendict
|
|
18
18
|
from jsonschema.validators import Draft7Validator
|
|
19
|
-
from nmdc_schema.
|
|
19
|
+
from nmdc_schema.nmdc import Database as NMDCDatabase
|
|
20
20
|
from nmdc_schema.get_nmdc_view import ViewGetter
|
|
21
21
|
from pydantic import Field, BaseModel
|
|
22
22
|
from pymongo.database import Database as MongoDatabase
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nmdc_runtime
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: A runtime system for NMDC data management and orchestration
|
|
5
5
|
Home-page: https://github.com/microbiomedata/nmdc-runtime
|
|
6
6
|
Author: Donny Winston
|
|
@@ -77,18 +77,15 @@ The runtime features:
|
|
|
77
77
|
- `schedules` trigger recurring pipeline runs based on time
|
|
78
78
|
- `sensors` trigger pipeline runs based on external state
|
|
79
79
|
- Each `pipeline` can declare dependencies on any runtime `resources` or additional
|
|
80
|
-
configuration. There are
|
|
80
|
+
configuration. There are MongoDB `resources` defined, as well as `preset`
|
|
81
81
|
configuration definitions for both "dev" and "prod" `modes`. The `preset`s tell Dagster to
|
|
82
82
|
look to a set of known environment variables to load resources configurations, depending on
|
|
83
83
|
the `mode`.
|
|
84
|
-
|
|
85
|
-
2. A [TerminusDB](https://terminusdb.com/) database supporting revision control of schema-validated
|
|
86
|
-
data.
|
|
87
84
|
|
|
88
|
-
|
|
85
|
+
2. A MongoDB database supporting write-once, high-throughput internal
|
|
89
86
|
data storage by the nmdc-runtime FastAPI instance.
|
|
90
87
|
|
|
91
|
-
|
|
88
|
+
3. A [FastAPI](https://fastapi.tiangolo.com/) service to interface with the orchestrator and
|
|
92
89
|
database, as a hub for data management and workflow automation.
|
|
93
90
|
|
|
94
91
|
## Local Development
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
|
|
3
3
|
nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
nmdc_runtime/util.py,sha256=
|
|
4
|
+
nmdc_runtime/util.py,sha256=3mHVEUdMOv73XgT6NTuzMuMCL5Gs6NJ4Mk0bkgQQaQU,19844
|
|
5
5
|
nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
|
|
@@ -35,10 +35,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
|
|
|
35
35
|
nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
36
|
nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
|
|
37
37
|
nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
38
|
-
nmdc_runtime/site/graphs.py,sha256=
|
|
39
|
-
nmdc_runtime/site/ops.py,sha256=
|
|
40
|
-
nmdc_runtime/site/repository.py,sha256
|
|
41
|
-
nmdc_runtime/site/resources.py,sha256=
|
|
38
|
+
nmdc_runtime/site/graphs.py,sha256=_vCyQnICis4OQGH91i1ZwpvHYcXOG6Nfg04f5DVdy2M,12040
|
|
39
|
+
nmdc_runtime/site/ops.py,sha256=G6X3YgSmDNxOnsMEByLUMfB0peY4o21o0_Ig3V7v6M4,35835
|
|
40
|
+
nmdc_runtime/site/repository.py,sha256=-dOk9BEnLSrmAN6bZoIu_WnFSqriIpO0c5P76PuHW1M,37472
|
|
41
|
+
nmdc_runtime/site/resources.py,sha256=ZSH1yvA-li0R7Abc22_v0XLbjBYf5igETr2G01J3hnc,17557
|
|
42
42
|
nmdc_runtime/site/util.py,sha256=6hyVPpb6ZkWEG8Nm7uQxnZ-QmuPOG9hgWvl0mUBr5JU,1303
|
|
43
43
|
nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
|
|
@@ -50,13 +50,11 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
|
|
|
50
50
|
nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
|
|
51
51
|
nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
|
|
52
52
|
nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
-
nmdc_runtime/site/export/
|
|
53
|
+
nmdc_runtime/site/export/ncbi_xml.py,sha256=Z2qsaGIBvY2OdOkf8kJEZl1T_8R_YzhAlXxJ1gMQwnk,16946
|
|
54
|
+
nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=CqrtjwzmUbZXEW8aD-KpnCV_PlXVH-Gqp309nw3vbeo,6464
|
|
55
|
+
nmdc_runtime/site/export/study_metadata.py,sha256=WRU0F1ksWfNX3k9LD91Pn2DuLA-IOpGvYPJd6DnguEs,4819
|
|
54
56
|
nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
57
|
nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
|
|
56
|
-
nmdc_runtime/site/terminusdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
|
-
nmdc_runtime/site/terminusdb/generate.py,sha256=Z3c06LDx3TGw4pvPRO97caQvzc8SuhGmPIr_d5b_E9I,6144
|
|
58
|
-
nmdc_runtime/site/terminusdb/ingest.py,sha256=WE_V4vRRnlL6hIBU1TDSUheYOBWS9d5g6FHPS64jzvM,1245
|
|
59
|
-
nmdc_runtime/site/terminusdb/schema.py,sha256=3e39rHUSZsNbN_F0SHHNsvcEGRWtYa6O9KNj3cH3tUs,77129
|
|
60
58
|
nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
59
|
nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
|
|
62
60
|
nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
|
|
@@ -66,7 +64,7 @@ nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=e_7tXFrP0PpdhqUC
|
|
|
66
64
|
nmdc_runtime/site/translation/neon_soil_translator.py,sha256=cJJ_QPva5G5SIT_7DjCSsqbDvgbiKGqUYrxK3nx7_Lw,37634
|
|
67
65
|
nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=6LaFwBnVx6TN9v1D-G6LFrDxY0TK05AvMklx0E1tTeQ,26590
|
|
68
66
|
nmdc_runtime/site/translation/neon_utils.py,sha256=mdxJVPb3zbD4DiKW3Fwgk22kjczKMwkcozvy7fwteTE,5203
|
|
69
|
-
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=
|
|
67
|
+
nmdc_runtime/site/translation/submission_portal_translator.py,sha256=KiVO1vohhrJGfwzLJOumRfyHjcbYfswBIBvkYIdFxv8,28097
|
|
70
68
|
nmdc_runtime/site/translation/translator.py,sha256=xM9dM-nTgSWwu5HFoUVNHf8kqk9iiH4PgWdSx4OKxEk,601
|
|
71
69
|
nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
|
|
72
70
|
nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -74,9 +72,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
|
|
|
74
72
|
nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
|
|
75
73
|
nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
|
|
76
74
|
nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
|
|
77
|
-
nmdc_runtime-1.
|
|
78
|
-
nmdc_runtime-1.
|
|
79
|
-
nmdc_runtime-1.
|
|
80
|
-
nmdc_runtime-1.
|
|
81
|
-
nmdc_runtime-1.
|
|
82
|
-
nmdc_runtime-1.
|
|
75
|
+
nmdc_runtime-1.7.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
|
|
76
|
+
nmdc_runtime-1.7.0.dist-info/METADATA,sha256=FnoXHNgR6o5PEe6XhqRGdqOjbIX_ry-SKY5uMtZJQXY,7302
|
|
77
|
+
nmdc_runtime-1.7.0.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
78
|
+
nmdc_runtime-1.7.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
|
|
79
|
+
nmdc_runtime-1.7.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
|
|
80
|
+
nmdc_runtime-1.7.0.dist-info/RECORD,,
|