nmdc-runtime 1.5.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

@@ -22,7 +22,6 @@ from nmdc_runtime.site.ops import (
22
22
  hello,
23
23
  mongo_stats,
24
24
  submit_metadata_to_db,
25
- update_schema,
26
25
  filter_ops_undone_expired,
27
26
  construct_jobs,
28
27
  maybe_post_jobs,
@@ -38,16 +37,25 @@ from nmdc_runtime.site.ops import (
38
37
  neon_data_by_product,
39
38
  nmdc_schema_database_from_neon_soil_data,
40
39
  nmdc_schema_database_from_neon_benthic_data,
40
+ nmdc_schema_database_from_neon_surface_water_data,
41
41
  nmdc_schema_database_export_filename_neon,
42
42
  get_neon_pipeline_mms_data_product,
43
43
  get_neon_pipeline_sls_data_product,
44
+ get_neon_pipeline_surface_water_data_product,
44
45
  get_submission_portal_pipeline_inputs,
45
46
  get_csv_rows_from_url,
46
47
  get_neon_pipeline_benthic_data_product,
47
48
  get_neon_pipeline_inputs,
48
49
  get_df_from_url,
49
50
  site_code_mapping,
51
+ get_ncbi_export_pipeline_study,
52
+ get_data_objects_from_biosamples,
53
+ get_omics_processing_from_biosamples,
54
+ get_ncbi_export_pipeline_inputs,
55
+ ncbi_submission_xml_from_nmdc_study,
56
+ ncbi_submission_xml_asset,
50
57
  )
58
+ from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
51
59
 
52
60
 
53
61
  @graph
@@ -85,17 +93,6 @@ def hello_mongo():
85
93
  mongo_stats()
86
94
 
87
95
 
88
- @graph
89
- def update_terminus():
90
- """
91
- A pipeline definition. This example pipeline has a single solid.
92
-
93
- For more hints on writing Dagster pipelines, see our documentation overview on Pipelines:
94
- https://docs.dagster.io/overview/solids-pipelines/pipelines
95
- """
96
- update_schema()
97
-
98
-
99
96
  @graph
100
97
  def housekeeping():
101
98
  delete_operations(list_operations(filter_ops_undone_expired()))
@@ -317,3 +314,82 @@ def ingest_neon_benthic_metadata():
317
314
  )
318
315
  run_id = submit_metadata_to_db(database)
319
316
  poll_for_run_completion(run_id)
317
+
318
+
319
+ @graph
320
+ def translate_neon_api_surface_water_metadata_to_nmdc_schema_database():
321
+ mms_surface_water_data_product = get_neon_pipeline_surface_water_data_product()
322
+
323
+ mms_surface_water = neon_data_by_product(mms_surface_water_data_product)
324
+
325
+ sites_mapping_dict = site_code_mapping()
326
+
327
+ (
328
+ neon_envo_mappings_file_url,
329
+ neon_raw_data_file_mappings_file_url,
330
+ ) = get_neon_pipeline_inputs()
331
+
332
+ neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url)
333
+
334
+ neon_raw_data_file_mappings_file = get_df_from_url(
335
+ neon_raw_data_file_mappings_file_url
336
+ )
337
+
338
+ database = nmdc_schema_database_from_neon_surface_water_data(
339
+ mms_surface_water,
340
+ sites_mapping_dict,
341
+ neon_envo_mappings_file,
342
+ neon_raw_data_file_mappings_file,
343
+ )
344
+
345
+ database_dict = nmdc_schema_object_to_dict(database)
346
+ filename = nmdc_schema_database_export_filename_neon()
347
+
348
+ outputs = export_json_to_drs(database_dict, filename)
349
+ add_output_run_event(outputs)
350
+
351
+
352
+ @graph
353
+ def ingest_neon_surface_water_metadata():
354
+ mms_surface_water_data_product = get_neon_pipeline_surface_water_data_product()
355
+
356
+ mms_surface_water = neon_data_by_product(mms_surface_water_data_product)
357
+
358
+ sites_mapping_dict = site_code_mapping()
359
+
360
+ (
361
+ neon_envo_mappings_file_url,
362
+ neon_raw_data_file_mappings_file_url,
363
+ ) = get_neon_pipeline_inputs()
364
+
365
+ neon_envo_mappings_file = get_df_from_url(neon_envo_mappings_file_url)
366
+
367
+ neon_raw_data_file_mappings_file = get_df_from_url(
368
+ neon_raw_data_file_mappings_file_url
369
+ )
370
+
371
+ database = nmdc_schema_database_from_neon_benthic_data(
372
+ mms_surface_water,
373
+ sites_mapping_dict,
374
+ neon_envo_mappings_file,
375
+ neon_raw_data_file_mappings_file,
376
+ )
377
+ run_id = submit_metadata_to_db(database)
378
+ poll_for_run_completion(run_id)
379
+
380
+
381
+ @graph
382
+ def nmdc_study_to_ncbi_submission_export():
383
+ nmdc_study = get_ncbi_export_pipeline_study()
384
+ ncbi_submission_metadata = get_ncbi_export_pipeline_inputs()
385
+ biosamples = get_biosamples_by_study_id(nmdc_study)
386
+ omics_processing_records = get_omics_processing_from_biosamples(biosamples)
387
+ data_objects = get_data_objects_from_biosamples(biosamples)
388
+ xml_data = ncbi_submission_xml_from_nmdc_study(
389
+ nmdc_study,
390
+ ncbi_submission_metadata,
391
+ biosamples,
392
+ omics_processing_records,
393
+ data_objects,
394
+ )
395
+ ncbi_submission_xml_asset(xml_data)
nmdc_runtime/site/ops.py CHANGED
@@ -9,6 +9,7 @@ from datetime import datetime, timezone
9
9
  from io import BytesIO, StringIO
10
10
  from typing import Tuple
11
11
  from zipfile import ZipFile
12
+
12
13
  import pandas as pd
13
14
  import requests
14
15
 
@@ -29,10 +30,14 @@ from dagster import (
29
30
  String,
30
31
  op,
31
32
  Optional,
33
+ Field,
34
+ Permissive,
35
+ Bool,
32
36
  )
33
37
  from gridfs import GridFS
34
38
  from linkml_runtime.dumpers import json_dumper
35
39
  from linkml_runtime.utils.yamlutils import YAMLRoot
40
+ from nmdc_runtime.api.db.mongo import get_mongo_db
36
41
  from nmdc_runtime.api.core.idgen import generate_one_id
37
42
  from nmdc_runtime.api.core.metadata import (
38
43
  _validate_changesheet,
@@ -42,6 +47,7 @@ from nmdc_runtime.api.core.metadata import (
42
47
  )
43
48
  from nmdc_runtime.api.core.util import dotted_path_for, hash_from_str, json_clean, now
44
49
  from nmdc_runtime.api.endpoints.util import persist_content_and_get_drs_object
50
+ from nmdc_runtime.api.endpoints.find import find_study_by_id
45
51
  from nmdc_runtime.api.models.job import Job, JobOperationMetadata
46
52
  from nmdc_runtime.api.models.metadata import ChangesheetIn
47
53
  from nmdc_runtime.api.models.operation import (
@@ -55,6 +61,11 @@ from nmdc_runtime.api.models.run import (
55
61
  _add_run_complete_event,
56
62
  )
57
63
  from nmdc_runtime.api.models.util import ResultT
64
+ from nmdc_runtime.site.export.ncbi_xml import NCBISubmissionXML
65
+ from nmdc_runtime.site.export.ncbi_xml_utils import (
66
+ fetch_data_objects_from_biosamples,
67
+ fetch_omics_processing_from_biosamples,
68
+ )
58
69
  from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
59
70
  from nmdc_runtime.site.resources import (
60
71
  NmdcPortalApiClient,
@@ -68,6 +79,9 @@ from nmdc_runtime.site.translation.neon_soil_translator import NeonSoilDataTrans
68
79
  from nmdc_runtime.site.translation.neon_benthic_translator import (
69
80
  NeonBenthicDataTranslator,
70
81
  )
82
+ from nmdc_runtime.site.translation.neon_surface_water_translator import (
83
+ NeonSurfaceWaterDataTranslator,
84
+ )
71
85
  from nmdc_runtime.site.translation.submission_portal_translator import (
72
86
  SubmissionPortalTranslator,
73
87
  )
@@ -83,7 +97,6 @@ from nmdc_schema import nmdc
83
97
  from pydantic import BaseModel
84
98
  from pymongo.database import Database as MongoDatabase
85
99
  from starlette import status
86
- from terminusdb_client.woqlquery import WOQLQuery as WQ
87
100
  from toolz import assoc, dissoc, get_in, valfilter, identity
88
101
 
89
102
 
@@ -108,14 +121,6 @@ def log_env(context):
108
121
  context.log.info("\n".join(out))
109
122
 
110
123
 
111
- @op(required_resource_keys={"terminus"})
112
- def list_databases(context) -> List[String]:
113
- client = context.resources.terminus.client
114
- list_ = client.list_databases()
115
- context.log.info(f"databases: {list_}")
116
- return list_
117
-
118
-
119
124
  @op(required_resource_keys={"mongo"})
120
125
  def mongo_stats(context) -> List[str]:
121
126
  db = context.resources.mongo.db
@@ -124,41 +129,6 @@ def mongo_stats(context) -> List[str]:
124
129
  return collection_names
125
130
 
126
131
 
127
- @op(required_resource_keys={"terminus"})
128
- def update_schema(context):
129
- with tempfile.TemporaryDirectory() as tmpdirname:
130
- try:
131
- context.log.info("shallow-cloning nmdc-schema repo")
132
- subprocess.check_output(
133
- "git clone https://github.com/microbiomedata/nmdc-schema.git"
134
- f" --branch main --single-branch {tmpdirname}/nmdc-schema",
135
- shell=True,
136
- )
137
- context.log.info("generating TerminusDB JSON-LD from NMDC LinkML")
138
- subprocess.check_output(
139
- f"gen-terminusdb {tmpdirname}/nmdc-schema/src/schema/nmdc.yaml"
140
- f" > {tmpdirname}/nmdc.terminus.json",
141
- shell=True,
142
- )
143
- except subprocess.CalledProcessError as e:
144
- if e.stdout:
145
- context.log.debug(e.stdout.decode())
146
- if e.stderr:
147
- context.log.error(e.stderr.decode())
148
- context.log.debug(str(e.returncode))
149
- raise e
150
-
151
- with open(f"{tmpdirname}/nmdc.terminus.json") as f:
152
- woql_dict = json.load(f)
153
-
154
- context.log.info("Updating terminus schema via WOQLQuery")
155
- rv = WQ(query=woql_dict).execute(
156
- context.resources.terminus.client, "update schema via WOQL"
157
- )
158
- context.log.info(str(rv))
159
- return rv
160
-
161
-
162
132
  @op(
163
133
  required_resource_keys={"mongo", "runtime_api_site_client"},
164
134
  retry_policy=RetryPolicy(max_retries=2),
@@ -765,6 +735,33 @@ def export_json_to_drs(
765
735
  return ["/objects/" + drs_object["id"]]
766
736
 
767
737
 
738
+ @op(
739
+ description="NCBI Submission XML file rendered in a Dagster Asset",
740
+ out=Out(description="XML content rendered through Dagit UI"),
741
+ )
742
+ def ncbi_submission_xml_asset(context: OpExecutionContext, data: str):
743
+ filename = "ncbi_submission.xml"
744
+ file_path = os.path.join(context.instance.storage_directory(), filename)
745
+
746
+ os.makedirs(os.path.dirname(file_path), exist_ok=True)
747
+
748
+ with open(file_path, "w") as f:
749
+ f.write(data)
750
+
751
+ context.log_event(
752
+ AssetMaterialization(
753
+ asset_key="ncbi_submission_xml",
754
+ description="NCBI Submission XML Data",
755
+ metadata={
756
+ "file_path": MetadataValue.path(file_path),
757
+ "xml": MetadataValue.text(data),
758
+ },
759
+ )
760
+ )
761
+
762
+ return Output(data)
763
+
764
+
768
765
  def unique_field_values(docs: List[Dict[str, Any]], field: str):
769
766
  return {doc[field] for doc in docs if field in doc}
770
767
 
@@ -784,6 +781,11 @@ def get_neon_pipeline_benthic_data_product(context: OpExecutionContext) -> dict:
784
781
  return context.op_config["benthic_data_product"]
785
782
 
786
783
 
784
+ @op(config_schema={"surface_water_data_product": dict})
785
+ def get_neon_pipeline_surface_water_data_product(context: OpExecutionContext) -> dict:
786
+ return context.op_config["surface_water_data_product"]
787
+
788
+
787
789
  @op(required_resource_keys={"neon_api_client"})
788
790
  def neon_data_by_product(
789
791
  context: OpExecutionContext, data_product: dict
@@ -862,6 +864,32 @@ def nmdc_schema_database_from_neon_benthic_data(
862
864
  return database
863
865
 
864
866
 
867
+ @op(required_resource_keys={"runtime_api_site_client"})
868
+ def nmdc_schema_database_from_neon_surface_water_data(
869
+ context: OpExecutionContext,
870
+ surface_water_data: Dict[str, pd.DataFrame],
871
+ site_code_mapping: Dict[str, str],
872
+ neon_envo_mappings_file: pd.DataFrame,
873
+ neon_raw_data_file_mappings_file: pd.DataFrame,
874
+ ) -> nmdc.Database:
875
+ client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
876
+
877
+ def id_minter(*args, **kwargs):
878
+ response = client.mint_id(*args, **kwargs)
879
+ return response.json()
880
+
881
+ translator = NeonSurfaceWaterDataTranslator(
882
+ surface_water_data,
883
+ site_code_mapping,
884
+ neon_envo_mappings_file,
885
+ neon_raw_data_file_mappings_file,
886
+ id_minter=id_minter,
887
+ )
888
+
889
+ database = translator.get_database()
890
+ return database
891
+
892
+
865
893
  @op(
866
894
  out={
867
895
  "neon_envo_mappings_file_url": Out(),
@@ -943,3 +971,85 @@ def site_code_mapping() -> dict:
943
971
  raise Exception(
944
972
  f"Failed to fetch site data from {endpoint}. Status code: {response.status_code}, Content: {response.content}"
945
973
  )
974
+
975
+
976
+ @op(config_schema={"nmdc_study_id": str}, required_resource_keys={"mongo"})
977
+ def get_ncbi_export_pipeline_study(context: OpExecutionContext) -> Any:
978
+ nmdc_study = find_study_by_id(
979
+ context.op_config["nmdc_study_id"], context.resources.mongo.db
980
+ )
981
+ return nmdc_study
982
+
983
+
984
+ @op(
985
+ config_schema={
986
+ "nmdc_ncbi_attribute_mapping_file_url": str,
987
+ "ncbi_submission_metadata": Field(
988
+ Permissive(
989
+ {
990
+ "organization": String,
991
+ }
992
+ ),
993
+ is_required=True,
994
+ description="General metadata about the NCBI submission.",
995
+ ),
996
+ "ncbi_biosample_metadata": Field(
997
+ Permissive(
998
+ {
999
+ "organism_name": String,
1000
+ }
1001
+ ),
1002
+ is_required=True,
1003
+ description="Metadata for one or many NCBI BioSample in the Submission.",
1004
+ ),
1005
+ },
1006
+ out=Out(Dict),
1007
+ )
1008
+ def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> str:
1009
+ nmdc_ncbi_attribute_mapping_file_url = context.op_config[
1010
+ "nmdc_ncbi_attribute_mapping_file_url"
1011
+ ]
1012
+ ncbi_submission_metadata = context.op_config.get("ncbi_submission_metadata", {})
1013
+ ncbi_biosample_metadata = context.op_config.get("ncbi_biosample_metadata", {})
1014
+
1015
+ return {
1016
+ "nmdc_ncbi_attribute_mapping_file_url": nmdc_ncbi_attribute_mapping_file_url,
1017
+ "ncbi_submission_metadata": ncbi_submission_metadata,
1018
+ "ncbi_biosample_metadata": ncbi_biosample_metadata,
1019
+ }
1020
+
1021
+
1022
+ @op(required_resource_keys={"mongo"})
1023
+ def get_data_objects_from_biosamples(context: OpExecutionContext, biosamples: list):
1024
+ mdb = context.resources.mongo.db
1025
+ alldocs_collection = mdb["alldocs"]
1026
+ biosample_data_objects = fetch_data_objects_from_biosamples(
1027
+ alldocs_collection, biosamples
1028
+ )
1029
+ return biosample_data_objects
1030
+
1031
+
1032
+ @op(required_resource_keys={"mongo"})
1033
+ def get_omics_processing_from_biosamples(context: OpExecutionContext, biosamples: list):
1034
+ mdb = context.resources.mongo.db
1035
+ alldocs_collection = mdb["alldocs"]
1036
+ biosample_omics_processing = fetch_omics_processing_from_biosamples(
1037
+ alldocs_collection, biosamples
1038
+ )
1039
+ return biosample_omics_processing
1040
+
1041
+
1042
+ @op
1043
+ def ncbi_submission_xml_from_nmdc_study(
1044
+ context: OpExecutionContext,
1045
+ nmdc_study: Any,
1046
+ ncbi_exporter_metadata: dict,
1047
+ biosamples: list,
1048
+ omics_processing_records: list,
1049
+ data_objects: list,
1050
+ ) -> str:
1051
+ ncbi_exporter = NCBISubmissionXML(nmdc_study, ncbi_exporter_metadata)
1052
+ ncbi_xml = ncbi_exporter.get_submission_xml(
1053
+ biosamples, omics_processing_records, data_objects
1054
+ )
1055
+ return ncbi_xml
@@ -38,8 +38,11 @@ from nmdc_runtime.site.graphs import (
38
38
  hello_graph,
39
39
  translate_neon_api_soil_metadata_to_nmdc_schema_database,
40
40
  translate_neon_api_benthic_metadata_to_nmdc_schema_database,
41
+ translate_neon_api_surface_water_metadata_to_nmdc_schema_database,
41
42
  ingest_neon_soil_metadata,
42
43
  ingest_neon_benthic_metadata,
44
+ ingest_neon_surface_water_metadata,
45
+ nmdc_study_to_ncbi_submission_export,
43
46
  )
44
47
  from nmdc_runtime.site.resources import (
45
48
  get_mongo,
@@ -48,7 +51,6 @@ from nmdc_runtime.site.resources import (
48
51
  nmdc_portal_api_client_resource,
49
52
  gold_api_client_resource,
50
53
  neon_api_client_resource,
51
- terminus_resource,
52
54
  mongo_resource,
53
55
  )
54
56
  from nmdc_runtime.site.resources import (
@@ -66,7 +68,6 @@ resource_defs = {
66
68
  "nmdc_portal_api_client": nmdc_portal_api_client_resource,
67
69
  "gold_api_client": gold_api_client_resource,
68
70
  "neon_api_client": neon_api_client_resource,
69
- "terminus": terminus_resource,
70
71
  "mongo": mongo_resource,
71
72
  }
72
73
 
@@ -513,8 +514,8 @@ def biosample_submission_ingest():
513
514
  "nmdc_portal_api_client": {
514
515
  "config": {
515
516
  "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
516
- "session_cookie": {
517
- "env": "NMDC_PORTAL_API_SESSION_COOKIE"
517
+ "refresh_token": {
518
+ "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
518
519
  },
519
520
  }
520
521
  }
@@ -553,8 +554,8 @@ def biosample_submission_ingest():
553
554
  "nmdc_portal_api_client": {
554
555
  "config": {
555
556
  "base_url": {"env": "NMDC_PORTAL_API_BASE_URL"},
556
- "session_cookie": {
557
- "env": "NMDC_PORTAL_API_SESSION_COOKIE"
557
+ "refresh_token": {
558
+ "env": "NMDC_PORTAL_API_REFRESH_TOKEN"
558
559
  },
559
560
  }
560
561
  }
@@ -764,6 +765,140 @@ def biosample_submission_ingest():
764
765
  },
765
766
  },
766
767
  ),
768
+ translate_neon_api_surface_water_metadata_to_nmdc_schema_database.to_job(
769
+ description="This job fetches the metadata associated with a given NEON data product code and translates it into an equivalent nmdc:Database object. The object is serialized to JSON and stored in DRS. This can be considered a dry-run for the `ingest_neon_metadata` job.",
770
+ resource_defs=resource_defs,
771
+ config={
772
+ "resources": merge(
773
+ unfreeze(normal_resources),
774
+ {
775
+ "neon_api_client": {
776
+ "config": {
777
+ "base_url": {"env": "NEON_API_BASE_URL"},
778
+ "api_token": {"env": "NEON_API_TOKEN"},
779
+ },
780
+ },
781
+ "mongo": {
782
+ "config": {
783
+ "dbname": {"env": "MONGO_DBNAME"},
784
+ "host": {"env": "MONGO_HOST"},
785
+ "password": {"env": "MONGO_PASSWORD"},
786
+ "username": {"env": "MONGO_USERNAME"},
787
+ },
788
+ },
789
+ "runtime_api_site_client": {
790
+ "config": {
791
+ "base_url": {"env": "API_HOST"},
792
+ "client_id": {"env": "API_SITE_CLIENT_ID"},
793
+ "client_secret": {"env": "API_SITE_CLIENT_SECRET"},
794
+ "site_id": {"env": "API_SITE_ID"},
795
+ },
796
+ },
797
+ },
798
+ ),
799
+ "ops": {
800
+ "export_json_to_drs": {"config": {"username": "..."}},
801
+ "get_neon_pipeline_inputs": {
802
+ "inputs": {
803
+ "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
804
+ "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
805
+ }
806
+ },
807
+ "get_neon_pipeline_surface_water_data_product": {
808
+ "config": {
809
+ "surface_water_data_product": {
810
+ "product_id": "DP1.20281.001",
811
+ "product_tables": "mms_swMetagenomeSequencing, mms_swMetagenomeDnaExtraction, amc_fieldGenetic, amc_fieldSuperParent",
812
+ }
813
+ }
814
+ },
815
+ },
816
+ },
817
+ ),
818
+ ingest_neon_surface_water_metadata.to_job(
819
+ description="",
820
+ resource_defs=resource_defs,
821
+ config={
822
+ "resources": merge(
823
+ unfreeze(normal_resources),
824
+ {
825
+ "neon_api_client": {
826
+ "config": {
827
+ "base_url": {"env": "NEON_API_BASE_URL"},
828
+ "api_token": {"env": "NEON_API_TOKEN"},
829
+ },
830
+ }
831
+ },
832
+ ),
833
+ "ops": {
834
+ "get_neon_pipeline_surface_water_data_product": {
835
+ "config": {
836
+ "surface_water_data_product": {
837
+ "product_id": "DP1.20281.001",
838
+ "product_tables": "mms_swMetagenomeSequencing, mms_swMetagenomeDnaExtraction, amc_fieldGenetic, amc_fieldSuperParent",
839
+ }
840
+ }
841
+ },
842
+ "get_neon_pipeline_inputs": {
843
+ "inputs": {
844
+ "neon_envo_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/neon_mixs_env_triad_mappings/neon-nlcd-local-broad-mappings.tsv",
845
+ "neon_raw_data_file_mappings_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/main/assets/misc/neon_raw_data_file_mappings.tsv",
846
+ }
847
+ },
848
+ },
849
+ },
850
+ ),
851
+ ]
852
+
853
+
854
+ @repository
855
+ def biosample_export():
856
+ normal_resources = run_config_frozen__normal_env["resources"]
857
+ return [
858
+ nmdc_study_to_ncbi_submission_export.to_job(
859
+ resource_defs=resource_defs,
860
+ config={
861
+ "resources": merge(
862
+ unfreeze(normal_resources),
863
+ {
864
+ "mongo": {
865
+ "config": {
866
+ "host": {"env": "MONGO_HOST"},
867
+ "username": {"env": "MONGO_USERNAME"},
868
+ "password": {"env": "MONGO_PASSWORD"},
869
+ "dbname": {"env": "MONGO_DBNAME"},
870
+ },
871
+ },
872
+ "runtime_api_site_client": {
873
+ "config": {
874
+ "base_url": {"env": "API_HOST"},
875
+ "client_id": {"env": "API_SITE_CLIENT_ID"},
876
+ "client_secret": {"env": "API_SITE_CLIENT_SECRET"},
877
+ "site_id": {"env": "API_SITE_ID"},
878
+ },
879
+ },
880
+ },
881
+ ),
882
+ "ops": {
883
+ "get_ncbi_export_pipeline_study": {
884
+ "config": {
885
+ "nmdc_study_id": "",
886
+ }
887
+ },
888
+ "get_ncbi_export_pipeline_inputs": {
889
+ "config": {
890
+ "nmdc_ncbi_attribute_mapping_file_url": "",
891
+ "ncbi_submission_metadata": {
892
+ "organization": "",
893
+ },
894
+ "ncbi_biosample_metadata": {
895
+ "organism_name": "",
896
+ },
897
+ }
898
+ },
899
+ },
900
+ },
901
+ ),
767
902
  ]
768
903
 
769
904