nmdc-runtime 1.6.0__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


nmdc_runtime/site/export/ncbi_xml_utils.py ADDED
@@ -0,0 +1,206 @@
+from io import BytesIO, StringIO
+from nmdc_runtime.minter.config import typecodes
+from lxml import etree
+
+import csv
+import requests
+
+
+def _build_class_map(class_map_data):
+    return {
+        entry["name"]: entry["schema_class"].split(":")[1] for entry in class_map_data
+    }
+
+
+def get_classname_from_typecode(doc_id):
+    class_map_data = typecodes()
+    class_map = _build_class_map(class_map_data)
+
+    typecode = doc_id.split(":")[1].split("-")[0]
+    return class_map.get(typecode)
+
+
+def fetch_data_objects_from_biosamples(all_docs_collection, biosamples_list):
+    biosample_data_objects = []
+
+    for biosample in biosamples_list:
+        current_ids = [biosample["id"]]
+        collected_data_objects = []
+
+        while current_ids:
+            new_current_ids = []
+            for current_id in current_ids:
+                query = {"has_input": current_id}
+                document = all_docs_collection.find_one(query)
+
+                if not document:
+                    continue
+
+                has_output = document.get("has_output")
+                if not has_output:
+                    continue
+
+                for output_id in has_output:
+                    if get_classname_from_typecode(output_id) == "DataObject":
+                        data_object_doc = all_docs_collection.find_one(
+                            {"id": output_id}
+                        )
+                        if data_object_doc:
+                            collected_data_objects.append(data_object_doc)
+                    else:
+                        new_current_ids.append(output_id)
+
+            current_ids = new_current_ids
+
+        if collected_data_objects:
+            biosample_data_objects.append({biosample["id"]: collected_data_objects})
+
+    return biosample_data_objects
+
+
+def fetch_omics_processing_from_biosamples(all_docs_collection, biosamples_list):
+    biosample_data_objects = []
+
+    for biosample in biosamples_list:
+        current_ids = [biosample["id"]]
+        collected_data_objects = []
+
+        while current_ids:
+            new_current_ids = []
+            for current_id in current_ids:
+                query = {"has_input": current_id}
+                document = all_docs_collection.find_one(query)
+
+                if not document:
+                    continue
+
+                has_output = document.get("has_output")
+                if not has_output:
+                    continue
+
+                for output_id in has_output:
+                    if get_classname_from_typecode(output_id) == "DataObject":
+                        omics_processing_doc = all_docs_collection.find_one(
+                            {"id": document["id"]}
+                        )
+                        if omics_processing_doc:
+                            collected_data_objects.append(omics_processing_doc)
+                    else:
+                        new_current_ids.append(output_id)
+
+            current_ids = new_current_ids
+
+        if collected_data_objects:
+            biosample_data_objects.append({biosample["id"]: collected_data_objects})
+
+    return biosample_data_objects
+
+
+def handle_quantity_value(slot_value):
+    if "has_numeric_value" in slot_value and "has_unit" in slot_value:
+        return f"{slot_value['has_numeric_value']} {slot_value['has_unit']}"
+    elif (
+        "has_maximum_numeric_value" in slot_value
+        and "has_minimum_numeric_value" in slot_value
+        and "has_unit" in slot_value
+    ):
+        range_value = (
+            slot_value["has_maximum_numeric_value"]
+            - slot_value["has_minimum_numeric_value"]
+        )
+        return f"{range_value} {slot_value['has_unit']}"
+    elif "has_raw_value" in slot_value:
+        return slot_value["has_raw_value"]
+    return "Unknown format"
+
+
+def handle_text_value(slot_value):
+    return slot_value.get("has_raw_value", "Unknown format")
+
+
+def handle_timestamp_value(slot_value):
+    return slot_value.get("has_raw_value", "Unknown format")
+
+
+def handle_controlled_term_value(slot_value):
+    if "term" in slot_value:
+        term = slot_value["term"]
+        if "name" in term and "id" in term:
+            return f"{term['name']} [{term['id']}]"
+        elif "id" in term:
+            return term["id"]
+        elif "name" in term:
+            return term["name"]
+    elif "has_raw_value" in slot_value:
+        return slot_value["has_raw_value"]
+    return "Unknown format"
+
+
+def handle_controlled_identified_term_value(slot_value):
+    if "term" in slot_value:
+        term = slot_value["term"]
+        if "name" in term and "id" in term:
+            return f"{term['name']} [{term['id']}]"
+        elif "id" in term:
+            return term["id"]
+    elif "has_raw_value" in slot_value:
+        return slot_value["has_raw_value"]
+    return "Unknown format"
+
+
+def handle_geolocation_value(slot_value):
+    if "latitude" in slot_value and "longitude" in slot_value:
+        return f"{slot_value['latitude']} {slot_value['longitude']}"
+    elif "has_raw_value" in slot_value:
+        return slot_value["has_raw_value"]
+    return "Unknown format"
+
+
+def handle_float_value(slot_value):
+    return f"{slot_value:.2f}"
+
+
+def handle_string_value(slot_value):
+    return f"{slot_value}"
+
+
+def load_mappings(url):
+    response = requests.get(url)
+    response.raise_for_status()
+    file_content = response.text
+
+    attribute_mappings = {}
+    slot_range_mappings = {}
+    reader = csv.DictReader(StringIO(file_content), delimiter="\t")
+    for row in reader:
+        if row["ignore"].strip():
+            continue
+
+        json_key = row["nmdc_schema_slot"]
+        # attribute mappings
+        xml_attribute_name = row["ncbi_biosample_attribute_name"]
+        attribute_mappings[json_key] = (
+            xml_attribute_name if xml_attribute_name else json_key
+        )
+
+        # slot range mappings
+        data_type = row["nmdc_schema_slot_range"]
+        slot_range_mappings[json_key] = data_type if data_type else "default"
+
+    return attribute_mappings, slot_range_mappings
+
+
+def validate_xml(xml, xsd_url):
+    response = requests.get(xsd_url)
+    response.raise_for_status()
+    xsd_content = response.text
+
+    xml_schema_doc = etree.parse(BytesIO(xsd_content.encode("utf-8")))
+    xml_schema = etree.XMLSchema(xml_schema_doc)
+
+    xml_doc = etree.parse(BytesIO(xml.encode("utf-8")))
+
+    if not xml_schema.validate(xml_doc):
+        raise ValueError(f"There were errors while validating against: {xsd_url}")
+
+    return True
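
The new module's traversal helpers walk the Mongo `alldocs` collection breadth-first: starting from a biosample `id`, they find a process document whose `has_input` contains it, then follow that document's `has_output` ids, collecting ids that resolve to `DataObject` and re-queuing the rest. (`fetch_omics_processing_from_biosamples` runs the same walk but collects the matching process document itself, via `{"id": document["id"]}`, rather than its output.) A minimal sketch of one step of that walk, using a hypothetical in-memory stand-in for the collection and a hard-coded typecode map in place of `typecodes()`:

    # Hypothetical stand-in for a pymongo collection; only find_one() is needed.
    class FakeCollection:
        def __init__(self, docs):
            self.docs = docs

        def find_one(self, query):
            (key, value), = query.items()
            for doc in self.docs:
                field = doc.get(key)
                # Mongo matches scalar queries against list-valued fields too.
                if field == value or (isinstance(field, list) and value in field):
                    return doc
            return None

    # Hard-coded typecode -> class map standing in for _build_class_map(typecodes()).
    CLASS_MAP = {"bsm": "Biosample", "omprc": "OmicsProcessing", "dobj": "DataObject"}

    def classname(doc_id):
        return CLASS_MAP.get(doc_id.split(":")[1].split("-")[0])

    coll = FakeCollection([
        {"id": "nmdc:omprc-11-abc123",          # ids here are invented
         "has_input": ["nmdc:bsm-11-xyz789"],
         "has_output": ["nmdc:dobj-11-def456"]},
        {"id": "nmdc:dobj-11-def456"},
    ])

    # One step of the walk: biosample id -> process doc -> its DataObject outputs.
    process = coll.find_one({"has_input": "nmdc:bsm-11-xyz789"})
    outputs = [o for o in process["has_output"] if classname(o) == "DataObject"]
    print(outputs)  # ['nmdc:dobj-11-def456']
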
nmdc_runtime/site/export/study_metadata.py CHANGED
@@ -5,7 +5,6 @@ Get NMDC study-associated metadata from search api
 import csv
 from io import StringIO
 
-import requests
 from dagster import (
     op,
     get_dagster_logger,
@@ -26,13 +25,27 @@ def get_all_docs(client, collection, filter_):
     per_page = 200
     url_base = f"/{collection}?filter={filter_}&per_page={per_page}"
     results = []
-    rv = client.request("GET", url_base).json()
+    response = client.request("GET", url_base)
+    if response.status_code != 200:
+        raise Exception(
+            f"Runtime API request failed with status {response.status_code}."
+            f" Check URL: {url_base}"
+        )
+    rv = response.json()
     results.extend(rv.get("results", []))
     page, count = rv["meta"]["page"], rv["meta"]["count"]
     assert count <= 10_000
     while page * per_page < count:
-        rv = requests.get(url_base + f"&page={page + 1}").json()
-        results.extend(rv["results"])
+        page += 1
+        url = f"{url_base}&page={page}"
+        response = client.request("GET", url)
+        if response.status_code != 200:
+            raise Exception(
+                f"Runtime API request failed with status {response.status_code}."
+                f" Check URL: {url}"
+            )
+        rv = response.json()
+        results.extend(rv.get("results", []))
     return results
 
 
@@ -115,3 +128,10 @@ def export_study_biosamples_as_csv(context: OpExecutionContext, study_export_inf
 def export_study_biosamples_metadata():
     outputs = export_study_biosamples_as_csv(get_study_biosamples_metadata())
     add_output_run_event(outputs)
+
+
+@op(required_resource_keys={"runtime_api_site_client"})
+def get_biosamples_by_study_id(context: OpExecutionContext, nmdc_study: dict):
+    client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
+    biosamples = get_all_docs(client, "biosamples", f"part_of:{nmdc_study['id']}")
+    return biosamples
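
The `get_all_docs` rewrite fixes a real defect: follow-up pages were previously fetched with a bare `requests.get` on a relative URL, bypassing the authenticated site client entirely, and non-200 responses were never checked. Every page now goes through `client.request`, and any failure raises with the offending URL. The paging arithmetic itself is unchanged; a quick walkthrough with invented numbers:

    # Hypothetical walkthrough of the pagination loop (numbers invented).
    per_page, count = 200, 450       # `count` comes from the response's meta block
    page = 1                         # page 1 is fetched before the loop
    pages_fetched = [1]
    while page * per_page < count:   # 200 < 450, 400 < 450, 600 >= 450 -> stop
        page += 1
        pages_fetched.append(page)
    print(pages_fetched)             # [1, 2, 3] -- three requests cover 450 results
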
nmdc_runtime/site/graphs.py CHANGED
@@ -22,7 +22,6 @@ from nmdc_runtime.site.ops import (
     hello,
     mongo_stats,
     submit_metadata_to_db,
-    update_schema,
     filter_ops_undone_expired,
     construct_jobs,
     maybe_post_jobs,
@@ -49,7 +48,15 @@ from nmdc_runtime.site.ops import (
     get_neon_pipeline_inputs,
     get_df_from_url,
     site_code_mapping,
+    materialize_alldocs,
+    get_ncbi_export_pipeline_study,
+    get_data_objects_from_biosamples,
+    get_omics_processing_from_biosamples,
+    get_ncbi_export_pipeline_inputs,
+    ncbi_submission_xml_from_nmdc_study,
+    ncbi_submission_xml_asset,
 )
+from nmdc_runtime.site.export.study_metadata import get_biosamples_by_study_id
 
 
 @graph
@@ -88,19 +95,13 @@ def hello_mongo():
 
 
 @graph
-def update_terminus():
-    """
-    A pipeline definition. This example pipeline has a single solid.
-
-    For more hints on writing Dagster pipelines, see our documentation overview on Pipelines:
-    https://docs.dagster.io/overview/solids-pipelines/pipelines
-    """
-    update_schema()
+def housekeeping():
+    delete_operations(list_operations(filter_ops_undone_expired()))
 
 
 @graph
-def housekeeping():
-    delete_operations(list_operations(filter_ops_undone_expired()))
+def ensure_alldocs():
+    materialize_alldocs()
 
 
 @graph
@@ -381,3 +382,20 @@ def ingest_neon_surface_water_metadata():
     )
     run_id = submit_metadata_to_db(database)
     poll_for_run_completion(run_id)
+
+
+@graph
+def nmdc_study_to_ncbi_submission_export():
+    nmdc_study = get_ncbi_export_pipeline_study()
+    ncbi_submission_metadata = get_ncbi_export_pipeline_inputs()
+    biosamples = get_biosamples_by_study_id(nmdc_study)
+    omics_processing_records = get_omics_processing_from_biosamples(biosamples)
+    data_objects = get_data_objects_from_biosamples(biosamples)
+    xml_data = ncbi_submission_xml_from_nmdc_study(
+        nmdc_study,
+        ncbi_submission_metadata,
+        biosamples,
+        omics_processing_records,
+        data_objects,
+    )
+    ncbi_submission_xml_asset(xml_data)
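
The new `nmdc_study_to_ncbi_submission_export` graph chains the NCBI export ops end to end: resolve the study, read submission inputs from op config, gather biosamples, derive the omics-processing records and data objects, then render and materialize the XML. A job built from this graph would need run configuration matching the `config_schema` declared in `ops.py`; a hypothetical example (the study id, URL, and metadata values are placeholders, not real data):

    # Hypothetical Dagster run config for a job built from this graph.
    run_config = {
        "ops": {
            "get_ncbi_export_pipeline_study": {
                "config": {"nmdc_study_id": "nmdc:sty-11-example"},
            },
            "get_ncbi_export_pipeline_inputs": {
                "config": {
                    "nmdc_ncbi_attribute_mapping_file_url": "https://example.org/mappings.tsv",
                    "ncbi_submission_metadata": {"organization": "Example Org"},
                    "ncbi_biosample_metadata": {"organism_name": "soil metagenome"},
                },
            },
        },
    }
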
nmdc_runtime/site/ops.py CHANGED
@@ -9,9 +9,11 @@ from datetime import datetime, timezone
 from io import BytesIO, StringIO
 from typing import Tuple
 from zipfile import ZipFile
+
 import pandas as pd
 import requests
 
+
 from bson import ObjectId, json_util
 from dagster import (
     Any,
@@ -29,10 +31,14 @@ from dagster import (
     String,
     op,
     Optional,
+    Field,
+    Permissive,
+    Bool,
 )
 from gridfs import GridFS
 from linkml_runtime.dumpers import json_dumper
 from linkml_runtime.utils.yamlutils import YAMLRoot
+from nmdc_runtime.api.db.mongo import get_mongo_db
 from nmdc_runtime.api.core.idgen import generate_one_id
 from nmdc_runtime.api.core.metadata import (
     _validate_changesheet,
@@ -42,6 +48,7 @@ from nmdc_runtime.api.core.metadata import (
 )
 from nmdc_runtime.api.core.util import dotted_path_for, hash_from_str, json_clean, now
 from nmdc_runtime.api.endpoints.util import persist_content_and_get_drs_object
+from nmdc_runtime.api.endpoints.find import find_study_by_id
 from nmdc_runtime.api.models.job import Job, JobOperationMetadata
 from nmdc_runtime.api.models.metadata import ChangesheetIn
 from nmdc_runtime.api.models.operation import (
@@ -55,6 +62,11 @@ from nmdc_runtime.api.models.run import (
     _add_run_complete_event,
 )
 from nmdc_runtime.api.models.util import ResultT
+from nmdc_runtime.site.export.ncbi_xml import NCBISubmissionXML
+from nmdc_runtime.site.export.ncbi_xml_utils import (
+    fetch_data_objects_from_biosamples,
+    fetch_omics_processing_from_biosamples,
+)
 from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
 from nmdc_runtime.site.resources import (
     NmdcPortalApiClient,
@@ -81,12 +93,15 @@ from nmdc_runtime.util import (
     put_object,
     validate_json,
     specialize_activity_set_docs,
+    collection_name_to_class_names,
+    class_hierarchy_as_list,
+    populated_schema_collection_names_with_id_field,
 )
 from nmdc_schema import nmdc
+from nmdc_schema.nmdc import Database as NMDCDatabase
 from pydantic import BaseModel
 from pymongo.database import Database as MongoDatabase
 from starlette import status
-from terminusdb_client.woqlquery import WOQLQuery as WQ
 from toolz import assoc, dissoc, get_in, valfilter, identity
 
 
@@ -111,14 +126,6 @@ def log_env(context):
     context.log.info("\n".join(out))
 
 
-@op(required_resource_keys={"terminus"})
-def list_databases(context) -> List[String]:
-    client = context.resources.terminus.client
-    list_ = client.list_databases()
-    context.log.info(f"databases: {list_}")
-    return list_
-
-
 @op(required_resource_keys={"mongo"})
 def mongo_stats(context) -> List[str]:
     db = context.resources.mongo.db
@@ -127,41 +134,6 @@ def mongo_stats(context) -> List[str]:
     return collection_names
 
 
-@op(required_resource_keys={"terminus"})
-def update_schema(context):
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        try:
-            context.log.info("shallow-cloning nmdc-schema repo")
-            subprocess.check_output(
-                "git clone https://github.com/microbiomedata/nmdc-schema.git"
-                f" --branch main --single-branch {tmpdirname}/nmdc-schema",
-                shell=True,
-            )
-            context.log.info("generating TerminusDB JSON-LD from NMDC LinkML")
-            subprocess.check_output(
-                f"gen-terminusdb {tmpdirname}/nmdc-schema/src/schema/nmdc.yaml"
-                f" > {tmpdirname}/nmdc.terminus.json",
-                shell=True,
-            )
-        except subprocess.CalledProcessError as e:
-            if e.stdout:
-                context.log.debug(e.stdout.decode())
-            if e.stderr:
-                context.log.error(e.stderr.decode())
-            context.log.debug(str(e.returncode))
-            raise e
-
-        with open(f"{tmpdirname}/nmdc.terminus.json") as f:
-            woql_dict = json.load(f)
-
-        context.log.info("Updating terminus schema via WOQLQuery")
-        rv = WQ(query=woql_dict).execute(
-            context.resources.terminus.client, "update schema via WOQL"
-        )
-        context.log.info(str(rv))
-        return rv
-
-
 @op(
     required_resource_keys={"mongo", "runtime_api_site_client"},
     retry_policy=RetryPolicy(max_retries=2),
@@ -768,6 +740,33 @@ def export_json_to_drs(
     return ["/objects/" + drs_object["id"]]
 
 
+@op(
+    description="NCBI Submission XML file rendered in a Dagster Asset",
+    out=Out(description="XML content rendered through Dagit UI"),
+)
+def ncbi_submission_xml_asset(context: OpExecutionContext, data: str):
+    filename = "ncbi_submission.xml"
+    file_path = os.path.join(context.instance.storage_directory(), filename)
+
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+    with open(file_path, "w") as f:
+        f.write(data)
+
+    context.log_event(
+        AssetMaterialization(
+            asset_key="ncbi_submission_xml",
+            description="NCBI Submission XML Data",
+            metadata={
+                "file_path": MetadataValue.path(file_path),
+                "xml": MetadataValue.text(data),
+            },
+        )
+    )
+
+    return Output(data)
+
+
 def unique_field_values(docs: List[Dict[str, Any]], field: str):
     return {doc[field] for doc in docs if field in doc}
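
`ncbi_submission_xml_asset` both writes the XML under the Dagster instance's storage directory and logs an `AssetMaterialization`, so the file path and full XML text surface in the Dagit UI. A rough test-style sketch of exercising it, assuming Dagster's direct op invocation with `build_op_context` applies in this version:

    # Rough sketch; assumes direct op invocation and an ephemeral instance suffice.
    from dagster import DagsterInstance, build_op_context

    instance = DagsterInstance.ephemeral()
    context = build_op_context(instance=instance)
    ncbi_submission_xml_asset(context, "<Submission/>")  # writes the file, logs the event
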
@@ -977,3 +976,140 @@ def site_code_mapping() -> dict:
         raise Exception(
             f"Failed to fetch site data from {endpoint}. Status code: {response.status_code}, Content: {response.content}"
         )
+
+
+@op(required_resource_keys={"mongo"})
+def materialize_alldocs(context) -> int:
+    mdb = context.resources.mongo.db
+    collection_names = populated_schema_collection_names_with_id_field(mdb)
+
+    for name in collection_names:
+        assert (
+            len(collection_name_to_class_names[name]) == 1
+        ), f"{name} collection has class name of {collection_name_to_class_names[name]} and len {len(collection_name_to_class_names[name])}"
+
+    context.log.info(f"{collection_names=}")
+
+    # Drop any existing `alldocs` collection (e.g. from previous use of this op).
+    mdb.alldocs.drop()
+
+    # Build alldocs
+    context.log.info("constructing `alldocs` collection")
+
+    for collection in collection_names:
+        # Calculate class_hierarchy_as_list once per collection, using the first document in list
+        try:
+            nmdcdb = NMDCDatabase(
+                **{collection: [dissoc(mdb[collection].find_one(), "_id")]}
+            )
+            exemplar = getattr(nmdcdb, collection)[0]
+            newdoc_type: list[str] = class_hierarchy_as_list(exemplar)
+        except ValueError as e:
+            context.log.info(f"Collection {collection} does not exist.")
+            raise e
+
+        context.log.info(
+            f"Found {mdb[collection].estimated_document_count()} estimated documents for {collection=}."
+        )
+        # For each document in this collection, replace the value of the `type` field with
+        # a _list_ of the document's own class and ancestor classes, remove the `_id` field,
+        # and insert the resulting document into the `alldocs` collection.
+
+        inserted_many_result = mdb.alldocs.insert_many(
+            [
+                assoc(dissoc(doc, "type", "_id"), "type", newdoc_type)
+                for doc in mdb[collection].find()
+            ]
+        )
+        context.log.info(
+            f"Inserted {len(inserted_many_result.inserted_ids)} documents for {collection=}."
+        )
+
+    # Re-idx for `alldocs` collection
+    mdb.alldocs.create_index("id", unique=True)
+    context.log.info(
+        f"refreshed {mdb.alldocs} collection with {mdb.alldocs.estimated_document_count()} docs."
+    )
+    return mdb.alldocs.estimated_document_count()
+
+
+@op(config_schema={"nmdc_study_id": str}, required_resource_keys={"mongo"})
+def get_ncbi_export_pipeline_study(context: OpExecutionContext) -> Any:
+    nmdc_study = find_study_by_id(
+        context.op_config["nmdc_study_id"], context.resources.mongo.db
+    )
+    return nmdc_study
+
+
+@op(
+    config_schema={
+        "nmdc_ncbi_attribute_mapping_file_url": str,
+        "ncbi_submission_metadata": Field(
+            Permissive(
+                {
+                    "organization": String,
+                }
+            ),
+            is_required=True,
+            description="General metadata about the NCBI submission.",
+        ),
+        "ncbi_biosample_metadata": Field(
+            Permissive(
+                {
+                    "organism_name": String,
+                }
+            ),
+            is_required=True,
+            description="Metadata for one or many NCBI BioSample in the Submission.",
+        ),
+    },
+    out=Out(Dict),
+)
+def get_ncbi_export_pipeline_inputs(context: OpExecutionContext) -> str:
+    nmdc_ncbi_attribute_mapping_file_url = context.op_config[
+        "nmdc_ncbi_attribute_mapping_file_url"
+    ]
+    ncbi_submission_metadata = context.op_config.get("ncbi_submission_metadata", {})
+    ncbi_biosample_metadata = context.op_config.get("ncbi_biosample_metadata", {})
+
+    return {
+        "nmdc_ncbi_attribute_mapping_file_url": nmdc_ncbi_attribute_mapping_file_url,
+        "ncbi_submission_metadata": ncbi_submission_metadata,
+        "ncbi_biosample_metadata": ncbi_biosample_metadata,
+    }
+
+
+@op(required_resource_keys={"mongo"})
+def get_data_objects_from_biosamples(context: OpExecutionContext, biosamples: list):
+    mdb = context.resources.mongo.db
+    alldocs_collection = mdb["alldocs"]
+    biosample_data_objects = fetch_data_objects_from_biosamples(
+        alldocs_collection, biosamples
+    )
+    return biosample_data_objects
+
+
+@op(required_resource_keys={"mongo"})
+def get_omics_processing_from_biosamples(context: OpExecutionContext, biosamples: list):
+    mdb = context.resources.mongo.db
+    alldocs_collection = mdb["alldocs"]
+    biosample_omics_processing = fetch_omics_processing_from_biosamples(
+        alldocs_collection, biosamples
+    )
+    return biosample_omics_processing
+
+
+@op
+def ncbi_submission_xml_from_nmdc_study(
+    context: OpExecutionContext,
+    nmdc_study: Any,
+    ncbi_exporter_metadata: dict,
+    biosamples: list,
+    omics_processing_records: list,
+    data_objects: list,
+) -> str:
+    ncbi_exporter = NCBISubmissionXML(nmdc_study, ncbi_exporter_metadata)
+    ncbi_xml = ncbi_exporter.get_submission_xml(
+        biosamples, omics_processing_records, data_objects
+    )
+    return ncbi_xml
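
The heart of `materialize_alldocs` is the per-document rewrite done with `toolz`: drop `_id` and the scalar `type`, then set `type` to the class-hierarchy list computed once per collection, so `alldocs` queries can match on ancestor classes. A small illustration with an invented document and hierarchy:

    from toolz import assoc, dissoc

    # Invented example document and class hierarchy (class_hierarchy_as_list output).
    doc = {"_id": "<ObjectId>", "id": "nmdc:bsm-11-xyz789", "type": "nmdc:Biosample"}
    newdoc_type = ["Biosample", "MaterialEntity", "NamedThing"]

    alldoc = assoc(dissoc(doc, "type", "_id"), "type", newdoc_type)
    print(alldoc)
    # {'id': 'nmdc:bsm-11-xyz789', 'type': ['Biosample', 'MaterialEntity', 'NamedThing']}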