nmdc-runtime 2.1.1__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nmdc-runtime might be problematic. Click here for more details.

@@ -1,4 +1,5 @@
1
1
  import os
2
+ import re
2
3
  import datetime
3
4
  import xml.etree.ElementTree as ET
4
5
  import xml.dom.minidom
@@ -6,6 +7,7 @@ import xml.dom.minidom
6
7
  from typing import Any
7
8
  from urllib.parse import urlparse
8
9
  from nmdc_runtime.site.export.ncbi_xml_utils import (
10
+ get_instruments,
9
11
  handle_controlled_identified_term_value,
10
12
  handle_controlled_term_value,
11
13
  handle_geolocation_value,
@@ -170,7 +172,39 @@ class NCBISubmissionXML:
170
172
 
171
173
  for json_key, value in biosample.items():
172
174
  if isinstance(value, list):
173
- continue # Skip processing for list values
175
+ for item in value:
176
+ if json_key not in attribute_mappings:
177
+ continue
178
+
179
+ xml_key = attribute_mappings[json_key]
180
+ value_type = slot_range_mappings.get(json_key, "string")
181
+ handler = self.type_handlers.get(
182
+ value_type, handle_string_value
183
+ )
184
+
185
+ # Special handling for "elev" key
186
+ if json_key == "elev":
187
+ value = f"{float(value)} m" # Convert to float if possible
188
+ attributes[xml_key] = value
189
+ continue # Skip applying the handler to this key
190
+
191
+ # Special handling for "host_taxid"
192
+ if json_key == "host_taxid" and isinstance(value, dict):
193
+ if "term" in value and "id" in value["term"]:
194
+ value = re.findall(
195
+ r"\d+", value["term"]["id"].split(":")[1]
196
+ )[0]
197
+ attributes[xml_key] = value
198
+ continue # Skip applying the handler to this key
199
+
200
+ formatted_value = handler(item)
201
+
202
+ # Combine multiple values with a separator for list elements
203
+ if xml_key in attributes:
204
+ attributes[xml_key] += f"| {formatted_value}"
205
+ else:
206
+ attributes[xml_key] = formatted_value
207
+ continue
174
208
 
175
209
  if json_key == "env_package":
176
210
  env_package = f"MIMS.me.{handle_text_value(value)}.6.0"
@@ -187,6 +221,20 @@ class NCBISubmissionXML:
187
221
  value_type = slot_range_mappings.get(json_key, "string")
188
222
  handler = self.type_handlers.get(value_type, handle_string_value)
189
223
 
224
+ # Special handling for "elev" key
225
+ if json_key == "elev":
226
+ value = f"{float(value)} m" # Convert to float if possible
227
+ attributes[xml_key] = value
228
+ continue # Skip applying the handler to this key
229
+
230
+ # Special handling for "host_taxid"
231
+ if json_key == "host_taxid" and isinstance(value, dict):
232
+ if "term" in value and "id" in value["term"]:
233
+ value = re.findall(r"\d+", value["term"]["id"].split(":")[1])[0]
234
+ attributes[xml_key] = value
235
+ continue # Skip applying the handler to this key
236
+
237
+ # Default processing for other keys
190
238
  formatted_value = handler(value)
191
239
  attributes[xml_key] = formatted_value
192
240
 
@@ -286,6 +334,7 @@ class NCBISubmissionXML:
286
334
  nmdc_nucleotide_sequencing: list,
287
335
  nmdc_biosamples: list,
288
336
  nmdc_library_preparation: list,
337
+ all_instruments: dict,
289
338
  ):
290
339
  bsm_id_name_dict = {
291
340
  biosample["id"]: biosample["name"] for biosample in nmdc_biosamples
@@ -296,9 +345,10 @@ class NCBISubmissionXML:
296
345
  biosample_ids = []
297
346
  nucleotide_sequencing_ids = {}
298
347
  lib_prep_protocol_names = {}
299
- instrument_name = ""
300
348
  analyte_category = ""
301
349
  library_name = ""
350
+ instrument_vendor = ""
351
+ instrument_model = ""
302
352
 
303
353
  for biosample_id, data_objects in entry.items():
304
354
  biosample_ids.append(biosample_id)
@@ -316,7 +366,11 @@ class NCBISubmissionXML:
316
366
  )
317
367
  # Currently, we are making the assumption that only one instrument
318
368
  # is used to sequence a Biosample
319
- instrument_name = ntseq.get("instrument_used", "")[0]
369
+ instrument_id = ntseq.get("instrument_used", "")[0]
370
+ instrument = all_instruments.get(instrument_id, {})
371
+ instrument_vendor = instrument.get("vendor", "")
372
+ instrument_model = instrument.get("model", "")
373
+
320
374
  analyte_category = ntseq.get("analyte_category", "")
321
375
  library_name = bsm_id_name_dict.get(biosample_id, "")
322
376
 
@@ -353,9 +407,9 @@ class NCBISubmissionXML:
353
407
  "RefId",
354
408
  children=[
355
409
  self.set_element(
356
- "SPUID",
410
+ "PrimaryId",
357
411
  bioproject_id,
358
- {"spuid_namespace": org},
412
+ {"db": "BioProject"},
359
413
  )
360
414
  ],
361
415
  )
@@ -384,11 +438,11 @@ class NCBISubmissionXML:
384
438
  )
385
439
 
386
440
  sra_attributes = []
387
- if instrument_name.lower().startswith("illumina"):
441
+ if instrument_vendor == "illumina":
388
442
  sra_attributes.append(
389
443
  self.set_element("Attribute", "ILLUMINA", {"name": "platform"})
390
444
  )
391
- if "nextseq550" in instrument_name.lower():
445
+ if instrument_model == "nextseq_550":
392
446
  sra_attributes.append(
393
447
  self.set_element(
394
448
  "Attribute", "NextSeq 550", {"name": "instrument_model"}
@@ -501,6 +555,7 @@ class NCBISubmissionXML:
501
555
  biosample_nucleotide_sequencing_list: list,
502
556
  biosample_data_objects_list: list,
503
557
  biosample_library_preparation_list: list,
558
+ instruments_dict: dict,
504
559
  ):
505
560
  data_type = None
506
561
  ncbi_project_id = None
@@ -545,6 +600,7 @@ class NCBISubmissionXML:
545
600
  nmdc_nucleotide_sequencing=biosample_nucleotide_sequencing_list,
546
601
  nmdc_biosamples=biosamples_list,
547
602
  nmdc_library_preparation=biosample_library_preparation_list,
603
+ all_instruments=instruments_dict,
548
604
  )
549
605
 
550
606
  rough_string = ET.tostring(self.root, "unicode")
@@ -20,6 +20,31 @@ def get_classname_from_typecode(doc_id):
20
20
  return class_map.get(typecode)
21
21
 
22
22
 
23
+ def get_instruments(instrument_set_collection):
24
+ # dictionary to capture a list of all instruments
25
+ # Structure of dict:
26
+ # {"instrument_id": {"vendor": "vendor_name", "model": "model_name"}}
27
+ all_instruments = {}
28
+
29
+ try:
30
+ query = {"type": "nmdc:Instrument"}
31
+ cursor = instrument_set_collection.find(query)
32
+
33
+ for document in cursor:
34
+ instrument_id = document.get("id")
35
+ vendor = document.get("vendor")
36
+ model = document.get("model")
37
+
38
+ if not instrument_id or not vendor or not model:
39
+ continue
40
+
41
+ all_instruments[instrument_id] = {"vendor": vendor, "model": model}
42
+
43
+ return all_instruments
44
+ except Exception as e:
45
+ raise RuntimeError(f"An error occurred while fetching instrument data: {e}")
46
+
47
+
23
48
  def fetch_data_objects_from_biosamples(all_docs_collection, biosamples_list):
24
49
  biosample_data_objects = []
25
50
 
@@ -53,6 +53,7 @@ from nmdc_runtime.site.ops import (
53
53
  get_data_objects_from_biosamples,
54
54
  get_nucleotide_sequencing_from_biosamples,
55
55
  get_library_preparation_from_biosamples,
56
+ get_all_instruments,
56
57
  get_ncbi_export_pipeline_inputs,
57
58
  ncbi_submission_xml_from_nmdc_study,
58
59
  ncbi_submission_xml_asset,
@@ -126,9 +127,12 @@ def apply_metadata_in():
126
127
 
127
128
  @graph
128
129
  def gold_study_to_database():
129
- (study_id, study_type, gold_nmdc_instrument_mapping_file_url) = (
130
- get_gold_study_pipeline_inputs()
131
- )
130
+ (
131
+ study_id,
132
+ study_type,
133
+ gold_nmdc_instrument_mapping_file_url,
134
+ include_field_site_info,
135
+ ) = get_gold_study_pipeline_inputs()
132
136
 
133
137
  projects = gold_projects_by_study(study_id)
134
138
  biosamples = gold_biosamples_by_study(study_id)
@@ -143,6 +147,7 @@ def gold_study_to_database():
143
147
  biosamples,
144
148
  analysis_projects,
145
149
  gold_nmdc_instrument_map_df,
150
+ include_field_site_info,
146
151
  )
147
152
  database_dict = nmdc_schema_object_to_dict(database)
148
153
  filename = nmdc_schema_database_export_filename(study)
@@ -449,6 +454,7 @@ def nmdc_study_to_ncbi_submission_export():
449
454
  )
450
455
  data_object_records = get_data_objects_from_biosamples(biosamples)
451
456
  library_preparation_records = get_library_preparation_from_biosamples(biosamples)
457
+ all_instruments = get_all_instruments()
452
458
  xml_data = ncbi_submission_xml_from_nmdc_study(
453
459
  nmdc_study,
454
460
  ncbi_submission_metadata,
@@ -456,5 +462,6 @@ def nmdc_study_to_ncbi_submission_export():
456
462
  nucleotide_sequencing_records,
457
463
  data_object_records,
458
464
  library_preparation_records,
465
+ all_instruments,
459
466
  )
460
467
  ncbi_submission_xml_asset(xml_data)
nmdc_runtime/site/ops.py CHANGED
@@ -7,6 +7,7 @@ import tempfile
7
7
  from collections import defaultdict
8
8
  from datetime import datetime, timezone
9
9
  from io import BytesIO, StringIO
10
+ from toolz.dicttoolz import keyfilter
10
11
  from typing import Tuple
11
12
  from zipfile import ZipFile
12
13
  from itertools import chain
@@ -68,6 +69,7 @@ from nmdc_runtime.site.export.ncbi_xml_utils import (
68
69
  fetch_data_objects_from_biosamples,
69
70
  fetch_nucleotide_sequencing_from_biosamples,
70
71
  fetch_library_preparation_from_biosamples,
72
+ get_instruments,
71
73
  )
72
74
  from nmdc_runtime.site.drsobjects.ingest import mongo_add_docs_result_as_dict
73
75
  from nmdc_runtime.site.resources import (
@@ -92,17 +94,20 @@ from nmdc_runtime.site.translation.submission_portal_translator import (
92
94
  from nmdc_runtime.site.util import run_and_log, schema_collection_has_index_on_id
93
95
  from nmdc_runtime.util import (
94
96
  drs_object_in_for,
97
+ get_names_of_classes_in_effective_range_of_slot,
95
98
  pluralize,
96
99
  put_object,
97
100
  validate_json,
98
101
  specialize_activity_set_docs,
99
102
  collection_name_to_class_names,
100
103
  class_hierarchy_as_list,
104
+ nmdc_schema_view,
101
105
  populated_schema_collection_names_with_id_field,
102
106
  )
103
107
  from nmdc_schema import nmdc
104
108
  from nmdc_schema.nmdc import Database as NMDCDatabase
105
109
  from pydantic import BaseModel
110
+ from pymongo import InsertOne
106
111
  from pymongo.database import Database as MongoDatabase
107
112
  from starlette import status
108
113
  from toolz import assoc, dissoc, get_in, valfilter, identity
@@ -588,18 +593,23 @@ def add_output_run_event(context: OpExecutionContext, outputs: List[str]):
588
593
  "study_id": str,
589
594
  "study_type": str,
590
595
  "gold_nmdc_instrument_mapping_file_url": str,
596
+ "include_field_site_info": bool,
591
597
  },
592
598
  out={
593
599
  "study_id": Out(str),
594
600
  "study_type": Out(str),
595
601
  "gold_nmdc_instrument_mapping_file_url": Out(str),
602
+ "include_field_site_info": Out(bool),
596
603
  },
597
604
  )
598
- def get_gold_study_pipeline_inputs(context: OpExecutionContext) -> Tuple[str, str, str]:
605
+ def get_gold_study_pipeline_inputs(
606
+ context: OpExecutionContext,
607
+ ) -> Tuple[str, str, str, bool]:
599
608
  return (
600
609
  context.op_config["study_id"],
601
610
  context.op_config["study_type"],
602
611
  context.op_config["gold_nmdc_instrument_mapping_file_url"],
612
+ context.op_config["include_field_site_info"],
603
613
  )
604
614
 
605
615
 
@@ -642,6 +652,7 @@ def nmdc_schema_database_from_gold_study(
642
652
  biosamples: List[Dict[str, Any]],
643
653
  analysis_projects: List[Dict[str, Any]],
644
654
  gold_nmdc_instrument_map_df: pd.DataFrame,
655
+ include_field_site_info: bool,
645
656
  ) -> nmdc.Database:
646
657
  client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
647
658
 
@@ -656,6 +667,7 @@ def nmdc_schema_database_from_gold_study(
656
667
  projects,
657
668
  analysis_projects,
658
669
  gold_nmdc_instrument_map_df,
670
+ include_field_site_info,
659
671
  id_minter=id_minter,
660
672
  )
661
673
  database = translator.get_database()
@@ -1029,23 +1041,51 @@ def site_code_mapping() -> dict:
1029
1041
 
1030
1042
  @op(required_resource_keys={"mongo"})
1031
1043
  def materialize_alldocs(context) -> int:
1044
+ """
1045
+ This function re-creates the alldocs collection to reflect the current state of the Mongo database.
1046
+ See nmdc-runtime/docs/nb/bulk_validation_referential_integrity_check.ipynb for more details.
1047
+ """
1032
1048
  mdb = context.resources.mongo.db
1033
- collection_names = populated_schema_collection_names_with_id_field(mdb)
1049
+ schema_view = nmdc_schema_view()
1034
1050
 
1035
- # Insert a no-op as an anchor point for this comment.
1036
- #
1037
- # Note: There used to be code here that `assert`-ed that each collection could only contain documents of a single
1038
- # type. With the legacy schema, that assertion was true. With the Berkeley schema, it is false. That code was
1039
- # in place because subsequent code (further below) used a single document in a collection as the source of the
1040
- # class ancestry information of _all_ documents in that collection; an optimization that spared us from
1041
- # having to do the same for every single document in that collection. With the Berkeley schema, we have
1042
- # eliminated that optimization (since it is inadequate; it would produce some incorrect class ancestries
1043
- # for descendants of `PlannedProcess`, for example).
1044
- #
1045
- pass
1051
+ # batch size for writing documents to alldocs
1052
+ BULK_WRITE_BATCH_SIZE = 2000
1046
1053
 
1054
+ # TODO include functional_annotation_agg for "real-time" ref integrity checking.
1055
+ # For now, production use cases for materialized `alldocs` are limited to `id`-having collections.
1056
+ collection_names = populated_schema_collection_names_with_id_field(mdb)
1047
1057
  context.log.info(f"{collection_names=}")
1048
1058
 
1059
+ # Build alldocs
1060
+ context.log.info("constructing `alldocs` collection")
1061
+
1062
+ document_class_names = set(
1063
+ chain.from_iterable(collection_name_to_class_names.values())
1064
+ )
1065
+
1066
+ cls_slot_map = {
1067
+ cls_name: {
1068
+ slot.name: slot for slot in schema_view.class_induced_slots(cls_name)
1069
+ }
1070
+ for cls_name in document_class_names
1071
+ }
1072
+
1073
+ # Any ancestor of a document class is a document-referenceable range, i.e., a valid range of a document-reference-ranged slot.
1074
+ document_referenceable_ranges = set(
1075
+ chain.from_iterable(
1076
+ schema_view.class_ancestors(cls_name) for cls_name in document_class_names
1077
+ )
1078
+ )
1079
+
1080
+ document_reference_ranged_slots = defaultdict(list)
1081
+ for cls_name, slot_map in cls_slot_map.items():
1082
+ for slot_name, slot in slot_map.items():
1083
+ if (
1084
+ set(get_names_of_classes_in_effective_range_of_slot(schema_view, slot))
1085
+ & document_referenceable_ranges
1086
+ ):
1087
+ document_reference_ranged_slots[cls_name].append(slot_name)
1088
+
1049
1089
  # Drop any existing `alldocs` collection (e.g. from previous use of this op).
1050
1090
  #
1051
1091
  # FIXME: This "nuke and pave" approach introduces a race condition.
@@ -1054,90 +1094,41 @@ def materialize_alldocs(context) -> int:
1054
1094
  #
1055
1095
  mdb.alldocs.drop()
1056
1096
 
1057
- # Build alldocs
1058
- context.log.info("constructing `alldocs` collection")
1059
-
1060
- # For each collection, group its documents by their `type` value, transform them, and load them into `alldocs`.
1061
- for collection_name in collection_names:
1097
+ for coll_name in collection_names:
1098
+ context.log.info(f"{coll_name=}")
1099
+ requests = []
1100
+ documents_processed_counter = 0
1101
+ for doc in mdb[coll_name].find():
1102
+ doc_type = doc["type"][5:] # lop off "nmdc:" prefix
1103
+ slots_to_include = ["id", "type"] + document_reference_ranged_slots[
1104
+ doc_type
1105
+ ]
1106
+ new_doc = keyfilter(lambda slot: slot in slots_to_include, doc)
1107
+ new_doc["_type_and_ancestors"] = schema_view.class_ancestors(doc_type)
1108
+ requests.append(InsertOne(new_doc))
1109
+ if len(requests) == BULK_WRITE_BATCH_SIZE:
1110
+ _ = mdb.alldocs.bulk_write(requests, ordered=False)
1111
+ requests.clear()
1112
+ documents_processed_counter += BULK_WRITE_BATCH_SIZE
1113
+ if len(requests) > 0:
1114
+ _ = mdb.alldocs.bulk_write(requests, ordered=False)
1115
+ documents_processed_counter += len(requests)
1062
1116
  context.log.info(
1063
- f"Found {mdb[collection_name].estimated_document_count()} estimated documents for {collection_name=}."
1117
+ f"Inserted {documents_processed_counter} documents from {coll_name=} "
1064
1118
  )
1065
1119
 
1066
- # Process all the distinct `type` values (i.e. value in the `type` field) of the documents in this collection.
1067
- #
1068
- # References:
1069
- # - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.distinct
1070
- #
1071
- distinct_type_values = mdb[collection_name].distinct(key="type")
1072
- context.log.info(
1073
- f"Found {len(distinct_type_values)} distinct `type` values in {collection_name=}: {distinct_type_values=}"
1074
- )
1075
- for type_value in distinct_type_values:
1076
-
1077
- # Process all the documents in this collection that have this value in their `type` field.
1078
- #
1079
- # References:
1080
- # - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.count_documents
1081
- # - https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.find
1082
- #
1083
- filter_ = {"type": type_value}
1084
- num_docs_having_type = mdb[collection_name].count_documents(filter=filter_)
1085
- docs_having_type = mdb[collection_name].find(filter=filter_)
1086
- context.log.info(
1087
- f"Found {num_docs_having_type} documents having {type_value=} in {collection_name=}."
1088
- )
1089
-
1090
- # Get a "representative" document from the result.
1091
- #
1092
- # Note: Since all of the documents in this batch have the same class ancestry, we will save time by
1093
- # determining the class ancestry of only _one_ of them (we call this the "representative") and then
1094
- # (later) attributing that class ancestry to all of them.
1095
- #
1096
- representative_doc = next(docs_having_type)
1097
-
1098
- # Instantiate the Python class represented by the "representative" document.
1099
- db_dict = {
1100
- # Shed the `_id` attribute, since the constructor doesn't allow it.
1101
- collection_name: [dissoc(representative_doc, "_id")]
1102
- }
1103
- nmdc_db = NMDCDatabase(**db_dict)
1104
- representative_instance = getattr(nmdc_db, collection_name)[0]
1105
-
1106
- # Get the class ancestry of that instance, as a list of class names (including its own class name).
1107
- ancestor_class_names = class_hierarchy_as_list(representative_instance)
1108
-
1109
- # Store the documents belonging to this group, in the `alldocs` collection, setting their `type` field
1110
- # to the list of class names obtained from the "representative" document above.
1111
- #
1112
- # TODO: Document why clobbering the existing contents of the `type` field is OK.
1113
- #
1114
- # Note: The reason we `chain()` our "representative" document (in an iterable) with the `docs_having_type`
1115
- # iterator here is that, when we called `next(docs_having_type)` above, we "consumed" our
1116
- # "representative" document from that iterator. We use `chain()` here so that that document gets
1117
- # inserted alongside its cousins (i.e. the documents _still_ accessible via `docs_having_type`).
1118
- # Reference: https://docs.python.org/3/library/itertools.html#itertools.chain
1119
- #
1120
- inserted_many_result = mdb.alldocs.insert_many(
1121
- [
1122
- assoc(dissoc(doc, "type", "_id"), "type", ancestor_class_names)
1123
- for doc in chain([representative_doc], docs_having_type)
1124
- ]
1125
- )
1126
- context.log.info(
1127
- f"Inserted {len(inserted_many_result.inserted_ids)} documents from {collection_name=} "
1128
- f"originally having {type_value=}."
1129
- )
1120
+ context.log.info(
1121
+ f"refreshed {mdb.alldocs} collection with {mdb.alldocs.estimated_document_count()} docs."
1122
+ )
1130
1123
 
1131
1124
  # Re-idx for `alldocs` collection
1132
1125
  mdb.alldocs.create_index("id", unique=True)
1133
1126
  # The indexes were added to improve the performance of the
1134
1127
  # /data_objects/study/{study_id} endpoint
1135
- mdb.alldocs.create_index("has_input")
1136
- mdb.alldocs.create_index("has_output")
1137
- mdb.alldocs.create_index("was_informed_by")
1138
- context.log.info(
1139
- f"refreshed {mdb.alldocs} collection with {mdb.alldocs.estimated_document_count()} docs."
1140
- )
1128
+ slots_to_index = ["has_input", "has_output", "was_informed_by"]
1129
+ [mdb.alldocs.create_index(slot) for slot in slots_to_index]
1130
+
1131
+ context.log.info(f"created indexes on id, {slots_to_index}.")
1141
1132
  return mdb.alldocs.estimated_document_count()
1142
1133
 
1143
1134
 
@@ -1221,6 +1212,14 @@ def get_library_preparation_from_biosamples(
1221
1212
  return biosample_lib_prep
1222
1213
 
1223
1214
 
1215
+ @op(required_resource_keys={"mongo"})
1216
+ def get_all_instruments(context: OpExecutionContext):
1217
+ mdb = context.resources.mongo.db
1218
+ instrument_set_collection = mdb["instrument_set"]
1219
+ all_instruments = get_instruments(instrument_set_collection)
1220
+ return all_instruments
1221
+
1222
+
1224
1223
  @op
1225
1224
  def ncbi_submission_xml_from_nmdc_study(
1226
1225
  context: OpExecutionContext,
@@ -1230,6 +1229,7 @@ def ncbi_submission_xml_from_nmdc_study(
1230
1229
  omics_processing_records: list,
1231
1230
  data_object_records: list,
1232
1231
  library_preparation_records: list,
1232
+ all_instruments: dict,
1233
1233
  ) -> str:
1234
1234
  ncbi_exporter = NCBISubmissionXML(nmdc_study, ncbi_exporter_metadata)
1235
1235
  ncbi_xml = ncbi_exporter.get_submission_xml(
@@ -1237,5 +1237,6 @@ def ncbi_submission_xml_from_nmdc_study(
1237
1237
  omics_processing_records,
1238
1238
  data_object_records,
1239
1239
  library_preparation_records,
1240
+ all_instruments,
1240
1241
  )
1241
1242
  return ncbi_xml
@@ -506,6 +506,7 @@ def biosample_submission_ingest():
506
506
  "study_id": "",
507
507
  "study_type": "research_study",
508
508
  "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
509
+ "include_field_site_info": False,
509
510
  },
510
511
  },
511
512
  "export_json_to_drs": {"config": {"username": ""}},
@@ -331,9 +331,26 @@ class GoldApiClient(BasicAuthClient):
331
331
  """
332
332
  return id.replace("gold:", "")
333
333
 
334
- def fetch_biosamples_by_study(self, study_id: str) -> List[Dict[str, Any]]:
334
+ def fetch_biosamples_by_study(
335
+ self, study_id: str, include_project=True
336
+ ) -> List[Dict[str, Any]]:
335
337
  id = self._normalize_id(study_id)
336
338
  results = self.request("/biosamples", params={"studyGoldId": id})
339
+ if include_project:
340
+ projects = self.fetch_projects_by_study(id)
341
+ biosamples_by_id = {
342
+ biosample["biosampleGoldId"]: biosample for biosample in results
343
+ }
344
+ for project in projects:
345
+ sample_id = project.get("biosampleGoldId")
346
+ if not sample_id:
347
+ continue
348
+ if sample_id not in biosamples_by_id:
349
+ continue
350
+ biosample = biosamples_by_id[sample_id]
351
+ if "projects" not in biosample:
352
+ biosample["projects"] = []
353
+ biosample["projects"].append(project)
337
354
  return results
338
355
 
339
356
  def fetch_projects_by_study(self, study_id: str) -> List[Dict[str, Any]]:
@@ -7,6 +7,10 @@ import pandas as pd
7
7
 
8
8
  from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
9
9
 
10
+ # Dictionary of sequencing strategies from GOLD that we are filtering on
11
+ # based on the kind of samples that are required for NMDC
12
+ SEQUENCING_STRATEGIES = {"Metagenome", "Metatranscriptome"}
13
+
10
14
 
11
15
  class GoldStudyTranslator(Translator):
12
16
  def __init__(
@@ -17,6 +21,7 @@ class GoldStudyTranslator(Translator):
17
21
  projects: List[JSON_OBJECT] = [],
18
22
  analysis_projects: List[JSON_OBJECT] = [],
19
23
  gold_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
24
+ include_field_site_info: bool = False,
20
25
  *args,
21
26
  **kwargs,
22
27
  ) -> None:
@@ -24,9 +29,39 @@ class GoldStudyTranslator(Translator):
24
29
 
25
30
  self.study = study
26
31
  self.study_type = nmdc.StudyCategoryEnum(study_type)
27
- self.biosamples = biosamples
28
- self.projects = projects
29
- self.analysis_projects = analysis_projects
32
+ self.include_field_site_info = include_field_site_info
33
+ # Filter biosamples to only those with `sequencingStrategy` of
34
+ # "Metagenome" or "Metatranscriptome"
35
+ self.biosamples = [
36
+ biosample
37
+ for biosample in biosamples
38
+ if any(
39
+ project.get("sequencingStrategy") in SEQUENCING_STRATEGIES
40
+ for project in biosample.get("projects", [])
41
+ )
42
+ ]
43
+ # Fetch the valid projectGoldIds that are associated with filtered
44
+ # biosamples on their `projects` field
45
+ valid_project_ids = {
46
+ project.get("projectGoldId")
47
+ for biosample in self.biosamples
48
+ for project in biosample.get("projects", [])
49
+ }
50
+ # Filter projects to only those with `projectGoldId` in valid_project_ids
51
+ self.projects = [
52
+ project
53
+ for project in projects
54
+ if project.get("projectGoldId") in valid_project_ids
55
+ ]
56
+ # Filter analysis_projects to only those with all `projects` in valid_project_ids
57
+ self.analysis_projects = [
58
+ analysis_project
59
+ for analysis_project in analysis_projects
60
+ if all(
61
+ project_id in valid_project_ids
62
+ for project_id in analysis_project.get("projects", [])
63
+ )
64
+ ]
30
65
  self.gold_nmdc_instrument_map_df = gold_nmdc_instrument_map_df
31
66
 
32
67
  self._projects_by_id = self._index_by_id(self.projects, "projectGoldId")
@@ -596,7 +631,11 @@ class GoldStudyTranslator(Translator):
596
631
  principal_investigator=self._get_pi(gold_project),
597
632
  processing_institution=self._get_processing_institution(gold_project),
598
633
  instrument_used=self._get_instrument(gold_project),
599
- analyte_category="metagenome",
634
+ analyte_category=(
635
+ gold_project.get("sequencingStrategy").lower()
636
+ if gold_project.get("sequencingStrategy")
637
+ else None
638
+ ),
600
639
  associated_studies=[nmdc_study_id],
601
640
  )
602
641
 
@@ -621,21 +660,24 @@ class GoldStudyTranslator(Translator):
621
660
  nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(self.biosamples))
622
661
  gold_to_nmdc_biosample_ids = dict(zip(gold_biosample_ids, nmdc_biosample_ids))
623
662
 
624
- gold_field_site_names = sorted(
625
- {self._get_field_site_name(biosample) for biosample in self.biosamples}
626
- )
627
- nmdc_field_site_ids = self._id_minter(
628
- "nmdc:FieldResearchSite", len(gold_field_site_names)
629
- )
630
- gold_name_to_nmdc_field_site_ids = dict(
631
- zip(gold_field_site_names, nmdc_field_site_ids)
632
- )
633
- gold_biosample_to_nmdc_field_site_ids = {
634
- biosample["biosampleGoldId"]: gold_name_to_nmdc_field_site_ids[
635
- self._get_field_site_name(biosample)
636
- ]
637
- for biosample in self.biosamples
638
- }
663
+ if self.include_field_site_info:
664
+ gold_field_site_names = sorted(
665
+ {self._get_field_site_name(biosample) for biosample in self.biosamples}
666
+ )
667
+ nmdc_field_site_ids = self._id_minter(
668
+ "nmdc:FieldResearchSite", len(gold_field_site_names)
669
+ )
670
+ gold_name_to_nmdc_field_site_ids = dict(
671
+ zip(gold_field_site_names, nmdc_field_site_ids)
672
+ )
673
+ gold_biosample_to_nmdc_field_site_ids = {
674
+ biosample["biosampleGoldId"]: gold_name_to_nmdc_field_site_ids[
675
+ self._get_field_site_name(biosample)
676
+ ]
677
+ for biosample in self.biosamples
678
+ }
679
+ else:
680
+ gold_biosample_to_nmdc_field_site_ids = {}
639
681
 
640
682
  gold_project_ids = [project["projectGoldId"] for project in self.projects]
641
683
  nmdc_nucleotide_sequencing_ids = self._id_minter(
@@ -653,16 +695,17 @@ class GoldStudyTranslator(Translator):
653
695
  biosample["biosampleGoldId"]
654
696
  ],
655
697
  nmdc_study_id=nmdc_study_id,
656
- nmdc_field_site_id=gold_biosample_to_nmdc_field_site_ids[
657
- biosample["biosampleGoldId"]
658
- ],
698
+ nmdc_field_site_id=gold_biosample_to_nmdc_field_site_ids.get(
699
+ biosample["biosampleGoldId"], None
700
+ ),
659
701
  )
660
702
  for biosample in self.biosamples
661
703
  ]
662
- database.field_research_site_set = [
663
- nmdc.FieldResearchSite(id=id, name=name, type="nmdc:FieldResearchSite")
664
- for name, id in gold_name_to_nmdc_field_site_ids.items()
665
- ]
704
+ if self.include_field_site_info:
705
+ database.field_research_site_set = [
706
+ nmdc.FieldResearchSite(id=id, name=name, type="nmdc:FieldResearchSite")
707
+ for name, id in gold_name_to_nmdc_field_site_ids.items()
708
+ ]
666
709
  database.data_generation_set = [
667
710
  self._translate_nucleotide_sequencing(
668
711
  project,
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import re
3
3
  from datetime import datetime
4
+ from enum import Enum
4
5
  from functools import lru_cache
5
6
  from importlib import resources
6
7
  from typing import Any, List, Optional, Union
@@ -8,14 +9,36 @@ from typing import Any, List, Optional, Union
8
9
  from linkml_runtime import SchemaView
9
10
  from linkml_runtime.linkml_model import SlotDefinition
10
11
  from nmdc_schema import nmdc
11
- from toolz import get_in, groupby, concat, valmap, dissoc
12
+ from toolz import concat, dissoc, get_in, groupby, valmap
12
13
 
13
14
  from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
14
15
 
15
-
16
16
  BIOSAMPLE_UNIQUE_KEY_SLOT = "samp_name"
17
17
 
18
18
 
19
+ class EnvironmentPackage(Enum):
20
+ r"""
21
+ Enumeration of all possible environmental packages.
22
+
23
+ >>> EnvironmentPackage.AIR.value
24
+ 'air'
25
+ >>> EnvironmentPackage.SEDIMENT.value
26
+ 'sediment'
27
+ """
28
+
29
+ AIR = "air"
30
+ BIOFILM = "microbial mat_biofilm"
31
+ BUILT_ENV = "built environment"
32
+ HCR_CORES = "hydrocarbon resources-cores"
33
+ HRC_FLUID_SWABS = "hydrocarbon resources-fluids_swabs"
34
+ HOST_ASSOCIATED = "host-associated"
35
+ MISC_ENVS = "miscellaneous natural or artificial environment"
36
+ PLANT_ASSOCIATED = "plant-associated"
37
+ SEDIMENT = "sediment"
38
+ SOIL = "soil"
39
+ WATER = "water"
40
+
41
+
19
42
  @lru_cache
20
43
  def _get_schema_view():
21
44
  """Return a SchemaView instance representing the NMDC schema"""
@@ -550,7 +573,6 @@ class SubmissionPortalTranslator(Translator):
550
573
  sample_data: List[JSON_OBJECT],
551
574
  nmdc_biosample_id: str,
552
575
  nmdc_study_id: str,
553
- default_env_package: str,
554
576
  ) -> nmdc.Biosample:
555
577
  """Translate sample data from portal submission into an `nmdc:Biosample` object.
556
578
 
@@ -565,18 +587,23 @@ class SubmissionPortalTranslator(Translator):
565
587
  from each applicable submission portal tab
566
588
  :param nmdc_biosample_id: Minted nmdc:Biosample identifier for the translated object
567
589
  :param nmdc_study_id: Minted nmdc:Study identifier for the related Study
568
- :param default_env_package: Default value for `env_package` slot
569
590
  :return: nmdc:Biosample
570
591
  """
571
- biosample_key = sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
592
+ env_idx = next(
593
+ (
594
+ i
595
+ for i, tab in enumerate(sample_data)
596
+ if tab.get("env_package") is not None
597
+ ),
598
+ 0,
599
+ )
600
+ biosample_key = sample_data[env_idx].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip()
572
601
  slots = {
573
602
  "id": nmdc_biosample_id,
574
603
  "associated_studies": [nmdc_study_id],
575
604
  "type": "nmdc:Biosample",
576
- "name": sample_data[0].get("samp_name", "").strip(),
577
- "env_package": nmdc.TextValue(
578
- has_raw_value=default_env_package, type="nmdc:TextValue"
579
- ),
605
+ "name": sample_data[env_idx].get("samp_name", "").strip(),
606
+ "env_package": sample_data[env_idx].get("env_package"),
580
607
  }
581
608
  for tab in sample_data:
582
609
  transformed_tab = self._transform_dict_for_class(tab, "Biosample")
@@ -613,9 +640,18 @@ class SubmissionPortalTranslator(Translator):
613
640
  ]
614
641
 
615
642
  sample_data = metadata_submission_data.get("sampleData", {})
616
- package_name = metadata_submission_data["packageName"]
643
+ for key in sample_data.keys():
644
+ env = key.removesuffix("_data").upper()
645
+ try:
646
+ package_name = EnvironmentPackage[env].value
647
+ for sample in sample_data[key]:
648
+ sample["env_package"] = package_name
649
+ except KeyError:
650
+ pass
651
+
617
652
  sample_data_by_id = groupby(
618
- BIOSAMPLE_UNIQUE_KEY_SLOT, concat(sample_data.values())
653
+ BIOSAMPLE_UNIQUE_KEY_SLOT,
654
+ concat(sample_data.values()),
619
655
  )
620
656
  nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(sample_data_by_id))
621
657
  sample_data_to_nmdc_biosample_ids = dict(
@@ -627,7 +663,6 @@ class SubmissionPortalTranslator(Translator):
627
663
  sample_data,
628
664
  nmdc_biosample_id=sample_data_to_nmdc_biosample_ids[sample_data_id],
629
665
  nmdc_study_id=nmdc_study_id,
630
- default_env_package=package_name,
631
666
  )
632
667
  for sample_data_id, sample_data in sample_data_by_id.items()
633
668
  if sample_data
nmdc_runtime/util.py CHANGED
@@ -17,6 +17,8 @@ import fastjsonschema
17
17
  import requests
18
18
  from frozendict import frozendict
19
19
  from jsonschema.validators import Draft7Validator
20
+ from linkml_runtime import linkml_model
21
+ from linkml_runtime.utils.schemaview import SchemaView
20
22
  from nmdc_schema.nmdc import Database as NMDCDatabase
21
23
  from nmdc_schema.get_nmdc_view import ViewGetter
22
24
  from pydantic import Field, BaseModel
@@ -29,6 +31,48 @@ from nmdc_runtime.api.models.object import DrsObjectIn
29
31
  from typing_extensions import Annotated
30
32
 
31
33
 
34
def get_names_of_classes_in_effective_range_of_slot(
    schema_view: SchemaView, slot_definition: linkml_model.SlotDefinition
) -> List[str]:
    r"""
    Determine the slot's "effective" range, by taking into account its `any_of` constraints (if defined).

    Note: The `any_of` constraints constrain the slot's "effective" range beyond that described by the
          induced slot definition's `range` attribute. `SchemaView` does not seem to provide the result
          of applying those additional constraints, so we do it manually here (if any are defined).
          Reference: https://github.com/orgs/linkml/discussions/2101#discussion-6625646

    Reference: https://linkml.io/linkml-model/latest/docs/any_of/

    :param schema_view: View of the schema, used to look up the schema's classes
                        (`all_classes`) and their descendants (`class_descendants`).
    :param slot_definition: The slot whose effective range is being determined.
    :return: De-duplicated list of class names, in the order they were first encountered.
             For each applicable range that names a class in the schema, this holds whatever
             `schema_view.class_descendants` returns for it. The list is empty when no
             applicable range names a class in the schema.
    """

    # If the `any_of` constraint is defined on this slot, use the `range` of each of its
    # slot expressions instead of the slot's own `range`.
    if "any_of" in slot_definition and len(slot_definition.any_of) > 0:
        candidate_ranges = [
            slot_expression.range for slot_expression in slot_definition.any_of
        ]
    else:
        candidate_ranges = [slot_definition.range]

    # Look up the schema's classes once, rather than once per candidate range.
    all_classes = schema_view.all_classes()

    names_of_eligible_target_classes = []
    for range_name in candidate_ranges:
        # A range that does not name a class in the schema contributes nothing.
        if range_name in all_classes:
            names_of_eligible_target_classes.extend(
                schema_view.class_descendants(range_name)
            )

    # Remove duplicate class names. `dict.fromkeys` keeps the first occurrence of each
    # name in insertion order, so the result is deterministic (unlike `list(set(...))`,
    # whose order depends on string hashing).
    return list(dict.fromkeys(names_of_eligible_target_classes))
74
+
75
+
32
76
  def get_class_names_from_collection_spec(
33
77
  spec: dict, prefix: Optional[str] = None
34
78
  ) -> List[str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nmdc_runtime
3
- Version: 2.1.1
3
+ Version: 2.2.0
4
4
  Summary: A runtime system for NMDC data management and orchestration
5
5
  Home-page: https://github.com/microbiomedata/nmdc-runtime
6
6
  Author: Donny Winston
@@ -145,8 +145,6 @@ http://127.0.0.1:8000/redoc/.
145
145
 
146
146
  Tests can be found in `tests` and are run with the following commands:
147
147
 
148
- On an M1 Mac? May need to `export DOCKER_DEFAULT_PLATFORM=linux/amd64`.
149
-
150
148
  ```bash
151
149
  make up-test
152
150
  make test
@@ -2,7 +2,7 @@ nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  nmdc_runtime/config.py,sha256=qyV_To6t--DQUpYJ3SrE6sZlxuVXLPmx2dVtZV-3l-c,33
3
3
  nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
4
4
  nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- nmdc_runtime/util.py,sha256=Wd2GuuskyUqf1eV5mHLZws8BHAOsqnc0Qj7_4WhSvAM,20736
5
+ nmdc_runtime/util.py,sha256=aMzS8eATEjpXOiuyAFYthx92fb_cgIzWWd5ZQU6ZlAY,22931
6
6
  nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
@@ -36,10 +36,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
36
36
  nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
38
38
  nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- nmdc_runtime/site/graphs.py,sha256=ZHglSPwVHfXzdgR2CGvmbzLLbmsijloU58XvIe9Thjs,13996
40
- nmdc_runtime/site/ops.py,sha256=6P3kn4BygY8LKD_OpfKX2U0AYQKDlB2jw12Yn-hEmD0,44651
41
- nmdc_runtime/site/repository.py,sha256=rDtwUjozhyOxlkuF9HvaheOQDQWkgZYqVtsB50BcUp4,39121
42
- nmdc_runtime/site/resources.py,sha256=ZSH1yvA-li0R7Abc22_v0XLbjBYf5igETr2G01J3hnc,17557
39
+ nmdc_runtime/site/graphs.py,sha256=mu4bE8799TItWXaPBfOeFB2XMyYwPZcj-VJQmadN2MA,14171
40
+ nmdc_runtime/site/ops.py,sha256=T9_WrwDaySGnu6olwOHQizHQfeofMOaqMcq_vYEIzO0,43140
41
+ nmdc_runtime/site/repository.py,sha256=JtHlp6l3UVo0QhV670TGns9bMfht7NOQrNWQtvsYr2g,39183
42
+ nmdc_runtime/site/resources.py,sha256=6bmvplgql3KdEXKI49BibSk0Sug96SFJi8eOs2zeKK0,18252
43
43
  nmdc_runtime/site/util.py,sha256=zAY0oIY7GRf63ecqWelmS27N7PVrAXVwEhtnpescBSw,1415
44
44
  nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
@@ -51,21 +51,21 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
51
51
  nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
52
52
  nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
53
53
  nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- nmdc_runtime/site/export/ncbi_xml.py,sha256=bfGnvFO7jQmlNAdzXpQiNBw7DGvWQ3pTPfgbhczb_kM,22561
55
- nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=71LSFIYioF61xalKgsQ_Po63322dNbfQfYzqo2hZ720,7575
54
+ nmdc_runtime/site/export/ncbi_xml.py,sha256=Vb4rNP3uhnGlHqrwUGgA2DzpOotCf3S8G4sIJml7gl4,25287
55
+ nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=Jd-d8GGkB3e71TPpl_lPukQ54TioQZynO1yPSLX_aHs,8390
56
56
  nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ0Rut4AtYc0tXA,4844
57
57
  nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
58
  nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
59
59
  nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
60
  nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
61
61
  nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
62
- nmdc_runtime/site/translation/gold_translator.py,sha256=wkl1WwJ45EFwz73l_-t0D9Y3SilctDC1obTieY0eqxM,29600
62
+ nmdc_runtime/site/translation/gold_translator.py,sha256=RfAB68dJ9hDep20wETmCNBc0gugZbEKqVimT8h2t0uM,31470
63
63
  nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
64
64
  nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=QIDqYLuf-NlGY9_88gy_5qTswkei3OfgJ5AOFpEXzJo,23985
65
65
  nmdc_runtime/site/translation/neon_soil_translator.py,sha256=Rol0g67nVBGSBySUzpfdW4Fwes7bKtvnlv2g5cB0aTI,38550
66
66
  nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=MQgjIfWPgoRe-bhzyfqHSe2mZwFsjcwjdT8tNqpIhlc,27729
67
67
  nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
68
- nmdc_runtime/site/translation/submission_portal_translator.py,sha256=FVBqCvk6NAJIA22IhtFOTyvAQIiFN3KsznHc5zmOG40,29676
68
+ nmdc_runtime/site/translation/submission_portal_translator.py,sha256=9KhFn2jlRlGEAhsZWRPkpmInpxuVmnbCyR6jhlD7ooA,30587
69
69
  nmdc_runtime/site/translation/translator.py,sha256=V6Aq0y03LoQ4LTL2iHDHxGTh_eMjOmDJJSwNHSrp2wo,837
70
70
  nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
71
71
  nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -73,9 +73,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
73
73
  nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
74
74
  nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
75
75
  nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
76
- nmdc_runtime-2.1.1.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
77
- nmdc_runtime-2.1.1.dist-info/METADATA,sha256=jiwY6Bhzhc5sNIqqF-ib1ouWTezhdWBlmW-yD-qR1IA,7329
78
- nmdc_runtime-2.1.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
79
- nmdc_runtime-2.1.1.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
80
- nmdc_runtime-2.1.1.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
81
- nmdc_runtime-2.1.1.dist-info/RECORD,,
76
+ nmdc_runtime-2.2.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
77
+ nmdc_runtime-2.2.0.dist-info/METADATA,sha256=igSdpzN5dxlLV9r_O8btdkVPMTvLDzkn032LUdb-3hY,7256
78
+ nmdc_runtime-2.2.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
79
+ nmdc_runtime-2.2.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
80
+ nmdc_runtime-2.2.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
81
+ nmdc_runtime-2.2.0.dist-info/RECORD,,