PyPI - nmdc-runtime - Versions diffs - 2.8.0__py3-none-any.whl → 2.9.0__py3-none-any.whl - Mend

nmdc-runtime 2.8.0 (py3-none-any.whl) → 2.9.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (15) hide show

nmdc_runtime/config.py CHANGED Viewed

@@ -1,5 +1,57 @@
-DATABASE_CLASS_NAME = "Database"
+"""
+This module acts as a unified interface between the codebase and the environment.
+We will eventually move all of the Runtime's environment variable reads into this
+module, instead of leaving them sprinkled throughout the codebase.
+TODO: Move all environment variable reads into this module and update references accordingly.
+"""
+from typing import Set
+import os
+def is_env_var_true(name: str, default: str = "false") -> bool:
+    r"""
+    Checks whether the value of the specified environment variable
+    meets our criteria for true-ness.
+    The only value treated as true is the string "true", compared
+    case-insensitively. Every other value is treated as false.
+    :param name: Name of the environment variable to read
+    :param default: String used in place of the variable's value
+                    when the variable is not defined
+    :return: `True` if the effective value is "true" (in any letter case);
+             otherwise `False`
+    Reference: https://docs.python.org/3/library/os.html#os.environ
+    Run doctests via: $ python -m doctest nmdc_runtime/config.py
+    Note: The examples remove the variable via `os.environ` rather than via
+          `os.unsetenv`, because calls to `os.unsetenv` do not update `os.environ`
+          (which is the mapping this function reads).
+    >>> import os
+    >>> name = "EXAMPLE_ENV_VAR"
+    >>> _ = os.environ.pop(name, None)  # Undefined
+    >>> is_env_var_true(name)
+    False
+    >>> is_env_var_true(name, "true")  # Undefined, overridden default
+    True
+    >>> os.environ[name] = "false"  # Defined as false
+    >>> is_env_var_true(name)
+    False
+    >>> os.environ[name] = "true"  # Defined as true
+    >>> is_env_var_true(name)
+    True
+    >>> os.environ[name] = "TRUE"  # Case-insensitive
+    >>> is_env_var_true(name)
+    True
+    >>> os.environ[name] = "potato"  # Non-boolean string
+    >>> is_env_var_true(name)
+    False
+    >>> del os.environ[name]  # Clean up, so the example leaves no trace
+    """
+    # Kept as a set so that additional "true" spellings can be added in one place.
+    lowercase_true_strings: Set[str] = {"true"}
+    return os.environ.get(name, default).lower() in lowercase_true_strings
+# The name of the schema class representing the database. We don't bother to
+# make this customizable via the environment, as we expect it to never change.
+DATABASE_CLASS_NAME: str = "Database"
 # Feature flag that can be used to enable/disable the `/nmdcschema/related_ids`
 # endpoint and the tests that target it.
-IS_RELATED_IDS_ENDPOINT_ENABLED = False
+IS_RELATED_IDS_ENDPOINT_ENABLED: bool = is_env_var_true(
+    "IS_RELATED_IDS_ENDPOINT_ENABLED", default="true"
+)
+# Feature flag that can be used to enable/disable the `/scalar` endpoint.
+IS_SCALAR_ENABLED: bool = is_env_var_true("IS_SCALAR_ENABLED", default="true")

nmdc_runtime/site/graphs.py CHANGED Viewed

@@ -160,6 +160,7 @@ def gold_study_to_database():
         study_type,
         gold_nmdc_instrument_mapping_file_url,
         include_field_site_info,
+        enable_biosample_filtering,
     ) = get_gold_study_pipeline_inputs()
     projects = gold_projects_by_study(study_id)
@@ -176,6 +177,7 @@ def gold_study_to_database():
         analysis_projects,
         gold_nmdc_instrument_map_df,
         include_field_site_info,
+        enable_biosample_filtering,
     )
     database_dict = nmdc_schema_object_to_dict(database)
     filename = nmdc_schema_database_export_filename(study)
@@ -506,11 +508,19 @@ def nmdc_study_to_ncbi_submission_export():
 @graph
 def generate_data_generation_set_for_biosamples_in_nmdc_study():
-    (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
+    (
+        study_id,
+        gold_nmdc_instrument_mapping_file_url,
+        include_field_site_info,
+        enable_biosample_filtering,
+    ) = get_database_updater_inputs()
     gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
     database = generate_data_generation_set_post_biosample_ingest(
-        study_id, gold_nmdc_instrument_map_df
+        study_id,
+        gold_nmdc_instrument_map_df,
+        include_field_site_info,
+        enable_biosample_filtering,
     )
     database_dict = nmdc_schema_object_to_dict(database)
@@ -523,11 +533,19 @@ def generate_data_generation_set_for_biosamples_in_nmdc_study():
 @graph
 def generate_biosample_set_from_samples_in_gold():
-    (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
+    (
+        study_id,
+        gold_nmdc_instrument_mapping_file_url,
+        include_field_site_info,
+        enable_biosample_filtering,
+    ) = get_database_updater_inputs()
     gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
     database = generate_biosample_set_for_nmdc_study_from_gold(
-        study_id, gold_nmdc_instrument_map_df
+        study_id,
+        gold_nmdc_instrument_map_df,
+        include_field_site_info,
+        enable_biosample_filtering,
     )
     database_dict = nmdc_schema_object_to_dict(database)
     filename = post_submission_portal_biosample_ingest_record_stitching_filename(
@@ -545,10 +563,18 @@ def generate_update_script_for_insdc_biosample_identifiers():
     to generate a script for updating biosample records with INSDC identifiers obtained from GOLD.
     The script is returned as a dictionary that can be executed against MongoDB.
     """
-    (study_id, gold_nmdc_instrument_mapping_file_url) = get_database_updater_inputs()
+    (
+        study_id,
+        gold_nmdc_instrument_mapping_file_url,
+        include_field_site_info,
+        enable_biosample_filtering,
+    ) = get_database_updater_inputs()
     gold_nmdc_instrument_map_df = get_df_from_url(gold_nmdc_instrument_mapping_file_url)
     update_script = run_script_to_update_insdc_biosample_identifiers(
-        study_id, gold_nmdc_instrument_map_df
+        study_id,
+        gold_nmdc_instrument_map_df,
+        include_field_site_info,
+        enable_biosample_filtering,
     )
     render_text(update_script)

nmdc_runtime/site/ops.py CHANGED Viewed

@@ -10,7 +10,7 @@ from datetime import datetime, timezone
 from io import BytesIO, StringIO
 from pprint import pformat
 from toolz.dicttoolz import keyfilter
-from typing import Tuple, Set
+from typing import Tuple, Set, Union
 from zipfile import ZipFile
 from itertools import chain
 from ontology_loader.ontology_load_controller import OntologyLoaderController
@@ -44,7 +44,7 @@ from dagster import (
 from gridfs import GridFS
 from linkml_runtime.utils.dictutils import as_simple_dict
 from linkml_runtime.utils.yamlutils import YAMLRoot
-from nmdc_runtime.api.db.mongo import get_mongo_db
+from nmdc_runtime.api.db.mongo import get_mongo_db, validate_json
 from nmdc_runtime.api.core.idgen import generate_one_id
 from nmdc_runtime.api.core.metadata import (
     _validate_changesheet,
@@ -106,7 +106,6 @@ from nmdc_runtime.util import (
     get_names_of_classes_in_effective_range_of_slot,
     pluralize,
     put_object,
-    validate_json,
     specialize_activity_set_docs,
     collection_name_to_class_names,
     class_hierarchy_as_list,
@@ -481,83 +480,6 @@ def get_json_in(context):
     return rv.json()
-def ensure_data_object_type(docs: Dict[str, list], mdb: MongoDatabase):
-    """
-    Does not ensure ordering of `docs`.
-    TODO: Document this function. What _does_ it do (or what was it designed to do)?
-          What, conceptually, did the author design it to receive (as `docs`); a dict
-          having a `data_object_set` item whose value is a list of documents.
-          What, conceptually, did the author design it to return?
-    """
-    if ("data_object_set" not in docs) or len(docs["data_object_set"]) == 0:
-        return docs, 0
-    do_docs = docs["data_object_set"]
-    class FileTypeEnumBase(BaseModel):
-        name: str
-        description: str
-        filter: str  # JSON-encoded data_object_set mongo collection filter document
-    class FileTypeEnum(FileTypeEnumBase):
-        id: str
-    # Make a temporary collection (which will be dropped below) and insert the
-    # specified `data_object_set` documents into it.
-    temp_collection_name = f"tmp.data_object_set.{ObjectId()}"
-    temp_collection = mdb[temp_collection_name]
-    temp_collection.insert_many(do_docs)
-    temp_collection.create_index("id")
-    def fte_matches(fte_filter: str) -> List[dict]:
-        r"""
-        Returns a list of documents—without their `_id` field—that match the specified filter,
-        which is encoded as a JSON string.
-        """
-        return [
-            dissoc(d, "_id") for d in mdb.temp_collection.find(json.loads(fte_filter))
-        ]
-    # Create a mapping from each document's `id` to the document, itself.
-    do_docs_map = {d["id"]: d for d in do_docs}
-    n_docs_with_types_added = 0
-    # For each `file_type_enum` document in the database, find all the documents (among the
-    # `data_object_set` documents provided by the caller) that match that `file_type_enum`
-    # document's filter.
-    #
-    # If any of those documents lacks a `data_object_type` field, update the original
-    # `data_object_set` document so that its `data_object_type` field is set to
-    # the `file_type_enum` document's `id` (why not its `name`?).
-    #
-    # TODO: I don't know why this sets `data_object_type` to `file_type_enum.id`,
-    #       as opposed to `file_type_enum.name`.
-    #
-    for fte_doc in mdb.file_type_enum.find():
-        fte = FileTypeEnum(**fte_doc)
-        docs_matching = fte_matches(fte.filter)
-        for doc in docs_matching:
-            if "data_object_type" not in doc:
-                do_docs_map[doc["id"]] = assoc(doc, "data_object_type", fte.id)
-                n_docs_with_types_added += 1
-    mdb.drop_collection(temp_collection_name)
-    # Returns a tuple. The first item is the original `docs` dictionary, but with the
-    # `data_object_set` list replaced by the list of the documents that are in the
-    # `do_docs_map` dictionary (with their `_id` fields omitted). The second item is
-    # the number of documents to which this function added a `data_object_type` field.
-    return (
-        assoc(
-            docs, "data_object_set", [dissoc(v, "_id") for v in do_docs_map.values()]
-        ),
-        n_docs_with_types_added,
-    )
 @op(required_resource_keys={"runtime_api_site_client", "mongo"})
 def perform_mongo_updates(context, json_in):
     mongo = context.resources.mongo
@@ -566,8 +488,6 @@ def perform_mongo_updates(context, json_in):
     docs = json_in
     docs, _ = specialize_activity_set_docs(docs)
-    docs, n_docs_with_types_added = ensure_data_object_type(docs, mongo.db)
-    context.log.info(f"added `data_object_type` to {n_docs_with_types_added} docs")
     context.log.debug(f"{docs}")
     rv = validate_json(
@@ -636,22 +556,25 @@ def add_output_run_event(context: OpExecutionContext, outputs: List[str]):
         "study_type": str,
         "gold_nmdc_instrument_mapping_file_url": str,
         "include_field_site_info": bool,
+        "enable_biosample_filtering": bool,
     },
     out={
         "study_id": Out(str),
         "study_type": Out(str),
         "gold_nmdc_instrument_mapping_file_url": Out(str),
         "include_field_site_info": Out(bool),
+        "enable_biosample_filtering": Out(bool),
     },
 )
 def get_gold_study_pipeline_inputs(
     context: OpExecutionContext,
-) -> Tuple[str, str, str, bool]:
+) -> Tuple[str, str, str, bool, bool]:
     return (
         context.op_config["study_id"],
         context.op_config["study_type"],
         context.op_config["gold_nmdc_instrument_mapping_file_url"],
         context.op_config["include_field_site_info"],
+        context.op_config["enable_biosample_filtering"],
     )
@@ -695,6 +618,7 @@ def nmdc_schema_database_from_gold_study(
     analysis_projects: List[Dict[str, Any]],
     gold_nmdc_instrument_map_df: pd.DataFrame,
     include_field_site_info: bool,
+    enable_biosample_filtering: bool,
 ) -> nmdc.Database:
     client: RuntimeApiSiteClient = context.resources.runtime_api_site_client
@@ -710,6 +634,7 @@ def nmdc_schema_database_from_gold_study(
         analysis_projects,
         gold_nmdc_instrument_map_df,
         include_field_site_info,
+        enable_biosample_filtering,
         id_minter=id_minter,
     )
     database = translator.get_database()
@@ -1110,6 +1035,8 @@ def load_ontology(context: OpExecutionContext):
         source_ontology=source_ontology,
         output_directory=output_directory,
         generate_reports=generate_reports,
+        mongo_client=context.resources.mongo.client,
+        db_name=context.resources.mongo.db.name,
     )
     loader.run_ontology_loader()
@@ -1192,8 +1119,8 @@ def _add_related_ids_to_alldocs(
         "has_mass_spectrometry_configuration",  # a `nmdc:PlannedProcess` was influenced by its `nmdc:Configuration`.
         "instrument_used",  # a `nmdc:PlannedProcess` was influenced by a used `nmdc:Instrument`.
         "uses_calibration",  # a `nmdc:PlannedProcess` was influenced by `nmdc:CalibrationInformation`.
-        "was_generated_by",  # prov:wasGeneratedBy rdfs:subPropertyOf prov:wasInfluencedBy .
-        "was_informed_by",  # prov:wasInformedBy rdfs:subPropertyOf prov:wasInfluencedBy .
+        "was_generated_by",  # prov:wasGeneratedBy rdfs:subPropertyOf prov:wasInfluencedBy.
+        "was_informed_by",  # prov:wasInformedBy rdfs:subPropertyOf prov:wasInfluencedBy.
     ]
     # An "outbound" slot is one for which an entity in the domain "influences"
     # (i.e., [owl:inverseOf prov:wasInfluencedBy]) an entity in the range.
@@ -1572,16 +1499,24 @@ def post_submission_portal_biosample_ingest_record_stitching_filename(
     config_schema={
         "nmdc_study_id": str,
         "gold_nmdc_instrument_mapping_file_url": str,
+        "include_field_site_info": bool,
+        "enable_biosample_filtering": bool,
     },
     out={
         "nmdc_study_id": Out(str),
         "gold_nmdc_instrument_mapping_file_url": Out(str),
+        "include_field_site_info": Out(bool),
+        "enable_biosample_filtering": Out(bool),
     },
 )
-def get_database_updater_inputs(context: OpExecutionContext) -> Tuple[str, str]:
+def get_database_updater_inputs(
+    context: OpExecutionContext,
+) -> Tuple[str, str, bool, bool]:
     return (
         context.op_config["nmdc_study_id"],
         context.op_config["gold_nmdc_instrument_mapping_file_url"],
+        context.op_config["include_field_site_info"],
+        context.op_config["enable_biosample_filtering"],
     )
@@ -1596,6 +1531,8 @@ def generate_data_generation_set_post_biosample_ingest(
     context: OpExecutionContext,
     nmdc_study_id: str,
     gold_nmdc_instrument_map_df: pd.DataFrame,
+    include_field_site_info: bool,
+    enable_biosample_filtering: bool,
 ) -> nmdc.Database:
     runtime_api_user_client: RuntimeApiUserClient = (
         context.resources.runtime_api_user_client
@@ -1611,6 +1548,8 @@ def generate_data_generation_set_post_biosample_ingest(
         gold_api_client,
         nmdc_study_id,
         gold_nmdc_instrument_map_df,
+        include_field_site_info,
+        enable_biosample_filtering,
     )
     database = (
         database_updater.generate_data_generation_set_records_from_gold_api_for_study()
@@ -1630,6 +1569,8 @@ def generate_biosample_set_for_nmdc_study_from_gold(
     context: OpExecutionContext,
     nmdc_study_id: str,
     gold_nmdc_instrument_map_df: pd.DataFrame,
+    include_field_site_info: bool = False,
+    enable_biosample_filtering: bool = False,
 ) -> nmdc.Database:
     runtime_api_user_client: RuntimeApiUserClient = (
         context.resources.runtime_api_user_client
@@ -1645,6 +1586,8 @@ def generate_biosample_set_for_nmdc_study_from_gold(
         gold_api_client,
         nmdc_study_id,
         gold_nmdc_instrument_map_df,
+        include_field_site_info,
+        enable_biosample_filtering,
     )
     database = database_updater.generate_biosample_set_from_gold_api_for_study()
@@ -1656,13 +1599,16 @@ def generate_biosample_set_for_nmdc_study_from_gold(
         "runtime_api_user_client",
         "runtime_api_site_client",
         "gold_api_client",
-    }
+    },
+    out=Out(Any),
 )
 def run_script_to_update_insdc_biosample_identifiers(
     context: OpExecutionContext,
     nmdc_study_id: str,
     gold_nmdc_instrument_map_df: pd.DataFrame,
-) -> Dict[str, Any]:
+    include_field_site_info: bool,
+    enable_biosample_filtering: bool,
+):
     """Generates a MongoDB update script to add INSDC biosample identifiers to biosamples.
     This op uses the DatabaseUpdater to generate a script that can be used to update biosample
@@ -1674,7 +1620,7 @@ def run_script_to_update_insdc_biosample_identifiers(
         gold_nmdc_instrument_map_df: A dataframe mapping GOLD instrument IDs to NMDC instrument set records
     Returns:
-        A dictionary containing the MongoDB update script
+        A dictionary or list of dictionaries containing the MongoDB update script(s)
     """
     runtime_api_user_client: RuntimeApiUserClient = (
         context.resources.runtime_api_user_client
@@ -1690,11 +1636,17 @@ def run_script_to_update_insdc_biosample_identifiers(
         gold_api_client,
         nmdc_study_id,
         gold_nmdc_instrument_map_df,
+        include_field_site_info,
+        enable_biosample_filtering,
     )
     update_script = database_updater.queries_run_script_to_update_insdc_identifiers()
+    if isinstance(update_script, list):
+        total_updates = sum(len(item.get("updates", [])) for item in update_script)
+    else:
+        total_updates = len(update_script.get("updates", []))
     context.log.info(
-        f"Generated update script for study {nmdc_study_id} with {len(update_script.get('updates', []))} updates"
+        f"Generated update script for study {nmdc_study_id} with {total_updates} updates"
     )
     return update_script

nmdc_runtime/site/repair/database_updater.py CHANGED Viewed

@@ -18,6 +18,8 @@ class DatabaseUpdater:
         gold_api_client: GoldApiClient,
         study_id: str,
         gold_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
+        include_field_site_info: bool = False,
+        enable_biosample_filtering: bool = True,
     ):
         """This class serves as an API for repairing connections in the database by
         adding records that are essentially missing "links"/"connections". As we identify
@@ -39,6 +41,8 @@ class DatabaseUpdater:
         self.gold_api_client = gold_api_client
         self.study_id = study_id
         self.gold_nmdc_instrument_map_df = gold_nmdc_instrument_map_df
+        self.include_field_site_info = include_field_site_info
+        self.enable_biosample_filtering = enable_biosample_filtering
     @lru_cache
     def _fetch_gold_biosample(self, gold_biosample_id: str) -> List[Dict[str, Any]]:
@@ -95,6 +99,8 @@ class DatabaseUpdater:
             biosamples=all_gold_biosamples,
             projects=all_gold_projects,
             gold_nmdc_instrument_map_df=self.gold_nmdc_instrument_map_df,
+            include_field_site_info=self.include_field_site_info,
+            enable_biosample_filtering=self.enable_biosample_filtering,
         )
         # The GoldStudyTranslator class has some pre-processing logic which filters out
@@ -214,6 +220,8 @@ class DatabaseUpdater:
             projects=gold_sequencing_projects_for_study,
             analysis_projects=gold_analysis_projects_for_study,
             gold_nmdc_instrument_map_df=self.gold_nmdc_instrument_map_df,
+            include_field_site_info=self.include_field_site_info,
+            enable_biosample_filtering=self.enable_biosample_filtering,
         )
         translated_biosamples = gold_study_translator.biosamples

nmdc_runtime/site/repository.py CHANGED Viewed

@@ -463,11 +463,6 @@ def claim_and_run_apply_changesheet_jobs(_context):
         yield SkipReason("; ".join(skip_notes))
-# TODO ensure data_object_type values from file_type_enum
-#    see /metadata-translation/notebooks/202106_curation_updates.ipynb
-#    for details ("Create file_type_enum collection" section).
 @sensor(job=create_objects_from_site_object_puts.to_job(**preset_normal))
 def done_object_put_ops(_context):
     client = get_runtime_api_site_client(run_config_frozen__normal_env)
@@ -574,6 +569,7 @@ def biosample_submission_ingest():
                             "study_type": "research_study",
                             "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/berkeley-schema-fy24/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
                             "include_field_site_info": False,
+                            "enable_biosample_filtering": True,
                         },
                     },
                     "export_json_to_drs": {"config": {"username": ""}},
@@ -1018,6 +1014,8 @@ def database_records_stitching():
                         "config": {
                             "nmdc_study_id": "",
                             "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
+                            "include_field_site_info": False,
+                            "enable_biosample_filtering": True,
                         }
                     },
                     "export_json_to_drs": {"config": {"username": ""}},
@@ -1060,6 +1058,8 @@ def database_records_stitching():
                         "config": {
                             "nmdc_study_id": "",
                             "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
+                            "include_field_site_info": False,
+                            "enable_biosample_filtering": True,
                         }
                     },
                     "export_json_to_drs": {"config": {"username": ""}},
@@ -1102,6 +1102,8 @@ def database_records_stitching():
                         "config": {
                             "nmdc_study_id": "",
                             "gold_nmdc_instrument_mapping_file_url": "https://raw.githubusercontent.com/microbiomedata/nmdc-schema/refs/heads/main/assets/misc/gold_seqMethod_to_nmdc_instrument_set.tsv",
+                            "include_field_site_info": False,
+                            "enable_biosample_filtering": True,
                         }
                     },
                 },

nmdc_runtime/site/resources.py CHANGED Viewed

@@ -109,7 +109,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
             },
         )
         response.raise_for_status()
-        return response.json()["cursor"]["firstBatch"]
+        return response.json()["cursor"]["batch"]
     def get_omics_processing_records_by_gold_project_id(self, gold_project_id: str):
         gold_project_id = normalize_gold_id(gold_project_id)
@@ -126,7 +126,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
             },
         )
         response.raise_for_status()
-        return response.json()["cursor"]["firstBatch"]
+        return response.json()["cursor"]["batch"]
     def get_biosamples_for_study(self, study_id: str):
         # TODO: 10000 is an arbitrarily large number that has been chosen for the max_page_size param.
@@ -170,7 +170,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
             },
         )
         response.raise_for_status()
-        return response.json()["cursor"]["firstBatch"]
+        return response.json()["cursor"]["batch"]
     def get_study(self, study_id: str):
         response = self.request(
@@ -182,7 +182,7 @@ class RuntimeApiUserClient(RuntimeApiClient):
             },
         )
         response.raise_for_status()
-        return response.json()["cursor"]["firstBatch"]
+        return response.json()["cursor"]["batch"]
 class RuntimeApiSiteClient(RuntimeApiClient):

nmdc_runtime/site/translation/gold_translator.py CHANGED Viewed

@@ -45,6 +45,7 @@ class GoldStudyTranslator(Translator):
         analysis_projects: List[JSON_OBJECT] = [],
         gold_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
         include_field_site_info: bool = False,
+        enable_biosample_filtering: bool = True,
         *args,
         **kwargs,
     ) -> None:
@@ -53,15 +54,20 @@ class GoldStudyTranslator(Translator):
         self.study = study
         self.study_type = nmdc.StudyCategoryEnum(study_type)
         self.include_field_site_info = include_field_site_info
+        self.enable_biosample_filtering = enable_biosample_filtering
         # Filter biosamples to only those with `sequencingStrategy` of
-        # "Metagenome" or "Metatranscriptome"
-        self.biosamples = [
-            biosample
-            for biosample in biosamples
-            if any(
-                _is_valid_project(project) for project in biosample.get("projects", [])
-            )
-        ]
+        # "Metagenome" or "Metatranscriptome" if filtering is enabled
+        if enable_biosample_filtering:
+            self.biosamples = [
+                biosample
+                for biosample in biosamples
+                if any(
+                    _is_valid_project(project)
+                    for project in biosample.get("projects", [])
+                )
+            ]
+        else:
+            self.biosamples = biosamples
         # Fetch the valid projectGoldIds that are associated with filtered
         # biosamples on their `projects` field
         valid_project_ids = {
@@ -116,6 +122,9 @@ class GoldStudyTranslator(Translator):
         :param gold_entity: GOLD entity object
         :return: PersonValue corresponding to the first PI in the `contacts` field
         """
+        if "contacts" not in gold_entity:
+            return None
         pi_dict = next(
             (
                 contact
@@ -169,7 +178,7 @@ class GoldStudyTranslator(Translator):
                 project["ncbiBioSampleAccession"], default_prefix="biosample"
             )
             for project in biosample_projects
-            if project["ncbiBioSampleAccession"]
+            if project.get("ncbiBioSampleAccession")
         ]
     def _get_samp_taxon_id(

nmdc_runtime/site/translation/submission_portal_translator.py CHANGED Viewed

@@ -47,6 +47,12 @@ DATA_URL_SET_AND_ANALYTE_TO_DATA_OBJECT_TYPE: dict[tuple[DataUrlSet, str], str]
     (INTERLEAVED, str(METATRANSCRIPTOME)): "Metatranscriptome Raw Reads",
 }
+# Units to assign to the values of specific slots. The outer key is a schema class
+# name and the inner key is the name of a slot on that class; the inner value is the
+# unit string to use for that slot (e.g. the `depth` slot of `Biosample` gets "m").
+UNIT_OVERRIDES: dict[str, dict[str, str]] = {
+    "Biosample": {
+        "depth": "m",
+    }
+}
 class EnvironmentPackage(Enum):
     r"""
@@ -475,6 +481,50 @@ class SubmissionPortalTranslator(Translator):
         return value
+    def _get_study_dois(self, metadata_submission) -> Union[List[nmdc.Doi], None]:
+        """Build the study's `nmdc.Doi` objects from a submission portal entry.
+        Data DOIs are read from `studyForm.dataDois` and award DOIs are read from
+        `multiOmicsForm.awardDois`. In the returned list, the data DOIs (category
+        "dataset_doi") precede the award DOIs (category "award_doi").
+        :param metadata_submission: submission portal entry
+        :return: list of nmdc.Doi objects, or None if the entry has no DOIs
+        """
+        # Both lookups happen up front, before any `nmdc.Doi` object is constructed.
+        dois_by_category = (
+            (
+                "dataset_doi",
+                self._get_from(metadata_submission, ["studyForm", "dataDois"]),
+            ),
+            (
+                "award_doi",
+                self._get_from(metadata_submission, ["multiOmicsForm", "awardDois"]),
+            ),
+        )
+        study_dois = [
+            nmdc.Doi(
+                doi_category=category,
+                doi_provider=entry["provider"],
+                doi_value=self._ensure_curie(entry["value"], default_prefix="doi"),
+                type="nmdc:Doi",
+            )
+            for category, entries in dois_by_category
+            # A missing or empty DOI list contributes nothing.
+            for entry in (entries or [])
+        ]
+        # An empty result is reported as None rather than as an empty list.
+        return study_dois or None
     def _get_data_objects_from_fields(
         self,
         sample_data: JSON_OBJECT,
@@ -591,6 +641,7 @@ class SubmissionPortalTranslator(Translator):
             websites=self._get_from(
                 metadata_submission, ["studyForm", "linkOutWebpage"]
             ),
+            associated_dois=self._get_study_dois(metadata_submission),
         )
     def _transform_value_for_slot(
@@ -660,6 +711,17 @@ class SubmissionPortalTranslator(Translator):
                 logging.warning(f"No slot '{slot_name}' on class '{class_name}'")
                 continue
+            # This step handles cases where the submission portal/schema instructs a user to
+            # provide a value in a specific unit. The unit cannot be parsed out of the raw value
+            # in these cases, so we have to manually set it via UNIT_OVERRIDES. This part can
+            # go away once units are encoded in the schema itself.
+            # See: https://github.com/microbiomedata/nmdc-schema/issues/2517
+            if class_name in UNIT_OVERRIDES:
+                # If the class has unit overrides, check if the slot is in the overrides
+                unit_overrides = UNIT_OVERRIDES[class_name]
+                if slot_name in unit_overrides:
+                    unit = unit_overrides[slot_name]
             slot_definition = self.schema_view.induced_slot(slot_name, class_name)
             if slot_definition.multivalued:
                 value_list = value

nmdc_runtime/util.py CHANGED Viewed

@@ -3,36 +3,28 @@ import mimetypes
 import os
 import pkgutil
 from collections.abc import Iterable
-from contextlib import AbstractContextManager
 from copy import deepcopy
 from datetime import datetime, timezone
 from functools import lru_cache
 from io import BytesIO
 from itertools import chain
 from pathlib import Path
-from uuid import uuid4
 from typing import Callable, List, Optional, Set, Dict
 import fastjsonschema
 import requests
 from frozendict import frozendict
-from jsonschema.validators import Draft7Validator
 from linkml_runtime import linkml_model
 from linkml_runtime.utils.schemaview import SchemaView
-from nmdc_schema.nmdc import Database as NMDCDatabase
 from nmdc_schema.get_nmdc_view import ViewGetter
-from pydantic import Field, BaseModel
 from pymongo.database import Database as MongoDatabase
 from pymongo.errors import OperationFailure
 from refscan.lib.helpers import identify_references
-from refscan.lib.Finder import Finder
 from refscan.lib.ReferenceList import ReferenceList
-from refscan.scanner import scan_outgoing_references
-from toolz import merge, unique
+from toolz import merge
 from nmdc_runtime.api.core.util import sha256hash_from_file
 from nmdc_runtime.api.models.object import DrsObjectIn
-from typing_extensions import Annotated
 def get_names_of_classes_in_effective_range_of_slot(
@@ -499,6 +491,11 @@ def populated_schema_collection_names_with_id_field(mdb: MongoDatabase) -> List[
 def ensure_unique_id_indexes(mdb: MongoDatabase):
     """Ensure that any collections with an "id" field have an index on "id"."""
+    # Note: The pipe (i.e. `|`) operator performs a union of the two sets. In this case,
+    #       it creates a set (i.e. `candidate_names`) consisting of the names of both
+    #       (a) all collections in the real database, and (b) all collections that
+    #       the NMDC schema says can contain instances of classes that have an "id" slot.
     candidate_names = (
         set(mdb.list_collection_names()) | schema_collection_names_with_id_field()
     )
@@ -533,271 +530,6 @@ def ensure_unique_id_indexes(mdb: MongoDatabase):
                     raise
-class UpdateStatement(BaseModel):
-    q: dict
-    u: dict
-    upsert: bool = False
-    multi: bool = False
-class DeleteStatement(BaseModel):
-    q: dict
-    limit: Annotated[int, Field(ge=0, le=1)] = 1
-class OverlayDBError(Exception):
-    pass
-class OverlayDB(AbstractContextManager):
-    """Provides a context whereby a base Database is overlaid with a temporary one.
-    If you need to run basic simulations of updates to a base database,
-    you don't want to actually commit transactions to the base database.
-    For example, to insert or replace (matching on "id") many documents into a collection in order
-    to then validate the resulting total set of collection documents, an OverlayDB writes to
-    an overlay collection that "shadows" the base collection during a "find" query
-    (the "merge_find" method of an OverlayDB object): if a document with `id0` is found in the
-    overlay collection, that id is marked as "seen" and will not also be returned when
-    subsequently scanning the (unmodified) base-database collection.
-    Note: The OverlayDB object does not provide a means to perform arbitrary MongoDB queries on the virtual "merged"
-          database. Callers can access the real database via `overlay_db._bottom_db` and the overlaying database via
-          `overlay_db._top_db` and perform arbitrary MongoDB queries on the individual databases that way. Access to
-          the virtual "merged" database is limited to the methods of the `OverlayDB` class, which simulates the
-          "merging" just-in-time to process the method invocation. You can see an example of this in the implementation
-          of the `merge_find` method, which internally accesses both the real database and the overlaying database.
-    Mongo "update" commands (as the "apply_updates" method) are simulated by first copying affected
-    documents from a base collection to the overlay, and then applying the updates to the overlay,
-    so that again, base collections are unmodified, and a "merge_find" call will produce a result
-    *as if* the base collection(s) were modified.
-    Mongo deletions (as the "delete" method) also copy affected documents from the base collection
-    to the overlay collection, and flag them using the "_deleted" field. In this way, a `merge_find`
-    call will match a relevant document given a suitable filter, and will mark the document's id
-    as "seen" *without* returning the document. Thus, the result is as if the document were deleted.
-    Usage:
-    ````
-    with OverlayDB(mdb) as odb:
-        # do stuff, e.g. `odb.replace_or_insert_many(...)`
-    ```
-    """
-    def __init__(self, mdb: MongoDatabase):
-        self._bottom_db = mdb
-        self._top_db = self._bottom_db.client.get_database(f"overlay-{uuid4()}")
-        ensure_unique_id_indexes(self._top_db)
-    def __enter__(self):
-        return self
-    def __exit__(self, exc_type, exc_value, traceback):
-        self._bottom_db.client.drop_database(self._top_db.name)
-    def replace_or_insert_many(self, coll_name, documents: list):
-        try:
-            self._top_db[coll_name].insert_many(documents)
-        except OperationFailure as e:
-            raise OverlayDBError(str(e.details))
-    def apply_updates(self, coll_name, updates: list):
-        """prepare overlay db and apply updates to it."""
-        assert all(UpdateStatement(**us) for us in updates)
-        for update_spec in updates:
-            for bottom_doc in self._bottom_db[coll_name].find(update_spec["q"]):
-                self._top_db[coll_name].insert_one(bottom_doc)
-        try:
-            self._top_db.command({"update": coll_name, "updates": updates})
-        except OperationFailure as e:
-            raise OverlayDBError(str(e.details))
-    def delete(self, coll_name, deletes: list):
-        """ "apply" delete command by flagging docs in overlay database"""
-        assert all(DeleteStatement(**us) for us in deletes)
-        for delete_spec in deletes:
-            for bottom_doc in self._bottom_db[coll_name].find(
-                delete_spec["q"], limit=delete_spec.get("limit", 1)
-            ):
-                bottom_doc["_deleted"] = True
-                self._top_db[coll_name].insert_one(bottom_doc)
-    def merge_find(self, coll_name, find_spec: dict):
-        """Yield docs first from overlay and then from base db, minding deletion flags."""
-        # ensure projection of "id" and "_deleted"
-        if "projection" in find_spec:
-            proj = find_spec["projection"]
-            if isinstance(proj, dict):
-                proj = merge(proj, {"id": 1, "_deleted": 1})
-            elif isinstance(proj, list):
-                proj = list(unique(proj + ["id", "_deleted"]))
-        top_docs = self._top_db[coll_name].find(**find_spec)
-        bottom_docs = self._bottom_db[coll_name].find(**find_spec)
-        top_seen_ids = set()
-        for doc in top_docs:
-            if not doc.get("_deleted"):
-                yield doc
-            top_seen_ids.add(doc["id"])
-        for doc in bottom_docs:
-            if doc["id"] not in top_seen_ids:
-                yield doc
-def validate_json(
-    in_docs: dict, mdb: MongoDatabase, check_inter_document_references: bool = False
-):
-    r"""
-    Checks whether the specified dictionary represents a valid instance of the `Database` class
-    defined in the NMDC Schema. Referential integrity checking is performed on an opt-in basis.
-    Example dictionary:
-    {
-        "biosample_set": [
-            {"id": "nmdc:bsm-00-000001", ...},
-            {"id": "nmdc:bsm-00-000002", ...}
-        ],
-        "study_set": [
-            {"id": "nmdc:sty-00-000001", ...},
-            {"id": "nmdc:sty-00-000002", ...}
-        ]
-    }
-    :param in_docs: The dictionary you want to validate
-    :param mdb: A reference to a MongoDB database
-    :param check_inter_document_references: Whether you want this function to check whether every document that
-                                            is referenced by any of the documents passed in would, indeed, exist
-                                            in the database, if the documents passed in were to be inserted into
-                                            the database. In other words, set this to `True` if you want this
-                                            function to perform referential integrity checks.
-    """
-    validator = Draft7Validator(get_nmdc_jsonschema_dict())
-    docs = deepcopy(in_docs)
-    validation_errors = {}
-    known_coll_names = set(nmdc_database_collection_names())
-    for coll_name, coll_docs in docs.items():
-        if coll_name not in known_coll_names:
-            # FIXME: Document what `@type` is (conceptually; e.g., why this function accepts it as a collection name).
-            #        See: https://github.com/microbiomedata/nmdc-runtime/discussions/858
-            if coll_name == "@type" and coll_docs in ("Database", "nmdc:Database"):
-                continue
-            else:
-                validation_errors[coll_name] = [
-                    f"'{coll_name}' is not a known schema collection name"
-                ]
-                continue
-        errors = list(validator.iter_errors({coll_name: coll_docs}))
-        validation_errors[coll_name] = [e.message for e in errors]
-        if coll_docs:
-            if not isinstance(coll_docs, list):
-                validation_errors[coll_name].append("value must be a list")
-            elif not all(isinstance(d, dict) for d in coll_docs):
-                validation_errors[coll_name].append(
-                    "all elements of list must be dicts"
-                )
-            if not validation_errors[coll_name]:
-                try:
-                    with OverlayDB(mdb) as odb:
-                        odb.replace_or_insert_many(coll_name, coll_docs)
-                except OverlayDBError as e:
-                    validation_errors[coll_name].append(str(e))
-    if all(len(v) == 0 for v in validation_errors.values()):
-        # Second pass. Try instantiating linkml-sourced dataclass
-        in_docs.pop("@type", None)
-        try:
-            NMDCDatabase(**in_docs)
-        except Exception as e:
-            return {"result": "errors", "detail": str(e)}
-        # Third pass (if enabled): Check inter-document references.
-        if check_inter_document_references is True:
-            # Prepare to use `refscan`.
-            #
-            # Note: We check the inter-document references in two stages, which are:
-            #       1. For each document in the JSON payload, check whether each document it references already exists
-            #          (in the collections the schema says it can exist in) in the database. We use the
-            #          `refscan` package to do this, which returns violation details we'll use in the second stage.
-            #       2. For each violation found in the first stage (i.e. each reference to a not-found document), we
-            #          check whether that document exists (in the collections the schema says it can exist in) in the
-            #          JSON payload. If it does, then we "waive" (i.e. discard) that violation.
-            #       The violations that remain after those two stages are the ones we return to the caller.
-            #
-            # Note: The reason we do not insert documents into an `OverlayDB` and scan _that_, is that the `OverlayDB`
-            #       does not provide a means to perform arbitrary queries against its virtual "merged" database. It
-            #       is not a drop-in replacement for a pymongo's `Database` class, which is the only thing that
-            #       `refscan`'s `Finder` class accepts.
-            #
-            finder = Finder(database=mdb)
-            references = get_allowed_references()
-            reference_field_names_by_source_class_name = (
-                references.get_reference_field_names_by_source_class_name()
-            )
-            # Iterate over the collections in the JSON payload.
-            for source_collection_name, documents in in_docs.items():
-                for document in documents:
-                    # Add an `_id` field to the document, since `refscan` requires the document to have one.
-                    source_document = dict(document, _id=None)
-                    violations = scan_outgoing_references(
-                        document=source_document,
-                        schema_view=nmdc_schema_view(),
-                        reference_field_names_by_source_class_name=reference_field_names_by_source_class_name,
-                        references=references,
-                        finder=finder,
-                        collection_names=nmdc_database_collection_names(),
-                        source_collection_name=source_collection_name,
-                        user_wants_to_locate_misplaced_documents=False,
-                    )
-                    # For each violation, check whether the misplaced document is in the JSON payload, itself.
-                    for violation in violations:
-                        can_waive_violation = False
-                        # Determine which collections can contain the referenced document, based upon
-                        # the schema class of which this source document is an instance.
-                        target_collection_names = (
-                            references.get_target_collection_names(
-                                source_class_name=violation.source_class_name,
-                                source_field_name=violation.source_field_name,
-                            )
-                        )
-                        # Check whether the referenced document exists in any of those collections in the JSON payload.
-                        for json_coll_name, json_coll_docs in in_docs.items():
-                            if json_coll_name in target_collection_names:
-                                for json_coll_doc in json_coll_docs:
-                                    if json_coll_doc["id"] == violation.target_id:
-                                        can_waive_violation = True
-                                        break  # stop checking
-                            if can_waive_violation:
-                                break  # stop checking
-                        if not can_waive_violation:
-                            violation_as_str = (
-                                f"Document '{violation.source_document_id}' "
-                                f"in collection '{violation.source_collection_name}' "
-                                f"has a field '{violation.source_field_name}' that "
-                                f"references a document having id "
-                                f"'{violation.target_id}', but the latter document "
-                                f"does not exist in any of the collections the "
-                                f"NMDC Schema says it can exist in."
-                            )
-                            validation_errors[source_collection_name].append(
-                                violation_as_str
-                            )
-            # If any collection's error list is not empty, return an error response.
-            if any(len(v) > 0 for v in validation_errors.values()):
-                return {"result": "errors", "detail": validation_errors}
-        return {"result": "All Okay!"}
-    else:
-        return {"result": "errors", "detail": validation_errors}
 def decorate_if(condition: bool = False) -> Callable:
     r"""
     Decorator that applies another decorator only when `condition` is `True`.

{nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.9.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nmdc_runtime
-Version: 2.8.0
+Version: 2.9.0
 Summary: A runtime system for NMDC data management and orchestration
 Home-page: https://github.com/microbiomedata/nmdc-runtime
 Author: Donny Winston

{nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.9.0.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
 nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nmdc_runtime/config.py,sha256=GKmovwYD3tIiUQX-mAOcHI8NMEMLhogjHDB9I8azA4c,195
+nmdc_runtime/config.py,sha256=CW6LnN8Idsbra_mZnHU-kcWsYBZWbgivqVEp8rpOMi4,1989
 nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
 nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/mongo_util.py,sha256=7NRvqFE8W2CUcpcXAA4KElUACIdAkBehZ9TBG4k7zNE,3000
-nmdc_runtime/util.py,sha256=FfGNfcnHKS6Yzuwbdj0FtCcL-ks9HUjwWUfsPs1H2ao,33285
+nmdc_runtime/util.py,sha256=Rw-OiQDHrz4cNX3ZdC-cgfHYUMq1qsk-_Mv81UrDlC8,19823
 nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
@@ -37,10 +37,10 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
 nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
 nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nmdc_runtime/site/graphs.py,sha256=cJfLCRYH6l3SW-0MYIOihORit6Fe_gziwQ6BJaph55c,17713
-nmdc_runtime/site/ops.py,sha256=m9p8dlfNVpdEyu0o06cT9jMLkjZh0GGFxEQxDuDPUaA,65917
-nmdc_runtime/site/repository.py,sha256=ZkIykDDaFTxB4QW1Eo_w-9IywQrXXTV7Ugogf8vQ604,47439
-nmdc_runtime/site/resources.py,sha256=2R9X-06f9ZpDWYKltOkl_IIAScQGEEbsZF-URm4O6dM,20164
+nmdc_runtime/site/graphs.py,sha256=CWbLLtoaakmNgSoaQWylXvcOY6qS7qwkTexEUDiMNfM,18295
+nmdc_runtime/site/ops.py,sha256=y6bBJhAytrSqt0COkOqXVKgfSGVdgQ7uByUP8S-zUB4,63935
+nmdc_runtime/site/repository.py,sha256=g0bZytvCrUjLpWuvkAzzmI16mChsrYPbWcvVFPNZFnM,47687
+nmdc_runtime/site/resources.py,sha256=dLNtNa4FfSKN_6b21eItn-i8e0ZHyveoBsexl2I6zmo,20144
 nmdc_runtime/site/util.py,sha256=h70UJCT9g-I63EJn0drZjv1iaQ8LHJTbG29R9kqJ04c,1821
 nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
@@ -58,17 +58,17 @@ nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ
 nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
 nmdc_runtime/site/repair/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nmdc_runtime/site/repair/database_updater.py,sha256=gRZ-NxZzXNd-vTIuygabEUqUSiF9eL4hL2rI9Qdf2WI,20764
+nmdc_runtime/site/repair/database_updater.py,sha256=a6POYZcLEl0JvnuWxPjaOJtwZjkJhhvvUg1ABhnBiP8,21268
 nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
 nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
-nmdc_runtime/site/translation/gold_translator.py,sha256=HGbWeuxppqlVfU8F5oKTYIDoC6qaftugJeWFIALB9XE,32720
+nmdc_runtime/site/translation/gold_translator.py,sha256=n7PrAyZb6ODG1uaZ0cay91DygAHIefOL2qXLuukOyIM,33075
 nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
 nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=8_QF75Gf-dc2xVeO6jzTmdDrlGdh1-QrLJKG2SwUhCA,23797
 nmdc_runtime/site/translation/neon_soil_translator.py,sha256=IMeq4ABgWaSUbB_gmG8vBCMeynQSlbCUw9p2be6o8kE,38620
 nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=Js8_r6vHBW8b-_BpFySTUuYOFe7r51k8HwaNCQ7nAAg,30587
 nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
-nmdc_runtime/site/translation/submission_portal_translator.py,sha256=UEeqlkz_YGqcnx8vomFysetOlXxDu23q0Ryr93SZy78,41684
+nmdc_runtime/site/translation/submission_portal_translator.py,sha256=d5ycQhd-I07iUeuqN0vcHvMkOHqrwB67j2Q64aFkKBw,44147
 nmdc_runtime/site/translation/translator.py,sha256=V6Aq0y03LoQ4LTL2iHDHxGTh_eMjOmDJJSwNHSrp2wo,837
 nmdc_runtime/site/translation/util.py,sha256=w_l3SiExGsl6cXRqto0a_ssDmHkP64ITvrOVfPxmNpY,4366
 nmdc_runtime/site/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -76,9 +76,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
 nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
 nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
 nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
-nmdc_runtime-2.8.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
-nmdc_runtime-2.8.0.dist-info/METADATA,sha256=B8Vhde36JVAAwdCqKkcFaTyF13D0uWL8KEQnsyJUajc,8953
-nmdc_runtime-2.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nmdc_runtime-2.8.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
-nmdc_runtime-2.8.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
-nmdc_runtime-2.8.0.dist-info/RECORD,,
+nmdc_runtime-2.9.0.dist-info/licenses/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
+nmdc_runtime-2.9.0.dist-info/METADATA,sha256=4NgNI-Et3t1WLDfZPbSFT18JnMBVEuSCoFAZbm_V0xk,8953
+nmdc_runtime-2.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nmdc_runtime-2.9.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
+nmdc_runtime-2.9.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
+nmdc_runtime-2.9.0.dist-info/RECORD,,

{nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.9.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.9.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.9.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{nmdc_runtime-2.8.0.dist-info → nmdc_runtime-2.9.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

nmdc-runtime 2.8.0__py3-none-any.whl → 2.9.0__py3-none-any.whl

Potentially problematic release.

nmdc-runtime 2.8.0__py3-none-any.whl → 2.9.0__py3-none-any.whl