PyPI - nmdc-runtime - Versions diffs - 2.2.1__py3-none-any.whl → 2.4.0__py3-none-any.whl - Mend

nmdc-runtime 2.2.1__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (19) hide show

nmdc_runtime/minter/config.py +18 -50
nmdc_runtime/site/export/ncbi_xml.py +23 -2
nmdc_runtime/site/export/ncbi_xml_utils.py +81 -30
nmdc_runtime/site/graphs.py +39 -0
nmdc_runtime/site/ops.py +131 -31
nmdc_runtime/site/repair/__init__.py +0 -0
nmdc_runtime/site/repair/database_updater.py +230 -0
nmdc_runtime/site/repository.py +109 -9
nmdc_runtime/site/resources.py +36 -5
nmdc_runtime/site/translation/gold_translator.py +26 -4
nmdc_runtime/site/translation/neon_surface_water_translator.py +128 -78
nmdc_runtime/site/util.py +7 -2
nmdc_runtime/util.py +143 -1
{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/METADATA +11 -3
{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/RECORD +19 -17
{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/WHEEL +1 -1
{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/LICENSE +0 -0
{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/entry_points.txt +0 -0
{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/top_level.txt +0 -0

nmdc_runtime/site/translation/neon_surface_water_translator.py CHANGED Viewed

@@ -71,6 +71,7 @@ class NeonSurfaceWaterDataTranslator(Translator):
         neon_amb_data_tables = (
             "mms_swMetagenomeSequencing",
             "mms_swMetagenomeDnaExtraction",
+            "mms_swRawDataFiles",
             "amc_fieldGenetic",
             "amc_fieldSuperParent",
         )
@@ -88,6 +89,9 @@ class NeonSurfaceWaterDataTranslator(Translator):
                 if_exists="replace",
                 index=False,
             )
+            surface_water_data["mms_swRawDataFiles"].to_sql(
+                "mms_swRawDataFiles", self.conn, if_exists="replace", index=False
+            )
             surface_water_data["amc_fieldGenetic"].to_sql(
                 "amc_fieldGenetic", self.conn, if_exists="replace", index=False
             )
@@ -103,10 +107,7 @@ class NeonSurfaceWaterDataTranslator(Translator):
             "neonEnvoTerms", self.conn, if_exists="replace", index=False
         )
-        self.neon_raw_data_file_mappings_df = neon_raw_data_file_mappings_file
-        self.neon_raw_data_file_mappings_df.to_sql(
-            "neonRawDataFile", self.conn, if_exists="replace", index=False
-        )
+        self.neon_raw_data_file_mappings_df = surface_water_data["mms_swRawDataFiles"]
         self.site_code_mapping = site_code_mapping
@@ -371,7 +372,7 @@ class NeonSurfaceWaterDataTranslator(Translator):
         )
     def _translate_data_object(
-        self, do_id: str, url: str, do_type: str, checksum: str
+        self, do_id: str, url: str, do_type: str, manifest_id: str
     ) -> nmdc.DataObject:
         """Create nmdc DataObject which is the output of a NucleotideSequencing process. This
         object mainly contains information about the sequencing file that was generated as
@@ -395,8 +396,15 @@ class NeonSurfaceWaterDataTranslator(Translator):
             url=url,
             description=f"sequencing results for {basename}",
             type="nmdc:DataObject",
-            md5_checksum=checksum,
             data_object_type=do_type,
+            in_manifest=manifest_id,
+        )
+    def _translate_manifest(self, manifest_id: str) -> nmdc.Manifest:
+        return nmdc.Manifest(
+            id=manifest_id,
+            manifest_category=nmdc.ManifestCategoryEnum.poolable_replicates,
+            type="nmdc:Manifest",
         )
     def get_database(self):
@@ -477,6 +485,9 @@ class NeonSurfaceWaterDataTranslator(Translator):
         """
         surface_water_samples = pd.read_sql_query(query, self.conn)
+        # --------------------------------------------------
+        # Create mappings for minted NMDC IDs
+        # --------------------------------------------------
         neon_biosample_ids = surface_water_samples["parentSampleID"]
         nmdc_biosample_ids = self._id_minter("nmdc:Biosample", len(neon_biosample_ids))
         neon_to_nmdc_biosample_ids = dict(zip(neon_biosample_ids, nmdc_biosample_ids))
@@ -511,30 +522,20 @@ class NeonSurfaceWaterDataTranslator(Translator):
             zip(neon_lib_prep_processed_ids, nmdc_lib_prep_processed_ids)
         )
-        neon_omprc_ids = surface_water_samples["parentSampleID"]
-        nmdc_omprc_ids = self._id_minter(
-            "nmdc:NucleotideSequencing", len(neon_omprc_ids)
-        )
-        neon_to_nmdc_omprc_ids = dict(zip(neon_omprc_ids, nmdc_omprc_ids))
-        neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df
-        neon_raw_file_paths = neon_raw_data_file_mappings_df["rawDataFilePath"]
-        nmdc_data_object_ids = self._id_minter(
-            "nmdc:DataObject", len(neon_raw_file_paths)
-        )
-        neon_to_nmdc_data_object_ids = dict(
-            zip(neon_raw_file_paths, nmdc_data_object_ids)
-        )
+        # --------------------------------------------------
+        # STEP 1: Insert Biosamples
+        # --------------------------------------------------
         for neon_id, nmdc_id in neon_to_nmdc_biosample_ids.items():
             biosample_row = surface_water_samples[
                 surface_water_samples["parentSampleID"] == neon_id
             ]
+            # database.biosample_set.append(
+            #     self._translate_biosample(neon_id, nmdc_id, biosample_row)
+            # )
-            database.biosample_set.append(
-                self._translate_biosample(neon_id, nmdc_id, biosample_row)
-            )
+        # --------------------------------------------------
+        # STEP 2: Insert Extraction Processes
+        # --------------------------------------------------
         for neon_id, nmdc_id in neon_to_nmdc_extraction_ids.items():
             extraction_row = surface_water_samples[
                 surface_water_samples["parentSampleID"] == neon_id
@@ -557,6 +558,7 @@ class NeonSurfaceWaterDataTranslator(Translator):
                     extraction_row, "genomicsSampleID"
                 )
+                # Each Extraction process output => ProcessedSample
                 database.processed_sample_set.append(
                     self._translate_processed_sample(
                         processed_sample_id,
@@ -564,23 +566,9 @@ class NeonSurfaceWaterDataTranslator(Translator):
                     )
                 )
-        query = """
-            SELECT dnaSampleID, GROUP_CONCAT(rawDataFilePath, '|') AS rawDataFilePaths
-            FROM neonRawDataFile
-            GROUP BY dnaSampleID
-        """
-        neon_raw_data_files = pd.read_sql_query(query, self.conn)
-        neon_raw_data_files_dict = (
-            neon_raw_data_files.set_index("dnaSampleID")["rawDataFilePaths"]
-            .str.split("|")
-            .to_dict()
-        )
-        filtered_neon_raw_data_files_dict = {
-            key: value
-            for key, value in neon_raw_data_files_dict.items()
-            if len(value) <= 2
-        }
+        # --------------------------------------------------
+        # STEP 3: Insert LibraryPreparation Processes
+        # --------------------------------------------------
         for neon_id, nmdc_id in neon_to_nmdc_lib_prep_ids.items():
             lib_prep_row = surface_water_samples[
                 surface_water_samples["parentSampleID"] == neon_id
@@ -601,6 +589,7 @@ class NeonSurfaceWaterDataTranslator(Translator):
                 dna_sample_id = _get_value_or_none(lib_prep_row, "dnaSampleID")
+                # Each LibraryPreparation process output => ProcessedSample
                 database.processed_sample_set.append(
                     self._translate_processed_sample(
                         processed_sample_id,
@@ -608,42 +597,103 @@ class NeonSurfaceWaterDataTranslator(Translator):
                     )
                 )
-                has_output = None
-                has_output_do_ids = []
-                if dna_sample_id in filtered_neon_raw_data_files_dict:
-                    has_output = filtered_neon_raw_data_files_dict[dna_sample_id]
-                    for item in has_output:
-                        if item in neon_to_nmdc_data_object_ids:
-                            has_output_do_ids.append(neon_to_nmdc_data_object_ids[item])
-                        checksum = None
-                        do_type = None
-                        checksum = neon_raw_data_file_mappings_df[
-                            neon_raw_data_file_mappings_df["rawDataFilePath"] == item
-                        ]["checkSum"].values[0]
-                        if "_R1.fastq.gz" in item:
-                            do_type = "Metagenome Raw Read 1"
-                        elif "_R2.fastq.gz" in item:
-                            do_type = "Metagenome Raw Read 2"
-                        database.data_object_set.append(
-                            self._translate_data_object(
-                                neon_to_nmdc_data_object_ids.get(item),
-                                item,
-                                do_type,
-                                checksum,
-                            )
-                        )
-                    database.data_generation_set.append(
-                        self._translate_nucleotide_sequencing(
-                            neon_to_nmdc_omprc_ids.get(neon_id),
-                            processed_sample_id,
-                            has_output_do_ids,
-                            lib_prep_row,
-                        )
+        # --------------------------------------------------
+        # STEP 4: Group raw files by (dnaSampleID, sequencerRunID)
+        #         and insert DataObjects + DataGeneration processes
+        # --------------------------------------------------
+        raw_query = """
+            SELECT dnaSampleID, sequencerRunID, rawDataFilePath
+            FROM mms_swRawDataFiles
+        """
+        neon_raw_data_files_df = pd.read_sql_query(raw_query, self.conn)
+        for neon_id, nmdc_libprep_id in neon_to_nmdc_lib_prep_ids.items():
+            # 1) Pull out the row that corresponds to this parentSampleID
+            lib_prep_row = surface_water_samples[
+                surface_water_samples["parentSampleID"] == neon_id
+            ]
+            # 2) Grab the dnaSampleID from that row
+            dna_sample_id = _get_value_or_none(lib_prep_row, "dnaSampleID")
+            if not dna_sample_id:
+                # No dnaSampleID => skip
+                continue
+            # 3) Find all raw files for that dnaSampleID
+            dna_files = neon_raw_data_files_df[
+                neon_raw_data_files_df["dnaSampleID"] == dna_sample_id
+            ]
+            if dna_files.empty:
+                # No raw files => skip
+                continue
+            # -----------------------------------------
+            # LOOKUP DICT: get "has_input" for this neon_id
+            # -----------------------------------------
+            has_input_value = self.samp_procsm_dict.get(neon_id)
+            # If some neon_id isn't in the dictionary, handle it as needed
+            if not has_input_value:
+                # Could skip, or raise an error, or set a default
+                continue
+            # -------------------------------------------
+            # 4) CREATE A MANIFEST IF MULTIPLE RAW FILES
+            #    for this row's dnaSampleID
+            # -------------------------------------------
+            manifest_id = None
+            if len(dna_files) > 2:
+                # For each row that references a dnaSampleID with multiple raw files,
+                # mint exactly one new manifest record
+                manifest_id = self._id_minter("nmdc:Manifest", 1)[0]
+                new_manifest = self._translate_manifest(manifest_id)
+                # Add to the database
+                database.manifest_set.append(new_manifest)
+            # -------------------------------------------
+            # 5) NOW GROUP FILES BY sequencerRunID
+            #    => one data_generation record per run
+            # -------------------------------------------
+            lib_prep_processed_sample_id = neon_to_nmdc_lib_prep_processed_ids.get(
+                neon_id
+            )
+            if not lib_prep_processed_sample_id:
+                # If we don't have a ProcessedSample for some reason, skip
+                continue
+            for run_id, group_df in dna_files.groupby("sequencerRunID"):
+                # a) Mint new data_generation (NucleotideSequencing) ID for this run
+                data_generation_id = self._id_minter("nmdc:NucleotideSequencing", 1)[0]
+                # b) Create DataObjects for each raw file in this run
+                data_object_ids = []
+                for raw_fp in group_df["rawDataFilePath"]:
+                    do_id = self._id_minter("nmdc:DataObject", 1)[0]
+                    # Distinguish read type
+                    do_type = None
+                    if "_R1.fastq.gz" in raw_fp:
+                        do_type = "Metagenome Raw Read 1"
+                    elif "_R2.fastq.gz" in raw_fp:
+                        do_type = "Metagenome Raw Read 2"
+                    # Create the DataObject
+                    data_obj = self._translate_data_object(
+                        do_id=do_id,
+                        url=raw_fp,
+                        do_type=do_type,
+                        manifest_id=manifest_id,  # link to the new Manifest if it exists
+                    )
+                    database.data_object_set.append(data_obj)
+                    data_object_ids.append(do_id)
+                # c) Finally, create the data generation record for this run
+                database.data_generation_set.append(
+                    self._translate_nucleotide_sequencing(
+                        nucleotide_sequencing_id=data_generation_id,
+                        processed_sample_id=has_input_value,
+                        raw_data_file_data=data_object_ids,
+                        nucleotide_sequencing_row=lib_prep_row,
                     )
+                )
         return database

nmdc_runtime/site/util.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import os
-from functools import lru_cache
-from subprocess import Popen, PIPE, STDOUT, CalledProcessError
+from dagster import op
+from functools import lru_cache
 from pymongo.database import Database as MongoDatabase
+from subprocess import Popen, PIPE, STDOUT, CalledProcessError
 from nmdc_runtime.api.db.mongo import get_collection_names_from_schema
 from nmdc_runtime.site.resources import mongo_resource
@@ -47,3 +48,7 @@ def schema_collection_has_index_on_id(mdb: MongoDatabase) -> dict:
 def get_basename(filename: str) -> str:
     return os.path.basename(filename)
+def nmdc_study_id_to_filename(nmdc_study_id: str) -> str:
+    return nmdc_study_id.replace(":", "_").replace("-", "_")

nmdc_runtime/util.py CHANGED Viewed

@@ -24,6 +24,10 @@ from nmdc_schema.get_nmdc_view import ViewGetter
 from pydantic import Field, BaseModel
 from pymongo.database import Database as MongoDatabase
 from pymongo.errors import OperationFailure
+from refscan.lib.helpers import identify_references
+from refscan.lib.Finder import Finder
+from refscan.lib.ReferenceList import ReferenceList
+from refscan.scanner import scan_outgoing_references
 from toolz import merge, unique
 from nmdc_runtime.api.core.util import sha256hash_from_file
@@ -120,6 +124,23 @@ def get_class_names_from_collection_spec(
     return class_names
+@lru_cache
+def get_allowed_references() -> ReferenceList:
+    r"""
+    Returns a `ReferenceList` of all the inter-document references that
+    the NMDC Schema allows a schema-compliant MongoDB database to contain.
+    """
+    # Identify the inter-document references that the schema allows a database to contain.
+    print("Identifying schema-allowed references.")
+    references = identify_references(
+        schema_view=nmdc_schema_view(),
+        collection_name_to_class_names=collection_name_to_class_names,
+    )
+    return references
 @lru_cache
 def get_type_collections() -> dict:
     """Returns a dictionary mapping class names to Mongo collection names."""
@@ -353,6 +374,14 @@ def nmdc_database_collection_instance_class_names():
 @lru_cache
 def nmdc_database_collection_names():
+    r"""
+    TODO: Document this function.
+    TODO: Assuming this function was designed to return a list of names of all Database slots that represents database
+          collections, use the function named `get_collection_names_from_schema` in `nmdc_runtime/api/db/mongo.py`
+          instead, since (a) it includes documentation and (b) it performs the additional checks the lead schema
+          maintainer expects (e.g. checking whether a slot is `multivalued` and `inlined_as_list`).
+    """
     names = []
     view = nmdc_schema_view()
     all_classes = set(view.all_classes())
@@ -513,6 +542,13 @@ class OverlayDB(AbstractContextManager):
     overlay collection, that id is marked as "seen" and will not also be returned when
     subsequently scanning the (unmodified) base-database collection.
+    Note: The OverlayDB object does not provide a means to perform arbitrary MongoDB queries on the virtual "merged"
+          database. Callers can access the real database via `overlay_db._bottom_db` and the overlaying database via
+          `overlay_db._top_db` and perform arbitrary MongoDB queries on the individual databases that way. Access to
+          the virtual "merged" database is limited to the methods of the `OverlayDB` class, which simulates the
+          "merging" just-in-time to process the method invocation. You can see an example of this in the implementation
+          of the `merge_find` method, which internally accesses both the real database and the overlaying database.
     Mongo "update" commands (as the "apply_updates" method) are simulated by first copying affected
     documents from a base collection to the overlay, and then applying the updates to the overlay,
     so that again, base collections are unmodified, and a "merge_find" call will produce a result
@@ -591,7 +627,33 @@ class OverlayDB(AbstractContextManager):
                 yield doc
-def validate_json(in_docs: dict, mdb: MongoDatabase):
+def validate_json(
+    in_docs: dict, mdb: MongoDatabase, check_inter_document_references: bool = False
+):
+    r"""
+    Checks whether the specified dictionary represents a valid instance of the `Database` class
+    defined in the NMDC Schema. Referential integrity checking is performed on an opt-in basis.
+    Example dictionary:
+    {
+        "biosample_set": [
+            {"id": "nmdc:bsm-00-000001", ...},
+            {"id": "nmdc:bsm-00-000002", ...}
+        ],
+        "study_set": [
+            {"id": "nmdc:sty-00-000001", ...},
+            {"id": "nmdc:sty-00-000002", ...}
+        ]
+    }
+    :param in_docs: The dictionary you want to validate
+    :param mdb: A reference to a MongoDB database
+    :param check_inter_document_references: Whether you want this function to check whether every document that
+                                            is referenced by any of the documents passed in would, indeed, exist
+                                            in the database, if the documents passed in were to be inserted into
+                                            the database. In other words, set this to `True` if you want this
+                                            function to perform referential integrity checks.
+    """
     validator = Draft7Validator(get_nmdc_jsonschema_dict())
     docs = deepcopy(in_docs)
     validation_errors = {}
@@ -599,6 +661,8 @@ def validate_json(in_docs: dict, mdb: MongoDatabase):
     known_coll_names = set(nmdc_database_collection_names())
     for coll_name, coll_docs in docs.items():
         if coll_name not in known_coll_names:
+            # FIXME: Document what `@type` is (conceptually; e.g., why this function accepts it as a collection name).
+            #        See: https://github.com/microbiomedata/nmdc-runtime/discussions/858
             if coll_name == "@type" and coll_docs in ("Database", "nmdc:Database"):
                 continue
             else:
@@ -631,6 +695,84 @@ def validate_json(in_docs: dict, mdb: MongoDatabase):
         except Exception as e:
             return {"result": "errors", "detail": str(e)}
+        # Third pass (if enabled): Check inter-document references.
+        if check_inter_document_references is True:
+            # Prepare to use `refscan`.
+            #
+            # Note: We check the inter-document references in two stages, which are:
+            #       1. For each document in the JSON payload, check whether each document it references already exists
+            #          (in the collections the schema says it can exist in) in the database. We use the
+            #          `refscan` package to do this, which returns violation details we'll use in the second stage.
+            #       2. For each violation found in the first stage (i.e. each reference to a not-found document), we
+            #          check whether that document exists (in the collections the schema says it can exist in) in the
+            #          JSON payload. If it does, then we "waive" (i.e. discard) that violation.
+            #       The violations that remain after those two stages are the ones we return to the caller.
+            #
+            # Note: The reason we do not insert documents into an `OverlayDB` and scan _that_, is that the `OverlayDB`
+            #       does not provide a means to perform arbitrary queries against its virtual "merged" database. It
+            #       is not a drop-in replacement for a pymongo's `Database` class, which is the only thing that
+            #       `refscan`'s `Finder` class accepts.
+            #
+            finder = Finder(database=mdb)
+            references = get_allowed_references()
+            reference_field_names_by_source_class_name = (
+                references.get_reference_field_names_by_source_class_name()
+            )
+            # Iterate over the collections in the JSON payload.
+            for source_collection_name, documents in in_docs.items():
+                for document in documents:
+                    # Add an `_id` field to the document, since `refscan` requires the document to have one.
+                    source_document = dict(document, _id=None)
+                    violations = scan_outgoing_references(
+                        document=source_document,
+                        schema_view=nmdc_schema_view(),
+                        reference_field_names_by_source_class_name=reference_field_names_by_source_class_name,
+                        references=references,
+                        finder=finder,
+                        collection_names=nmdc_database_collection_names(),
+                        source_collection_name=source_collection_name,
+                        user_wants_to_locate_misplaced_documents=False,
+                    )
+                    # For each violation, check whether the misplaced document is in the JSON payload, itself.
+                    for violation in violations:
+                        can_waive_violation = False
+                        # Determine which collections can contain the referenced document, based upon
+                        # the schema class of which this source document is an instance.
+                        target_collection_names = (
+                            references.get_target_collection_names(
+                                source_class_name=violation.source_class_name,
+                                source_field_name=violation.source_field_name,
+                            )
+                        )
+                        # Check whether the referenced document exists in any of those collections in the JSON payload.
+                        for json_coll_name, json_coll_docs in in_docs.items():
+                            if json_coll_name in target_collection_names:
+                                for json_coll_doc in json_coll_docs:
+                                    if json_coll_doc["id"] == violation.target_id:
+                                        can_waive_violation = True
+                                        break  # stop checking
+                            if can_waive_violation:
+                                break  # stop checking
+                        if not can_waive_violation:
+                            violation_as_str = (
+                                f"Document '{violation.source_document_id}' "
+                                f"in collection '{violation.source_collection_name}' "
+                                f"has a field '{violation.source_field_name}' that "
+                                f"references a document having id "
+                                f"'{violation.target_id}', but the latter document "
+                                f"does not exist in any of the collections the "
+                                f"NMDC Schema says it can exist in."
+                            )
+                            validation_errors[source_collection_name].append(
+                                violation_as_str
+                            )
+            # If any collection's error list is not empty, return an error response.
+            if any(len(v) > 0 for v in validation_errors.values()):
+                return {"result": "errors", "detail": validation_errors}
         return {"result": "All Okay!"}
     else:
         return {"result": "errors", "detail": validation_errors}

{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: nmdc_runtime
-Version: 2.2.1
+Version: 2.4.0
 Summary: A runtime system for NMDC data management and orchestration
 Home-page: https://github.com/microbiomedata/nmdc-runtime
 Author: Donny Winston
@@ -11,6 +11,14 @@ Classifier: License :: OSI Approved :: Apache Software License
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: requires-python
+Dynamic: summary
 A runtime system for NMDC data management and orchestration.
@@ -29,7 +37,7 @@ houses the LinkML schema specification, as well as generated artifacts (e.g. JSO
 * [nmdc-server](https://github.com/microbiomedata/nmdc-server)
 houses code specific to the data portal -- its database, back-end API, and front-end application.
-* [workflow_documentation](https://nmdc-workflow-documentation.readthedocs.io/en/latest/index.html)
+* [workflow_documentation](https://docs.microbiomedata.org/workflows/)
 references workflow code spread across several repositories, that take source data and produce computed data.
 * This repo (nmdc-runtime)

{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ nmdc_runtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/config.py,sha256=qyV_To6t--DQUpYJ3SrE6sZlxuVXLPmx2dVtZV-3l-c,33
 nmdc_runtime/containers.py,sha256=8m_S1wiFu8VOWvY7tyqzf-02X9gXY83YGc8FgjWzLGA,418
 nmdc_runtime/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nmdc_runtime/util.py,sha256=aMzS8eATEjpXOiuyAFYthx92fb_cgIzWWd5ZQU6ZlAY,22931
+nmdc_runtime/util.py,sha256=HzQsNMYG6Pb-IuBEE9HBzX_lNkII7jiNe65UFk34ZYA,31414
 nmdc_runtime/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/core/db/Database.py,sha256=WamgBUbq85A7-fr3p5B9Tk92U__yPdr9pBb4zyQok-4,377
@@ -28,7 +28,7 @@ nmdc_runtime/lib/nmdc_etl_class.py,sha256=tVh3rKVMkBHQE65_LhKeIjCsaCZQk_HJzbc9K4
 nmdc_runtime/lib/transform_nmdc_data.py,sha256=hij4lR3IMQRJQdL-rsP_I-m_WyFPsBMchV2MNFUkh0M,39906
 nmdc_runtime/minter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/minter/bootstrap.py,sha256=5Ej6pJVBRryRIi0ZwEloY78Zky7iE2okF6tPwRI2axM,822
-nmdc_runtime/minter/config.py,sha256=WrxX9WmyN7Ft4INRAQbd31jmlm5qwaDDaNS9AktieYA,4112
+nmdc_runtime/minter/config.py,sha256=gsXZropDeeTO5tmLAtRuoocwqL3HgfgqVAENyCbX-Gc,2739
 nmdc_runtime/minter/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/minter/adapters/repository.py,sha256=I-jmGP38-9kPhkogrwUht_Ir0CfHA9_5ZImw5I_wbcw,8323
 nmdc_runtime/minter/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -36,11 +36,11 @@ nmdc_runtime/minter/domain/model.py,sha256=WMOuKub3dVzkOt_EZSRDLeTsJPqFbKx01SMQ5
 nmdc_runtime/minter/entrypoints/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/minter/entrypoints/fastapi_app.py,sha256=JC4thvzfFwRc1mhWQ-kHy3yvs0SYxF6ktE7LXNCwqlI,4031
 nmdc_runtime/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nmdc_runtime/site/graphs.py,sha256=mu4bE8799TItWXaPBfOeFB2XMyYwPZcj-VJQmadN2MA,14171
-nmdc_runtime/site/ops.py,sha256=T9_WrwDaySGnu6olwOHQizHQfeofMOaqMcq_vYEIzO0,43140
-nmdc_runtime/site/repository.py,sha256=JtHlp6l3UVo0QhV670TGns9bMfht7NOQrNWQtvsYr2g,39183
-nmdc_runtime/site/resources.py,sha256=6bmvplgql3KdEXKI49BibSk0Sug96SFJi8eOs2zeKK0,18252
-nmdc_runtime/site/util.py,sha256=zAY0oIY7GRf63ecqWelmS27N7PVrAXVwEhtnpescBSw,1415
+nmdc_runtime/site/graphs.py,sha256=SA4Ibb1TenVgiYD4wNrMiOCwkMKrv6BTcQ8mbG4VXPs,15666
+nmdc_runtime/site/ops.py,sha256=p4F5SrDbFdKOrAHu1TUhWQA33QB7hdoQmCCuU-00Eqo,46445
+nmdc_runtime/site/repository.py,sha256=pfx7WAVgdNaPhtfF2pak-tllqPMf4-yUeOXSpr4hu30,43861
+nmdc_runtime/site/resources.py,sha256=sqtRWb4ewU61U-JZTphsC4wBvYT5B0wj33WU70vjq_k,19677
+nmdc_runtime/site/util.py,sha256=-DnNv15r6XmFZrDG3d3Z-bFn2pFo0xpyyudhKlYPJKc,1559
 nmdc_runtime/site/backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/site/backup/nmdcdb_mongodump.py,sha256=H5uosmEiXwLwklJrYJWrNhb_Nuf_ew8dBpZLl6_dYhs,2699
 nmdc_runtime/site/backup/nmdcdb_mongoexport.py,sha256=XIFI_AI3zl0dFr-ELOEmwvT41MyRKBGFaAT3RcamTNE,4166
@@ -51,19 +51,21 @@ nmdc_runtime/site/drsobjects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
 nmdc_runtime/site/drsobjects/ingest.py,sha256=pcMP69WSzFHFqHB9JIL55ePFhilnCLRc2XHCQ97w1Ik,3107
 nmdc_runtime/site/drsobjects/registration.py,sha256=D1T3QUuxEOxqKZIvB5rkb_6ZxFZiA-U9SMPajyeWC2Y,3572
 nmdc_runtime/site/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nmdc_runtime/site/export/ncbi_xml.py,sha256=Vb4rNP3uhnGlHqrwUGgA2DzpOotCf3S8G4sIJml7gl4,25287
-nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=Jd-d8GGkB3e71TPpl_lPukQ54TioQZynO1yPSLX_aHs,8390
+nmdc_runtime/site/export/ncbi_xml.py,sha256=Jny1SqaVlscjiMHD73aiO8dxjbdPPJF5ksvNApHh5Ck,26230
+nmdc_runtime/site/export/ncbi_xml_utils.py,sha256=Qh96kk3TxwXxzuaWi6fsqe4Smarc2NILFP8pWRD3gFA,10915
 nmdc_runtime/site/export/study_metadata.py,sha256=yR5pXL6JG8d7cAtqcF-60Hp7bLD3dJ0Rut4AtYc0tXA,4844
 nmdc_runtime/site/normalization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/site/normalization/gold.py,sha256=iISDD4qs4d6uLhv631WYNeQVOzY5DO201ZpPtxHdkVk,1311
+nmdc_runtime/site/repair/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nmdc_runtime/site/repair/database_updater.py,sha256=EMuY8MfwQEfdejJHp0Y-Gb1eb1zOgKgfJxbtm6wM3YU,10943
 nmdc_runtime/site/translation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nmdc_runtime/site/translation/emsl.py,sha256=-aCTJTSCNaK-Koh8BE_4fTf5nyxP1KkquR6lloLEJl0,1245
 nmdc_runtime/site/translation/gold.py,sha256=R3W99sdQb7Pgu_esN7ruIC-tyREQD_idJ4xCzkqWuGw,1622
-nmdc_runtime/site/translation/gold_translator.py,sha256=RfAB68dJ9hDep20wETmCNBc0gugZbEKqVimT8h2t0uM,31470
+nmdc_runtime/site/translation/gold_translator.py,sha256=Zw4IjxMWJ1wdLyxX44-faq-Nfmr42-Na1gw-Xn4tg6I,32272
 nmdc_runtime/site/translation/jgi.py,sha256=qk878KhIw674TkrVfbl2x1QJrKi3zlvE0vesIpe9slM,876
 nmdc_runtime/site/translation/neon_benthic_translator.py,sha256=QIDqYLuf-NlGY9_88gy_5qTswkei3OfgJ5AOFpEXzJo,23985
 nmdc_runtime/site/translation/neon_soil_translator.py,sha256=Rol0g67nVBGSBySUzpfdW4Fwes7bKtvnlv2g5cB0aTI,38550
-nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=MQgjIfWPgoRe-bhzyfqHSe2mZwFsjcwjdT8tNqpIhlc,27729
+nmdc_runtime/site/translation/neon_surface_water_translator.py,sha256=k06eULMTYx0sQ00UlyeNJvCJMcX-neClnES1G6zpPKg,30517
 nmdc_runtime/site/translation/neon_utils.py,sha256=d00o7duKKugpLHmsEifNbp4WjeC4GOqcgw0b5qlCg4I,5549
 nmdc_runtime/site/translation/submission_portal_translator.py,sha256=9KhFn2jlRlGEAhsZWRPkpmInpxuVmnbCyR6jhlD7ooA,30587
 nmdc_runtime/site/translation/translator.py,sha256=V6Aq0y03LoQ4LTL2iHDHxGTh_eMjOmDJJSwNHSrp2wo,837
@@ -73,9 +75,9 @@ nmdc_runtime/site/validation/emsl.py,sha256=OG20mv_3E2rkQqTQtYO0_SVRqFb-Z_zKCiAV
 nmdc_runtime/site/validation/gold.py,sha256=Z5ZzYdjERbrJ2Tu06d0TDTBSfwaFdL1Z23Rl-YkZ2Ow,803
 nmdc_runtime/site/validation/jgi.py,sha256=LdJfhqBVHWCDp0Kzyk8eJZMwEI5NQ-zuTda31BcGwOA,1299
 nmdc_runtime/site/validation/util.py,sha256=GGbMDSwR090sr_E_fHffCN418gpYESaiot6XghS7OYk,3349
-nmdc_runtime-2.2.1.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
-nmdc_runtime-2.2.1.dist-info/METADATA,sha256=yIkwZWVw8J1xDqhwVQy2Rxfz7cIc42yT4JkRBdsRBr4,7256
-nmdc_runtime-2.2.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-nmdc_runtime-2.2.1.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
-nmdc_runtime-2.2.1.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
-nmdc_runtime-2.2.1.dist-info/RECORD,,
+nmdc_runtime-2.4.0.dist-info/LICENSE,sha256=VWiv65r7gHGjgtr3jMJYVmQny5GRpQ6H-W9sScb1x70,2408
+nmdc_runtime-2.4.0.dist-info/METADATA,sha256=CeZZbucd3jrD0ZqGdreH2x7ALrM9pt4ksGV2olkkpPI,7401
+nmdc_runtime-2.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nmdc_runtime-2.4.0.dist-info/entry_points.txt,sha256=JxdvOnvxHK_8046cwlvE30s_fV0-k-eTpQtkKYA69nQ,224
+nmdc_runtime-2.4.0.dist-info/top_level.txt,sha256=b0K1s09L_iHH49ueBKaLrB5-lh6cyrSv9vL6x4Qvyz8,13
+nmdc_runtime-2.4.0.dist-info/RECORD,,

{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nmdc_runtime-2.2.1.dist-info → nmdc_runtime-2.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

nmdc-runtime 2.2.1__py3-none-any.whl → 2.4.0__py3-none-any.whl

Potentially problematic release.

nmdc-runtime 2.2.1__py3-none-any.whl → 2.4.0__py3-none-any.whl