PyPI - nmdc-runtime - Versions diffs - 1.9.0__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend - Supply Chain Defender

nmdc-runtime 1.9.0py3-none-any.whl → 2.0.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (19) hide show

nmdc_runtime/site/translation/gold_translator.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import collections
+import csv
 import re
 from typing import List, Tuple, Union
 from nmdc_schema import nmdc
+import pandas as pd
 from nmdc_runtime.site.translation.translator import JSON_OBJECT, Translator
@@ -10,18 +12,22 @@ class GoldStudyTranslator(Translator):
     def __init__(
         self,
         study: JSON_OBJECT = {},
+        study_type: str = "research_study",
         biosamples: List[JSON_OBJECT] = [],
         projects: List[JSON_OBJECT] = [],
         analysis_projects: List[JSON_OBJECT] = [],
+        gold_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
         *args,
         **kwargs,
     ) -> None:
         super().__init__(*args, **kwargs)
         self.study = study
+        self.study_type = nmdc.StudyCategoryEnum(study_type)
         self.biosamples = biosamples
         self.projects = projects
         self.analysis_projects = analysis_projects
+        self.gold_nmdc_instrument_map_df = gold_nmdc_instrument_map_df
         self._projects_by_id = self._index_by_id(self.projects, "projectGoldId")
         self._analysis_projects_by_id = self._index_by_id(
@@ -69,6 +75,7 @@ class GoldStudyTranslator(Translator):
             has_raw_value=pi_dict.get("name"),
             name=pi_dict.get("name"),
             email=pi_dict.get("email"),
+            type="nmdc:PersonValue",
         )
     def _get_mod_date(self, gold_entity: JSON_OBJECT) -> Union[str, None]:
@@ -108,22 +115,58 @@ class GoldStudyTranslator(Translator):
     def _get_samp_taxon_id(
         self, gold_biosample: JSON_OBJECT
-    ) -> Union[nmdc.TextValue, None]:
-        """Get a TextValue representing the NCBI taxon for a GOLD biosample
+    ) -> Union[nmdc.ControlledIdentifiedTermValue, None]:
+        """Get a ControlledIdentifiedTermValue representing the NCBI taxon
+        for a GOLD biosample
         This method gets the `ncbiTaxName` and `ncbiTaxId` from a GOLD biosample object.
-        If both are not `None`, it constructs a TextValue of the format
+        If both are not `None`, it constructs a ControlledIdentifiedTermValue of the format
         `{ncbiTaxName} [NCBITaxon:{ncbiTaxId}]`. Otherwise, it returns `None`
         :param gold_biosample: GOLD biosample object
-        :return: TextValue object
+        :return: ControlledIdentifiedTermValue object
         """
         ncbi_tax_name = gold_biosample.get("ncbiTaxName")
         ncbi_tax_id = gold_biosample.get("ncbiTaxId")
         if ncbi_tax_name is None or ncbi_tax_id is None:
             return None
-        return nmdc.TextValue(f"{ncbi_tax_name} [NCBITaxon:{ncbi_tax_id}]")
+        raw_value = f"{ncbi_tax_name} [NCBITaxon:{ncbi_tax_id}]"
+        return nmdc.ControlledIdentifiedTermValue(
+            has_raw_value=raw_value,
+            term=nmdc.OntologyClass(
+                id=f"NCBITaxon:{ncbi_tax_id}",
+                name=ncbi_tax_name,
+                type="nmdc:OntologyClass",
+            ),
+            type="nmdc:ControlledIdentifiedTermValue",
+        )
+    def _get_host_taxid(
+        self, gold_biosample: JSON_OBJECT
+    ) -> Union[nmdc.ControlledIdentifiedTermValue, None]:
+        """Get a ControlledIdentifiedTermValue representing the NCBI host taxon id
+        for a GOLD biosample
+        This method gets the `hostNcbiTaxid` from a GOLD biosample object.
+        If it is not `None`, it constructs a ControlledIdentifiedTermValue whose raw value
+        and term id are both `NCBITaxon:{hostNcbiTaxid}`. Otherwise, it returns `None`
+        :param gold_biosample: GOLD biosample object
+        :return: ControlledIdentifiedTermValue object
+        """
+        host_taxid = gold_biosample.get("hostNcbiTaxid")
+        if host_taxid is None:
+            return None
+        return nmdc.ControlledIdentifiedTermValue(
+            has_raw_value=f"NCBITaxon:{host_taxid}",
+            term=nmdc.OntologyClass(
+                id=f"NCBITaxon:{host_taxid}",
+                type="nmdc:OntologyClass",
+            ),
+            type="nmdc:ControlledIdentifiedTermValue",
+        )
     def _get_samp_name(self, gold_biosample: JSON_OBJECT) -> Union[str, None]:
         """Get a sample name for a GOLD biosample object
@@ -183,7 +226,9 @@ class GoldStudyTranslator(Translator):
         date_collected = gold_biosample.get("dateCollected")
         if date_collected is None:
             return None
-        return nmdc.TimestampValue(has_raw_value=date_collected)
+        return nmdc.TimestampValue(
+            has_raw_value=date_collected, type="nmdc:TimestampValue"
+        )
     def _get_quantity_value(
         self,
@@ -215,12 +260,14 @@ class GoldStudyTranslator(Translator):
                     has_raw_value=minimum_numeric_value,
                     has_numeric_value=nmdc.Double(minimum_numeric_value),
                     has_unit=unit,
+                    type="nmdc:QuantityValue",
                 )
             else:
                 return nmdc.QuantityValue(
                     has_minimum_numeric_value=nmdc.Double(minimum_numeric_value),
                     has_maximum_numeric_value=nmdc.Double(maximum_numeric_value),
                     has_unit=unit,
+                    type="nmdc:QuantityValue",
                 )
         field_value = gold_entity.get(gold_field)
@@ -231,6 +278,7 @@ class GoldStudyTranslator(Translator):
             has_raw_value=field_value,
             has_numeric_value=nmdc.Double(field_value),
             has_unit=unit,
+            type="nmdc:QuantityValue",
         )
     def _get_text_value(
@@ -249,7 +297,7 @@ class GoldStudyTranslator(Translator):
         field_value = gold_entity.get(gold_field)
         if field_value is None:
             return None
-        return nmdc.TextValue(has_raw_value=field_value)
+        return nmdc.TextValue(has_raw_value=field_value, type="nmdc:TextValue")
     def _get_controlled_term_value(
         self, gold_entity: JSON_OBJECT, gold_field: str
@@ -267,7 +315,9 @@ class GoldStudyTranslator(Translator):
         field_value = gold_entity.get(gold_field)
         if field_value is None:
             return None
-        return nmdc.ControlledTermValue(has_raw_value=field_value)
+        return nmdc.ControlledTermValue(
+            has_raw_value=field_value, type="nmdc:ControlledTermValue"
+        )
     def _get_env_term_value(
         self, gold_biosample: JSON_OBJECT, gold_field: str
@@ -277,8 +327,8 @@ class GoldStudyTranslator(Translator):
         In GOLD entities ENVO terms are represented as a nested object with `id` and `label`
         fields. This method extracts this type of nested object by the given field name, and
         returns it as an `nmdc:ControlledIdentifiedTermValue` object. The `id` in the original
-        GOLD object be reformatted by replacing `_` with `:` (e.g. `ENVO_00005801` to
-        `ENVO:00005801`). If the value of the given field is `None` or if does not contain
+        GOLD object should be reformatted by replacing `_` with `:` (e.g. `ENVO_00005801` to
+        `ENVO:00005801`). If the value of the given field is `None` or if it does not contain
         a nested object with an `id` field, `None` is returned.
         :param gold_biosample: GOLD biosample object
@@ -292,8 +342,10 @@ class GoldStudyTranslator(Translator):
             term=nmdc.OntologyClass(
                 id=env_field["id"].replace("_", ":"),
                 name=env_field.get("label"),
+                type="nmdc:OntologyClass",
             ),
             has_raw_value=env_field["id"],
+            type="nmdc:ControlledIdentifiedTermValue",
         )
     def _get_lat_lon(
@@ -316,22 +368,40 @@ class GoldStudyTranslator(Translator):
             has_raw_value=f"{latitude} {longitude}",
             latitude=nmdc.DecimalDegree(latitude),
             longitude=nmdc.DecimalDegree(longitude),
+            type="nmdc:GeolocationValue",
         )
-    def _get_instrument_name(self, gold_project: JSON_OBJECT) -> Union[str, None]:
-        """Get instrument name used in a GOLD project
+    def _get_instrument(self, gold_project: JSON_OBJECT) -> Union[str, None]:
+        """Get instrument id referenced in instrument_set collection in Mongo.
+        Note: The instrument id is not retrieved by making a call to the database,
+        but rather looked up in the GOLD-NMDC instrument mapping table (parsed from a TSV
+        file in the nmdc-schema repo) held in self.gold_nmdc_instrument_map_df.
-        This method gets the `seqMethod` field from a GOLD project object. If
-        that value is not `None` it should be a list and the first element of that
-        list is returned. If the value of the field is `None`, `None` is returned.
+        This method gets the seqMethod field from a GOLD project object. If
+        that value is not empty, its first element is stripped and looked up in the
+        GOLD SeqMethod column of self.gold_nmdc_instrument_map_df, and the corresponding
+        value from the NMDC instrument_set id column is returned. If no row matches,
+        a ValueError is raised. If the value of the field is None or empty, None is returned.
         :param gold_project: GOLD project object
-        :return: Instrument name
+        :return: id corresponding to an Instrument from instrument_set collection
         """
         seq_method = gold_project.get("seqMethod")
         if not seq_method:
             return None
-        return seq_method[0]
+        seq_method = seq_method[0].strip()
+        df = self.gold_nmdc_instrument_map_df
+        matching_row = df[df["GOLD SeqMethod"] == seq_method]
+        if not matching_row.empty:
+            instrument_id = matching_row["NMDC instrument_set id"].values[0]
+            return instrument_id
+        raise ValueError(
+            f"seqMethod '{seq_method}' could not be found in the GOLD-NMDC instrument mapping TSV file."
+        )
     def _get_processing_institution(
         self, gold_project: JSON_OBJECT
@@ -407,6 +477,7 @@ class GoldStudyTranslator(Translator):
             principal_investigator=self._get_pi(gold_study),
             title=gold_study.get("studyName"),
             type="nmdc:Study",
+            study_category=self.study_type,
         )
     def _translate_biosample(
@@ -454,7 +525,7 @@ class GoldStudyTranslator(Translator):
             gold_biosample_identifiers=self._get_curie("gold", gold_biosample_id),
             habitat=gold_biosample.get("habitat"),
             host_name=gold_biosample.get("hostName"),
-            host_taxid=self._get_text_value(gold_biosample, "hostNcbiTaxid"),
+            host_taxid=self._get_host_taxid(gold_biosample),
             id=nmdc_biosample_id,
             img_identifiers=self._get_img_identifiers(gold_biosample_id),
             insdc_biosample_identifiers=self._get_insdc_biosample_identifiers(
@@ -466,7 +537,6 @@ class GoldStudyTranslator(Translator):
             name=gold_biosample.get("biosampleName"),
             ncbi_taxonomy_name=gold_biosample.get("ncbiTaxName"),
             nitrite=self._get_quantity_value(gold_biosample, "nitrateConcentration"),
-            part_of=nmdc_study_id,
             ph=gold_biosample.get("ph"),
             pressure=self._get_quantity_value(gold_biosample, "pressure"),
             samp_name=self._get_samp_name(gold_biosample),
@@ -482,47 +552,46 @@ class GoldStudyTranslator(Translator):
                 gold_biosample, "sampleCollectionTemperature"
             ),
             type="nmdc:Biosample",
+            associated_studies=[nmdc_study_id],
         )
-    def _translate_omics_processing(
+    def _translate_nucleotide_sequencing(
         self,
         gold_project: JSON_OBJECT,
-        nmdc_omics_processing_id: str,
+        nmdc_nucleotide_sequencing_id: str,
         nmdc_biosample_id: str,
         nmdc_study_id: str,
-    ) -> nmdc.OmicsProcessing:
-        """Translate a GOLD project object into an `nmdc:OmicsProcessing` object.
+    ):
+        """Translate a GOLD project object into an `nmdc:NucleotideSequencing` object.
-        This method translates a GOLD project object into an equivalent `nmdc:OmicsProcessing`
+        This method translates a GOLD project object into an equivalent `nmdc:NucleotideSequencing`
         object. Any minted NMDC IDs must be passed to this method. Internally, each
-        slot of the `nmdc:OmicsProcessing` is either directly pulled from the GOLD object or
+        slot of the `nmdc:NucleotideSequencing` is either directly pulled from the GOLD object or
         one of the `_get_*` methods is used.
         :param gold_project: GOLD project object
-        :param nmdc_omics_processing_id: Minted nmdc:OmicsProcessing identifier for the translated object
+        :param nmdc_nucleotide_sequencing_id: Minted nmdc:NucleotideSequencing identifier for the translated object
         :param nmdc_biosample_id: Minted nmdc:Biosample identifier for the related Biosample
         :param nmdc_study_id: Minted nmdc:Study identifier for the related Study
-        :return: nmdc:OmicsProcessing object
+        :return: nmdc:NucleotideSequencing object
         """
         gold_project_id = gold_project["projectGoldId"]
-        return nmdc.OmicsProcessing(
-            id=nmdc_omics_processing_id,
+        return nmdc.NucleotideSequencing(
+            id=nmdc_nucleotide_sequencing_id,
             name=gold_project.get("projectName"),
             gold_sequencing_project_identifiers=self._get_curie(
                 "gold", gold_project_id
             ),
             ncbi_project_name=gold_project.get("projectName"),
-            type="nmdc:OmicsProcessing",
+            type="nmdc:NucleotideSequencing",
             has_input=nmdc_biosample_id,
-            part_of=nmdc_study_id,
             add_date=gold_project.get("addDate"),
             mod_date=self._get_mod_date(gold_project),
             principal_investigator=self._get_pi(gold_project),
-            omics_type=self._get_controlled_term_value(
-                gold_project, "sequencingStrategy"
-            ),
-            instrument_name=self._get_instrument_name(gold_project),
             processing_institution=self._get_processing_institution(gold_project),
+            instrument_used=self._get_instrument(gold_project),
+            analyte_category="metagenome",
+            associated_studies=[nmdc_study_id],
         )
     def get_database(self) -> nmdc.Database:
@@ -563,11 +632,11 @@ class GoldStudyTranslator(Translator):
         }
         gold_project_ids = [project["projectGoldId"] for project in self.projects]
-        nmdc_omics_processing_ids = self._id_minter(
-            "nmdc:OmicsProcessing", len(gold_project_ids)
+        nmdc_nucleotide_sequencing_ids = self._id_minter(
+            "nmdc:NucleotideSequencing", len(gold_project_ids)
         )
-        gold_project_to_nmdc_omics_processing_ids = dict(
-            zip(gold_project_ids, nmdc_omics_processing_ids)
+        gold_project_to_nmdc_nucleotide_sequencing_ids = dict(
+            zip(gold_project_ids, nmdc_nucleotide_sequencing_ids)
         )
         database.study_set = [self._translate_study(self.study, nmdc_study_id)]
@@ -585,13 +654,13 @@ class GoldStudyTranslator(Translator):
             for biosample in self.biosamples
         ]
         database.field_research_site_set = [
-            nmdc.FieldResearchSite(id=id, name=name)
+            nmdc.FieldResearchSite(id=id, name=name, type="nmdc:FieldResearchSite")
             for name, id in gold_name_to_nmdc_field_site_ids.items()
         ]
-        database.omics_processing_set = [
-            self._translate_omics_processing(
+        database.data_generation_set = [
+            self._translate_nucleotide_sequencing(
                 project,
-                nmdc_omics_processing_id=gold_project_to_nmdc_omics_processing_ids[
+                nmdc_nucleotide_sequencing_id=gold_project_to_nmdc_nucleotide_sequencing_ids[
                     project["projectGoldId"]
                 ],
                 nmdc_biosample_id=gold_to_nmdc_biosample_ids[

nmdc_runtime/site/translation/neon_benthic_translator.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import re
 import sqlite3
+from typing import Union
 import pandas as pd
 import requests_cache
@@ -47,6 +48,7 @@ class NeonBenthicDataTranslator(Translator):
         site_code_mapping: dict,
         neon_envo_mappings_file: pd.DataFrame,
         neon_raw_data_file_mappings_file: pd.DataFrame,
+        neon_nmdc_instrument_map_df: pd.DataFrame = pd.DataFrame(),
         *args,
         **kwargs,
     ) -> None:
@@ -92,13 +94,13 @@ class NeonBenthicDataTranslator(Translator):
         )
         self.site_code_mapping = site_code_mapping
+        self.neon_nmdc_instrument_map_df = neon_nmdc_instrument_map_df
     def _translate_biosample(
         self, neon_id: str, nmdc_id: str, biosample_row: pd.DataFrame
     ) -> nmdc.Biosample:
         return nmdc.Biosample(
             id=nmdc_id,
-            part_of="nmdc:sty-11-pzmd0x14",
             env_broad_scale=_create_controlled_identified_term_value(
                 BENTHIC_BROAD_SCALE_MAPPINGS.get(
                     biosample_row["aquaticSiteType"].values[0]
@@ -146,8 +148,10 @@ class NeonBenthicDataTranslator(Translator):
             depth=nmdc.QuantityValue(
                 has_minimum_numeric_value=nmdc.Float("0"),
                 has_maximum_numeric_value=nmdc.Float("1"),
-                has_unit="meters",
+                has_unit="m",
+                type="nmdc:QuantityValue",
             ),
+            associated_studies=["nmdc:sty-11-pzmd0x14"],
         )
     def _translate_extraction_process(
@@ -185,10 +189,9 @@ class NeonBenthicDataTranslator(Translator):
             input_mass=_create_quantity_value(
                 _get_value_or_none(extraction_row, "sampleMass"), "g"
             ),
-            quality_control_report=nmdc.QualityControlReport(
-                status=_get_value_or_none(extraction_row, "qaqcStatus")
-            ),
+            qc_status=_get_value_or_none(extraction_row, "qaqcStatus"),
             processing_institution=processing_institution,
+            type="nmdc:Extraction",
         )
     def _translate_library_preparation(
@@ -201,13 +204,13 @@ class NeonBenthicDataTranslator(Translator):
         """
         Create LibraryPreparation process object. The input to LibraryPreparation process
         is the output ProcessedSample from an Extraction process. The output of LibraryPreparation
-        process is fed as input to an OmicsProcessing object.
+        process is fed as input to a NucleotideSequencing object.
         :param library_preparation_id: Minted id for LibraryPreparation process.
         :param library_preparation_input: Input to LibraryPreparation process is output from
         Extraction process.
         :param processed_sample_id: Minted ProcessedSample id which is output of LibraryPreparation
-        is also input to OmicsProcessing.
+        is also input to NucleotideSequencing.
         :param library_preparation_row: Metadata required to populate LibraryPreparation.
         :return: Object that uses the LibraryPreparation process model.
         """
@@ -226,31 +229,47 @@ class NeonBenthicDataTranslator(Translator):
             start_date=_get_value_or_none(library_preparation_row, "collectDate"),
             end_date=_get_value_or_none(library_preparation_row, "processedDate"),
             processing_institution=processing_institution,
+            type="nmdc:LibraryPreparation",
         )
-    def _translate_omics_processing(
+    def _get_instrument_id(self, instrument_model: Union[str | None]) -> str:
+        """Look up the NMDC instrument id for a NEON instrument model.
+        The lookup is a case-insensitive, literal substring match of `instrument_model`
+        against the `NEON sequencingMethod` column of `self.neon_nmdc_instrument_map_df`.
+        The `NMDC instrument_set id` value of the first matching row is returned.
+        :param instrument_model: Instrument model name taken from the NEON metadata.
+        :return: Value of the `NMDC instrument_set id` column for the first matching row.
+        :raises ValueError: If `instrument_model` is empty/None, or if no row of the
+        mapping table matches it.
+        """
+        if not instrument_model:
+            raise ValueError(
+                f"instrument_model '{instrument_model}' could not be found in the NEON-NMDC instrument mapping TSV file."
+            )
+        df = self.neon_nmdc_instrument_map_df
+        # regex=False: the model name is data, not a pattern, so characters such as
+        # "(" or "+" must be matched literally. na=False: a missing cell in the mapping
+        # column counts as "no match" instead of producing an invalid boolean mask.
+        matching_row = df[
+            df["NEON sequencingMethod"].str.contains(
+                instrument_model, case=False, regex=False, na=False
+            )
+        ]
+        if matching_row.empty:
+            # Previously this fell through and implicitly returned None, contradicting
+            # the `-> str` annotation; fail loudly like the empty-input case above.
+            raise ValueError(
+                f"instrument_model '{instrument_model}' could not be found in the NEON-NMDC instrument mapping TSV file."
+            )
+        return matching_row["NMDC instrument_set id"].values[0]
+    def _translate_nucleotide_sequencing(
         self,
-        omics_processing_id: str,
+        nucleotide_sequencing_id: str,
         processed_sample_id: str,
         raw_data_file_data: str,
-        omics_processing_row: pd.DataFrame,
-    ) -> nmdc.OmicsProcessing:
-        """Create nmdc OmicsProcessing object. This class typically models the run of a
-        Bioinformatics workflow on sequence data from a biosample. The input to an OmicsProcessing
-        process is the output from a LibraryPreparation process, and the output of OmicsProcessing
+        nucleotide_sequencing_row: pd.DataFrame,
+    ):
+        """Create nmdc NucleotideSequencing object. This class typically models the run of a
+        Bioinformatics workflow on sequence data from a biosample. The input to a NucleotideSequencing
+        process is the output from a LibraryPreparation process, and the output of NucleotideSequencing
         is a DataObject which has the FASTQ sequence file URLs embedded in them.
-        :param omics_processing_id: Minted id for an OmicsProcessing process.
+        :param nucleotide_sequencing_id: Minted id for a NucleotideSequencing process.
         :param processed_sample_id: ProcessedSample that is the output of LibraryPreparation.
         :param raw_data_file_data: R1/R2 DataObjects which have links to workflow processed output
         files embedded in them.
-        :param omics_processing_row: DataFrame with metadata for an OmicsProcessing workflow
+        :param nucleotide_sequencing_row: DataFrame with metadata for a NucleotideSequencing workflow
         process/run.
-        :return: OmicsProcessing object that models a Bioinformatics workflow process/run.
+        :return: NucleotideSequencing object that models a Bioinformatics workflow process/run.
         """
         processing_institution = None
         sequencing_facility = _get_value_or_none(
-            omics_processing_row, "sequencingFacilityID"
+            nucleotide_sequencing_row, "sequencingFacilityID"
         )
         if sequencing_facility is not None:
             if re.search("Battelle", sequencing_facility, re.IGNORECASE):
@@ -258,19 +277,21 @@ class NeonBenthicDataTranslator(Translator):
             elif re.search("Argonne", sequencing_facility, re.IGNORECASE):
                 processing_institution = "ANL"
-        return nmdc.OmicsProcessing(
-            id=omics_processing_id,
+        return nmdc.NucleotideSequencing(
+            id=nucleotide_sequencing_id,
             has_input=processed_sample_id,
             has_output=raw_data_file_data,
             processing_institution=processing_institution,
-            ncbi_project_name=_get_value_or_none(omics_processing_row, "ncbiProjectID"),
-            omics_type=_create_controlled_term_value(
-                omics_processing_row["investigation_type"].values[0]
+            ncbi_project_name=_get_value_or_none(
+                nucleotide_sequencing_row, "ncbiProjectID"
+            ),
+            instrument_used=self._get_instrument_id(
+                _get_value_or_none(nucleotide_sequencing_row, "instrument_model")
             ),
-            instrument_name=f"{_get_value_or_none(omics_processing_row, 'sequencingMethod')} {_get_value_or_none(omics_processing_row, 'instrument_model')}",
-            part_of="nmdc:sty-11-34xj1150",
-            name=f"Terrestrial soil microbial communities - {_get_value_or_none(omics_processing_row, 'dnaSampleID')}",
-            type="nmdc:OmicsProcessing",
+            name=f"Benthic microbial communities - {_get_value_or_none(nucleotide_sequencing_row, 'dnaSampleID')}",
+            type="nmdc:NucleotideSequencing",
+            associated_studies=["nmdc:sty-11-pzmd0x14"],
+            analyte_category="metagenome",
         )
     def _translate_processed_sample(
@@ -287,12 +308,14 @@ class NeonBenthicDataTranslator(Translator):
         :param sample_id: Value from `genomicsSampleID` or `dnaSampleID` column.
         :return: ProcessedSample objects to be stored in `processed_sample_set`.
         """
-        return nmdc.ProcessedSample(id=processed_sample_id, name=sample_id)
+        return nmdc.ProcessedSample(
+            id=processed_sample_id, name=sample_id, type="nmdc:ProcessedSample"
+        )
     def _translate_data_object(
         self, do_id: str, url: str, do_type: str, checksum: str
     ) -> nmdc.DataObject:
-        """Create nmdc DataObject which is the output of an OmicsProcessing process. This
+        """Create nmdc DataObject which is the output of a NucleotideSequencing process. This
         object mainly contains information about the sequencing file that was generated as
         the result of running a Bioinformatics workflow on a certain ProcessedSample, which
         is the result of a LibraryPreparation process.
@@ -419,7 +442,9 @@ class NeonBenthicDataTranslator(Translator):
         )
         neon_omprc_ids = benthic_samples["sampleID"]
-        nmdc_omprc_ids = self._id_minter("nmdc:OmicsProcessing", len(neon_omprc_ids))
+        nmdc_omprc_ids = self._id_minter(
+            "nmdc:NucleotideSequencing", len(neon_omprc_ids)
+        )
         neon_to_nmdc_omprc_ids = dict(zip(neon_omprc_ids, nmdc_omprc_ids))
         neon_raw_data_file_mappings_df = self.neon_raw_data_file_mappings_df
@@ -445,7 +470,7 @@ class NeonBenthicDataTranslator(Translator):
             processed_sample_id = neon_to_nmdc_extraction_processed_ids.get(neon_id)
             if extraction_input is not None and processed_sample_id is not None:
-                database.extraction_set.append(
+                database.material_processing_set.append(
                     self._translate_extraction_process(
                         nmdc_id,
                         extraction_input,
@@ -489,7 +514,7 @@ class NeonBenthicDataTranslator(Translator):
             processed_sample_id = neon_to_nmdc_lib_prep_processed_ids.get(neon_id)
             if lib_prep_input is not None and processed_sample_id is not None:
-                database.library_preparation_set.append(
+                database.material_processing_set.append(
                     self._translate_library_preparation(
                         nmdc_id,
                         lib_prep_input,
@@ -536,8 +561,8 @@ class NeonBenthicDataTranslator(Translator):
                             )
                         )
-                    database.omics_processing_set.append(
-                        self._translate_omics_processing(
+                    database.data_generation_set.append(
+                        self._translate_nucleotide_sequencing(
                             neon_to_nmdc_omprc_ids.get(neon_id),
                             processed_sample_id,
                             has_output_do_ids,