PyPI - nmdc-runtime - Versions diffs - 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl - Mend

nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (77)

nmdc_runtime/Dockerfile +167 -0
nmdc_runtime/api/analytics.py +22 -2
nmdc_runtime/api/core/idgen.py +36 -6
nmdc_runtime/api/db/mongo.py +0 -12
nmdc_runtime/api/endpoints/find.py +65 -225
nmdc_runtime/api/endpoints/lib/linked_instances.py +180 -0
nmdc_runtime/api/endpoints/nmdcschema.py +65 -144
nmdc_runtime/api/endpoints/objects.py +4 -11
nmdc_runtime/api/endpoints/operations.py +0 -27
nmdc_runtime/api/endpoints/queries.py +22 -0
nmdc_runtime/api/endpoints/sites.py +0 -24
nmdc_runtime/api/endpoints/util.py +57 -35
nmdc_runtime/api/entrypoint.sh +7 -0
nmdc_runtime/api/main.py +84 -60
nmdc_runtime/api/models/util.py +12 -5
nmdc_runtime/api/openapi.py +116 -180
nmdc_runtime/api/swagger_ui/assets/custom-elements.js +522 -0
nmdc_runtime/api/swagger_ui/assets/script.js +247 -0
nmdc_runtime/api/swagger_ui/assets/style.css +155 -0
nmdc_runtime/api/swagger_ui/swagger_ui.py +34 -0
nmdc_runtime/minter/adapters/repository.py +21 -0
nmdc_runtime/minter/domain/model.py +20 -0
nmdc_runtime/site/changesheets/data/OmicsProcessing-to-catted-Biosamples.tsv +1561 -0
nmdc_runtime/site/changesheets/scripts/missing_neon_soils_ecosystem_data.py +311 -0
nmdc_runtime/site/changesheets/scripts/neon_soils_add_ncbi_ids.py +210 -0
nmdc_runtime/site/dagster.yaml +53 -0
nmdc_runtime/site/entrypoint-daemon.sh +26 -0
nmdc_runtime/site/entrypoint-dagit-readonly.sh +26 -0
nmdc_runtime/site/entrypoint-dagit.sh +26 -0
nmdc_runtime/site/export/ncbi_xml.py +632 -11
nmdc_runtime/site/export/ncbi_xml_utils.py +114 -0
nmdc_runtime/site/graphs.py +7 -0
nmdc_runtime/site/ops.py +92 -34
nmdc_runtime/site/repository.py +2 -0
nmdc_runtime/site/resources.py +16 -3
nmdc_runtime/site/translation/submission_portal_translator.py +82 -14
nmdc_runtime/site/workspace.yaml +13 -0
nmdc_runtime/static/NMDC_logo.svg +1073 -0
nmdc_runtime/static/ORCID-iD_icon_vector.svg +4 -0
nmdc_runtime/static/README.md +5 -0
nmdc_runtime/static/favicon.ico +0 -0
nmdc_runtime/util.py +87 -1
nmdc_runtime-2.11.0.dist-info/METADATA +46 -0
{nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/RECORD +47 -57
{nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/WHEEL +1 -2
nmdc_runtime/api/endpoints/ids.py +0 -192
nmdc_runtime/client/__init__.py +0 -0
nmdc_runtime/containers.py +0 -14
nmdc_runtime/core/__init__.py +0 -0
nmdc_runtime/core/db/Database.py +0 -13
nmdc_runtime/core/db/__init__.py +0 -0
nmdc_runtime/core/exceptions/__init__.py +0 -23
nmdc_runtime/core/exceptions/base.py +0 -47
nmdc_runtime/core/exceptions/token.py +0 -13
nmdc_runtime/domain/__init__.py +0 -0
nmdc_runtime/domain/users/__init__.py +0 -0
nmdc_runtime/domain/users/queriesInterface.py +0 -18
nmdc_runtime/domain/users/userSchema.py +0 -37
nmdc_runtime/domain/users/userService.py +0 -14
nmdc_runtime/infrastructure/__init__.py +0 -0
nmdc_runtime/infrastructure/database/__init__.py +0 -0
nmdc_runtime/infrastructure/database/db.py +0 -3
nmdc_runtime/infrastructure/database/models/__init__.py +0 -0
nmdc_runtime/infrastructure/database/models/user.py +0 -1
nmdc_runtime/lib/__init__.py +0 -1
nmdc_runtime/lib/extract_nmdc_data.py +0 -33
nmdc_runtime/lib/load_nmdc_data.py +0 -121
nmdc_runtime/lib/nmdc_dataframes.py +0 -825
nmdc_runtime/lib/nmdc_etl_class.py +0 -396
nmdc_runtime/lib/transform_nmdc_data.py +0 -1117
nmdc_runtime/site/drsobjects/__init__.py +0 -0
nmdc_runtime/site/drsobjects/ingest.py +0 -93
nmdc_runtime/site/drsobjects/registration.py +0 -131
nmdc_runtime-2.10.0.dist-info/METADATA +0 -265
nmdc_runtime-2.10.0.dist-info/top_level.txt +0 -1
{nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/entry_points.txt +0 -0
{nmdc_runtime-2.10.0.dist-info → nmdc_runtime-2.11.0.dist-info}/licenses/LICENSE +0 -0

nmdc_runtime/site/translation/submission_portal_translator.py CHANGED Viewed

@@ -145,6 +145,7 @@ class SubmissionPortalTranslator(Translator):
         # See: https://github.com/microbiomedata/submission-schema/issues/162
         study_category: Optional[str] = None,
         study_pi_image_url: Optional[str] = None,
+        study_id: Optional[str] = None,
         # Additional biosample-level metadata with optional column mapping information not captured
         # by the submission portal currently.
         # See: https://github.com/microbiomedata/submission-schema/issues/162
@@ -165,6 +166,7 @@ class SubmissionPortalTranslator(Translator):
             nmdc.StudyCategoryEnum(study_category) if study_category else None
         )
         self.study_pi_image_url = study_pi_image_url
+        self.study_id = study_id
         self.biosample_extras = group_dicts_by_key(
             BIOSAMPLE_UNIQUE_KEY_SLOT, biosample_extras
@@ -174,6 +176,13 @@ class SubmissionPortalTranslator(Translator):
         )
         self.schema_view: SchemaView = _get_schema_view()
+        self._material_processing_subclass_names = []
+        for class_name in self.schema_view.class_descendants(
+            "MaterialProcessing", reflexive=False
+        ):
+            class_def = self.schema_view.get_class(class_name)
+            if not class_def.abstract:
+                self._material_processing_subclass_names.append(class_name)
     def _get_pi(
         self, metadata_submission: JSON_OBJECT
@@ -542,6 +551,14 @@ class SubmissionPortalTranslator(Translator):
         return data_objects, manifest
+    def _parse_sample_link(self, sample_link: str) -> tuple[str, list[str]] | None:
+        """Parse a sample link in the form of `ProcessingName:SampleName,..."""
+        pattern = r"(" + "|".join(self._material_processing_subclass_names) + r"):(.+)"
+        match = re.match(pattern, sample_link)
+        if not match:
+            return None
+        return match.group(1), split_strip(match.group(2), ",")
     def _translate_study(
         self, metadata_submission: JSON_OBJECT, nmdc_study_id: str
     ) -> nmdc.Study:
@@ -752,11 +769,14 @@ class SubmissionPortalTranslator(Translator):
             "metadata_submission", {}
         )
-        # Generate one Study instance based on the metadata submission
-        nmdc_study_id = self._id_minter("nmdc:Study")[0]
-        database.study_set = [
-            self._translate_study(metadata_submission_data, nmdc_study_id)
-        ]
+        # Generate one Study instance based on the metadata submission, if a study_id wasn't provided
+        if self.study_id:
+            nmdc_study_id = self.study_id
+        else:
+            nmdc_study_id = self._id_minter("nmdc:Study")[0]
+            database.study_set = [
+                self._translate_study(metadata_submission_data, nmdc_study_id)
+            ]
         # Automatically populate the `env_package` field in the sample data based on which
         # environmental data tab the sample data came from.
@@ -788,15 +808,63 @@ class SubmissionPortalTranslator(Translator):
         )
         # Translate the sample data into nmdc:Biosample objects
-        database.biosample_set = [
-            self._translate_biosample(
-                sample_data,
-                nmdc_biosample_id=sample_data_to_nmdc_biosample_ids[sample_data_id],
-                nmdc_study_id=nmdc_study_id,
-            )
-            for sample_data_id, sample_data in sample_data_by_id.items()
-            if sample_data
-        ]
+        database.biosample_set = []
+        for sample_data_id, sample_data in sample_data_by_id.items():
+            # This shouldn't happen, but just in case skip empty sample data
+            if not sample_data:
+                continue
+            # Find the first tab that has a sample_link value and attempt to parse it
+            sample_link = ""
+            for tab in sample_data:
+                if tab.get("sample_link"):
+                    sample_link = tab.get("sample_link")
+                    break
+            parsed_sample_link = self._parse_sample_link(sample_link)
+            # If the sample_link could be parsed according to the [ProcessName]:[InputSample,...]
+            # format, then create a ProcessedSample and MaterialProcessing instance instead of a
+            # Biosample instance. The input samples must be present in the submission for this to
+            # work. An exception is raised if any of the referenced input samples are missing.
+            if parsed_sample_link is not None:
+                processing_type, processing_inputs = parsed_sample_link
+                if not all(
+                    input_id in sample_data_to_nmdc_biosample_ids
+                    for input_id in processing_inputs
+                ):
+                    raise ValueError(
+                        f"Could not find all input samples in sample_link '{sample_link}'"
+                    )
+                processed_sample_id = self._id_minter("nmdc:ProcessedSample")[0]
+                database.processed_sample_set.append(
+                    nmdc.ProcessedSample(
+                        id=processed_sample_id,
+                        type="nmdc:ProcessedSample",
+                        name=sample_data[0].get(BIOSAMPLE_UNIQUE_KEY_SLOT, "").strip(),
+                    )
+                )
+                processing_class = getattr(nmdc, processing_type)
+                material_processing = processing_class(
+                    id=self._id_minter(f"nmdc:{processing_type}")[0],
+                    type=f"nmdc:{processing_type}",
+                    has_input=[
+                        sample_data_to_nmdc_biosample_ids[input_id]
+                        for input_id in processing_inputs
+                    ],
+                    has_output=[processed_sample_id],
+                )
+                database.material_processing_set.append(material_processing)
+            # If there was no sample_link or it doesn't follow the expected format, create a
+            # Biosample instance as normal.
+            else:
+                biosample = self._translate_biosample(
+                    sample_data,
+                    nmdc_biosample_id=sample_data_to_nmdc_biosample_ids[sample_data_id],
+                    nmdc_study_id=nmdc_study_id,
+                )
+                database.biosample_set.append(biosample)
         # This section handles the translation of information in the external sequencing tabs into
         # various NMDC objects.

nmdc_runtime/site/workspace.yaml ADDED Viewed

@@ -0,0 +1,13 @@
+load_from:
+  - python_package:
+      package_name: nmdc_runtime.site.repository
+      attribute: repo
+  - python_package:
+      package_name: nmdc_runtime.site.repository
+      attribute: biosample_submission_ingest
+  - python_package:
+      package_name: nmdc_runtime.site.repository
+      attribute: biosample_export
+  - python_package:
+      package_name: nmdc_runtime.site.repository
+      attribute: database_records_stitching

nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

Potentially problematic release.

nmdc-runtime 2.10.0__py3-none-any.whl → 2.11.0__py3-none-any.whl