PyPI - nmdc-runtime - Versions diffs - 2.9.0__py3-none-any.whl → 2.10.0__py3-none-any.whl - Mend

nmdc-runtime 2.9.0__py3-none-any.whl → 2.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nmdc-runtime might be problematic. Click here for more details.

Files changed (98) hide show

nmdc_runtime/api/__init__.py +0 -0
nmdc_runtime/api/analytics.py +70 -0
nmdc_runtime/api/boot/__init__.py +0 -0
nmdc_runtime/api/boot/capabilities.py +9 -0
nmdc_runtime/api/boot/object_types.py +126 -0
nmdc_runtime/api/boot/triggers.py +84 -0
nmdc_runtime/api/boot/workflows.py +116 -0
nmdc_runtime/api/core/__init__.py +0 -0
nmdc_runtime/api/core/auth.py +208 -0
nmdc_runtime/api/core/idgen.py +170 -0
nmdc_runtime/api/core/metadata.py +788 -0
nmdc_runtime/api/core/util.py +109 -0
nmdc_runtime/api/db/__init__.py +0 -0
nmdc_runtime/api/db/mongo.py +447 -0
nmdc_runtime/api/db/s3.py +37 -0
nmdc_runtime/api/endpoints/__init__.py +0 -0
nmdc_runtime/api/endpoints/capabilities.py +25 -0
nmdc_runtime/api/endpoints/find.py +794 -0
nmdc_runtime/api/endpoints/ids.py +192 -0
nmdc_runtime/api/endpoints/jobs.py +143 -0
nmdc_runtime/api/endpoints/lib/__init__.py +0 -0
nmdc_runtime/api/endpoints/lib/helpers.py +274 -0
nmdc_runtime/api/endpoints/lib/path_segments.py +165 -0
nmdc_runtime/api/endpoints/metadata.py +260 -0
nmdc_runtime/api/endpoints/nmdcschema.py +581 -0
nmdc_runtime/api/endpoints/object_types.py +38 -0
nmdc_runtime/api/endpoints/objects.py +277 -0
nmdc_runtime/api/endpoints/operations.py +105 -0
nmdc_runtime/api/endpoints/queries.py +679 -0
nmdc_runtime/api/endpoints/runs.py +98 -0
nmdc_runtime/api/endpoints/search.py +38 -0
nmdc_runtime/api/endpoints/sites.py +229 -0
nmdc_runtime/api/endpoints/triggers.py +25 -0
nmdc_runtime/api/endpoints/users.py +214 -0
nmdc_runtime/api/endpoints/util.py +774 -0
nmdc_runtime/api/endpoints/workflows.py +353 -0
nmdc_runtime/api/main.py +401 -0
nmdc_runtime/api/middleware.py +43 -0
nmdc_runtime/api/models/__init__.py +0 -0
nmdc_runtime/api/models/capability.py +14 -0
nmdc_runtime/api/models/id.py +92 -0
nmdc_runtime/api/models/job.py +37 -0
nmdc_runtime/api/models/lib/__init__.py +0 -0
nmdc_runtime/api/models/lib/helpers.py +78 -0
nmdc_runtime/api/models/metadata.py +11 -0
nmdc_runtime/api/models/minter.py +0 -0
nmdc_runtime/api/models/nmdc_schema.py +146 -0
nmdc_runtime/api/models/object.py +180 -0
nmdc_runtime/api/models/object_type.py +20 -0
nmdc_runtime/api/models/operation.py +66 -0
nmdc_runtime/api/models/query.py +246 -0
nmdc_runtime/api/models/query_continuation.py +111 -0
nmdc_runtime/api/models/run.py +161 -0
nmdc_runtime/api/models/site.py +87 -0
nmdc_runtime/api/models/trigger.py +13 -0
nmdc_runtime/api/models/user.py +140 -0
nmdc_runtime/api/models/util.py +253 -0
nmdc_runtime/api/models/workflow.py +15 -0
nmdc_runtime/api/openapi.py +242 -0
nmdc_runtime/config.py +7 -8
nmdc_runtime/core/db/Database.py +1 -3
nmdc_runtime/infrastructure/database/models/user.py +0 -9
nmdc_runtime/lib/extract_nmdc_data.py +0 -8
nmdc_runtime/lib/nmdc_dataframes.py +3 -7
nmdc_runtime/lib/nmdc_etl_class.py +1 -7
nmdc_runtime/minter/adapters/repository.py +1 -2
nmdc_runtime/minter/config.py +2 -0
nmdc_runtime/minter/domain/model.py +35 -1
nmdc_runtime/minter/entrypoints/fastapi_app.py +1 -1
nmdc_runtime/mongo_util.py +1 -2
nmdc_runtime/site/backup/nmdcdb_mongodump.py +1 -1
nmdc_runtime/site/backup/nmdcdb_mongoexport.py +1 -3
nmdc_runtime/site/export/ncbi_xml.py +1 -2
nmdc_runtime/site/export/ncbi_xml_utils.py +1 -1
nmdc_runtime/site/graphs.py +1 -22
nmdc_runtime/site/ops.py +60 -152
nmdc_runtime/site/repository.py +0 -112
nmdc_runtime/site/translation/gold_translator.py +4 -12
nmdc_runtime/site/translation/neon_benthic_translator.py +0 -1
nmdc_runtime/site/translation/neon_soil_translator.py +4 -5
nmdc_runtime/site/translation/neon_surface_water_translator.py +0 -2
nmdc_runtime/site/translation/submission_portal_translator.py +2 -54
nmdc_runtime/site/translation/translator.py +63 -1
nmdc_runtime/site/util.py +8 -3
nmdc_runtime/site/validation/util.py +10 -5
nmdc_runtime/util.py +3 -47
{nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA +57 -6
nmdc_runtime-2.10.0.dist-info/RECORD +138 -0
nmdc_runtime/site/translation/emsl.py +0 -43
nmdc_runtime/site/translation/gold.py +0 -53
nmdc_runtime/site/translation/jgi.py +0 -32
nmdc_runtime/site/translation/util.py +0 -132
nmdc_runtime/site/validation/jgi.py +0 -43
nmdc_runtime-2.9.0.dist-info/RECORD +0 -84
{nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/WHEEL +0 -0
{nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/entry_points.txt +0 -0
{nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/licenses/LICENSE +0 -0
{nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/top_level.txt +0 -0

nmdc_runtime/site/repository.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import json
-from typing import Any
 from dagster import (
     repository,
@@ -14,7 +13,6 @@ from dagster import (
     DagsterRunStatus,
     RunStatusSensorContext,
     DefaultSensorStatus,
-    in_process_executor,
 )
 from starlette import status
 from toolz import merge, get_in
@@ -30,8 +28,6 @@ from nmdc_runtime.site.graphs import (
     translate_metadata_submission_to_nmdc_schema_database,
     ingest_metadata_submission,
     gold_study_to_database,
-    gold_translation,
-    gold_translation_curation,
     create_objects_from_site_object_puts,
     housekeeping,
     ensure_jobs,
@@ -62,9 +58,6 @@ from nmdc_runtime.site.resources import (
 from nmdc_runtime.site.resources import (
     get_runtime_api_site_client,
 )
-from nmdc_runtime.site.translation.emsl import emsl_job, test_emsl_job
-from nmdc_runtime.site.translation.gold import gold_job, test_gold_job
-from nmdc_runtime.site.translation.jgi import jgi_job, test_jgi_job
 from nmdc_runtime.util import freeze
 from nmdc_runtime.util import unfreeze
@@ -249,82 +242,6 @@ def process_workflow_job_triggers(_context):
         yield SkipReason("No new jobs required")
-@asset_sensor(
-    asset_key=AssetKey(["object", "nmdc_database.json.zip"]),
-    job=ensure_jobs.to_job(name="ensure_gold_translation", **preset_normal),
-)
-def ensure_gold_translation_job(_context, asset_event):
-    mdb = get_mongo(run_config_frozen__normal_env).db
-    gold_etl_latest = mdb.objects.find_one(
-        {"name": "nmdc_database.json.zip"}, sort=[("created_time", -1)]
-    )
-    sensed_object_id = asset_materialization_metadata(asset_event, "object_id").text
-    if gold_etl_latest is None:
-        yield SkipReason("can't find sensed asset object_id in database")
-        return
-    elif gold_etl_latest["id"] != sensed_object_id:
-        yield SkipReason("later object than sensed materialization")
-        return
-    run_config = merge(
-        run_config_frozen__normal_env,
-        {
-            "solids": {
-                "construct_jobs": {
-                    "config": {
-                        "base_jobs": [
-                            {
-                                "workflow": {"id": "gold-translation-1.0.0"},
-                                "config": {"object_id": gold_etl_latest["id"]},
-                            }
-                        ]
-                    }
-                }
-            }
-        },
-    )
-    yield RunRequest(run_key=sensed_object_id, run_config=unfreeze(run_config))
-@asset_sensor(
-    asset_key=AssetKey(["job", "gold-translation-1.0.0"]),
-    job=gold_translation_curation.to_job(**preset_normal),
-)
-def claim_and_run_gold_translation_curation(_context, asset_event):
-    client = get_runtime_api_site_client(run_config_frozen__normal_env)
-    mdb = get_mongo(run_config_frozen__normal_env).db
-    object_id_latest = asset_materialization_metadata(
-        asset_event, "object_id_latest"
-    ).text
-    job = mdb.jobs.find_one(
-        {
-            "workflow.id": "gold-translation-1.0.0",
-            "config.object_id_latest": object_id_latest,
-        }
-    )
-    if job is not None:
-        rv = client.claim_job(job["id"])
-        if rv.status_code == status.HTTP_200_OK:
-            operation = rv.json()
-            run_config = merge(
-                run_config_frozen__normal_env,
-                {
-                    "ops": {
-                        "get_operation": {
-                            "config": {
-                                "operation_id": operation["id"],
-                            }
-                        }
-                    }
-                },
-            )
-            yield RunRequest(run_key=operation["id"], run_config=unfreeze(run_config))
-        else:
-            yield SkipReason("Job found, but already claimed by this site")
-    else:
-        yield SkipReason("No job found")
 @sensor(
     job=apply_metadata_in.to_job(name="apply_metadata_in_sensed", **preset_normal),
     default_status=DefaultSensorStatus.RUNNING,
@@ -502,7 +419,6 @@ def on_run_fail(context: RunStatusSensorContext):
 @repository
 def repo():
     graph_jobs = [
-        gold_translation.to_job(**preset_normal),
         hello_graph.to_job(name="hello_job"),
         ensure_jobs.to_job(**preset_normal),
         apply_metadata_in.to_job(**preset_normal),
@@ -518,8 +434,6 @@ def repo():
     ]
     sensors = [
         done_object_put_ops,
-        ensure_gold_translation_job,
-        claim_and_run_gold_translation_curation,
         process_workflow_job_triggers,
         claim_and_run_apply_changesheet_jobs,
         claim_and_run_metadata_in_jobs,
@@ -529,20 +443,6 @@ def repo():
     return graph_jobs + schedules + sensors
-@repository
-def translation():
-    graph_jobs = [jgi_job, gold_job, emsl_job]
-    return graph_jobs
-@repository
-def test_translation():
-    graph_jobs = [test_jgi_job, test_gold_job, test_emsl_job]
-    return graph_jobs
 @repository
 def biosample_submission_ingest():
     normal_resources = run_config_frozen__normal_env["resources"]
@@ -1110,15 +1010,3 @@ def database_records_stitching():
             },
         ),
     ]
-# @repository
-# def validation():
-#     graph_jobs = [validate_jgi_job, validate_gold_job, validate_emsl_job]
-#     return graph_jobs
-#
-#
-# @repository
-# def test_validation():
-#     graph_jobs = [test_validate_jgi_job, test_validate_gold_job, test_validate_emsl_job]
-#     return graph_jobs

nmdc_runtime/site/translation/gold_translator.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import collections
-import csv
 import re
 from typing import List, Tuple, Union
 from nmdc_schema import nmdc
@@ -342,12 +341,7 @@ class GoldStudyTranslator(Translator):
         if field_value is None:
             return None
-        return nmdc.QuantityValue(
-            has_raw_value=field_value,
-            has_numeric_value=nmdc.Double(field_value),
-            has_unit=unit,
-            type="nmdc:QuantityValue",
-        )
+        return self._parse_quantity_value(str(field_value), unit)
     def _get_text_value(
         self, gold_entity: JSON_OBJECT, gold_field: str
@@ -573,13 +567,11 @@ class GoldStudyTranslator(Translator):
         gold_biosample_id = gold_biosample["biosampleGoldId"]
         return nmdc.Biosample(
             add_date=gold_biosample.get("addDate"),
-            alt=self._get_quantity_value(
-                gold_biosample, "altitudeInMeters", unit="meters"
-            ),
+            alt=self._get_quantity_value(gold_biosample, "altitudeInMeters", unit="m"),
             collected_from=nmdc_field_site_id,
             collection_date=self._get_collection_date(gold_biosample),
             depth=self._get_quantity_value(
-                gold_biosample, ("depthInMeters", "depthInMeters2"), unit="meters"
+                gold_biosample, ("depthInMeters", "depthInMeters2"), unit="m"
             ),
             description=gold_biosample.get("description"),
             diss_oxygen=self._get_quantity_value(gold_biosample, "oxygenConcentration"),
@@ -618,7 +610,7 @@ class GoldStudyTranslator(Translator):
             ),
             specific_ecosystem=gold_biosample.get("specificEcosystem"),
             subsurface_depth=self._get_quantity_value(
-                gold_biosample, "subsurfaceDepthInMeters", unit="meters"
+                gold_biosample, "subsurfaceDepthInMeters", unit="m"
             ),
             temp=self._get_quantity_value(
                 gold_biosample, "sampleCollectionTemperature"

nmdc_runtime/site/translation/neon_benthic_translator.py CHANGED Viewed

@@ -11,7 +11,6 @@ from nmdc_runtime.site.util import get_basename
 from nmdc_runtime.site.translation.neon_utils import (
     _get_value_or_none,
     _create_controlled_identified_term_value,
-    _create_controlled_term_value,
     _create_geolocation_value,
     _create_quantity_value,
     _create_timestamp_value,

nmdc_runtime/site/translation/neon_soil_translator.py CHANGED Viewed

@@ -10,7 +10,6 @@ from nmdc_runtime.site.util import get_basename
 from nmdc_runtime.site.translation.neon_utils import (
     _get_value_or_none,
     _create_controlled_identified_term_value,
-    _create_controlled_term_value,
     _create_geolocation_value,
     _create_quantity_value,
     _create_timestamp_value,
@@ -153,7 +152,7 @@ class NeonSoilDataTranslator(Translator):
             collection_date=_create_timestamp_value(
                 biosample_row["collectDate"].values[0]
             ),
-            temp=_create_quantity_value(biosample_row["soilTemp"].values[0], "Celsius"),
+            temp=_create_quantity_value(biosample_row["soilTemp"].values[0], "Cel"),
             depth=nmdc.QuantityValue(
                 has_minimum_numeric_value=_get_value_or_none(
                     biosample_row, "sampleTopDepth"
@@ -169,13 +168,13 @@ class NeonSoilDataTranslator(Translator):
             analysis_type=_get_value_or_none(biosample_row, "sequenceAnalysisType"),
             env_package=_create_text_value(biosample_row["sampleType"].values[0]),
             nitro=_create_quantity_value(
-                biosample_row["nitrogenPercent"].values[0], "percent"
+                biosample_row["nitrogenPercent"].values[0], "%"
             ),
             org_carb=_create_quantity_value(
-                biosample_row["organicCPercent"].values[0], "percent"
+                biosample_row["organicCPercent"].values[0], "%"
             ),
             carb_nitro_ratio=_create_quantity_value(
-                biosample_row["CNratio"].values[0], None
+                biosample_row["CNratio"].values[0], "ratio"
             ),
             ph=_create_double_value(biosample_row["soilInWaterpH"].values[0]),
             water_content=(

nmdc_runtime/site/translation/neon_surface_water_translator.py CHANGED Viewed

@@ -3,7 +3,6 @@ import sqlite3
 from typing import Dict, Optional, Union
 import pandas as pd
-import requests
 import requests_cache
 from nmdc_schema import nmdc
@@ -12,7 +11,6 @@ from nmdc_runtime.site.util import get_basename
 from nmdc_runtime.site.translation.neon_utils import (
     _get_value_or_none,
     _create_controlled_identified_term_value,
-    _create_controlled_term_value,
     _create_geolocation_value,
     _create_quantity_value,
     _create_timestamp_value,

nmdc_runtime/site/translation/submission_portal_translator.py CHANGED Viewed

@@ -278,61 +278,9 @@ class SubmissionPortalTranslator(Translator):
     def _get_quantity_value(
         self, raw_value: Optional[str], unit: Optional[str] = None
     ) -> Union[nmdc.QuantityValue, None]:
-        """Construct a nmdc:QuantityValue from a raw value string
+        """Construct a nmdc:QuantityValue from a raw value string"""
-        The regex pattern minimally matches on a single numeric value (possibly
-        floating point). The pattern can also identify a range represented by
-        two numeric values separated by a hyphen. It can also identify non-numeric
-        characters at the end of the string which are interpreted as a unit. A unit
-        may also be explicitly provided as an argument to this function. If parsing
-        identifies a unit and a unit argument is provided, the unit argument is used.
-        If the pattern is not matched at all None is returned.
-        TODO: currently the parsed unit string is used as-is. In the future we may want
-        to be stricter about what we accept or coerce into a controlled value set
-        :param raw_value: string to parse
-        :param unit: optional unit, defaults to None
-        :return: nmdc:QuantityValue
-        """
-        if raw_value is None:
-            return None
-        match = re.fullmatch(
-            "([+-]?(?=\.\d|\d)(?:\d+)?(?:\.?\d*)(?:[eE][+-]?\d+)?)(?: *- *([+-]?(?=\.\d|\d)(?:\d+)?(?:\.?\d*)(?:[eE][+-]?\d+)?))?(?: *(\S+))?",
-            raw_value,
-        )
-        if not match:
-            return None
-        qv = nmdc.QuantityValue(
-            has_raw_value=raw_value,
-            type="nmdc:QuantityValue",
-        )
-        if match.group(2):
-            # having group 2 means the value is a range like "0 - 1". Either
-            # group 1 or group 2 might be the minimum especially when handling
-            # negative ranges like "0 - -1"
-            num_1 = float(match.group(1))
-            num_2 = float(match.group(2))
-            qv.has_minimum_numeric_value = min(num_1, num_2)
-            qv.has_maximum_numeric_value = max(num_1, num_2)
-        else:
-            # otherwise we just have a single numeric value
-            qv.has_numeric_value = float(match.group(1))
-        if unit:
-            # a unit was manually specified
-            if match.group(3) and unit != match.group(3):
-                # a unit was also found in the raw string; issue a warning
-                # if they don't agree, but keep the manually specified one
-                logging.warning(f'Unit mismatch: "{unit}" and "{match.group(3)}"')
-            qv.has_unit = unit
-        elif match.group(3):
-            # a unit was found in the raw string
-            qv.has_unit = match.group(3)
-        return qv
+        return self._parse_quantity_value(raw_value, unit)
     def _get_ontology_class(
         self, raw_value: Optional[str]

nmdc_runtime/site/translation/translator.py CHANGED Viewed

@@ -1,9 +1,13 @@
+import logging
+import re
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Union
 from nmdc_schema import nmdc
 JSON_OBJECT = Dict[str, Any]
+logger = logging.getLogger(__name__)
 class Translator(ABC):
     def __init__(
@@ -27,3 +31,61 @@ class Translator(ABC):
     @abstractmethod
     def get_database(self) -> nmdc.Database:
         pass
+    def _parse_quantity_value(
+        self, raw_value: Optional[str], unit: Optional[str] = None
+    ) -> Union[nmdc.QuantityValue, None]:
+        """Construct a nmdc:QuantityValue from a raw value string
+        The regex pattern minimally matches on a single numeric value (possibly
+        floating point). The pattern can also identify a range represented by
+        two numeric values separated by a hyphen. It can also identify non-numeric
+        characters at the end of the string which are interpreted as a unit. A unit
+        may also be explicitly provided as an argument to this function. If parsing
+        identifies a unit and a unit argument is provided, the unit argument is used.
+        If the pattern is not matched at all None is returned.
+        :param raw_value: string to parse
+        :param unit: optional unit, defaults to None. If None, the unit is extracted from the
+            raw_value. If a unit is provided, it will override the unit extracted from the
+            raw_value.
+        :return: nmdc:QuantityValue
+        """
+        if raw_value is None:
+            return None
+        match = re.fullmatch(
+            "([+-]?(?=\.\d|\d)(?:\d+)?(?:\.?\d*)(?:[eE][+-]?\d+)?)(?: *- *([+-]?(?=\.\d|\d)(?:\d+)?(?:\.?\d*)(?:[eE][+-]?\d+)?))?(?: *(\S+))?",
+            raw_value,
+        )
+        if not match:
+            return None
+        quantity_value_kwargs = {
+            "has_raw_value": raw_value,
+            "type": "nmdc:QuantityValue",
+        }
+        if match.group(2):
+            # having group 2 means the value is a range like "0 - 1". Either
+            # group 1 or group 2 might be the minimum especially when handling
+            # negative ranges like "0 - -1"
+            num_1 = float(match.group(1))
+            num_2 = float(match.group(2))
+            quantity_value_kwargs["has_minimum_numeric_value"] = min(num_1, num_2)
+            quantity_value_kwargs["has_maximum_numeric_value"] = max(num_1, num_2)
+        else:
+            # otherwise we just have a single numeric value
+            quantity_value_kwargs["has_numeric_value"] = float(match.group(1))
+        if unit:
+            # a unit was manually specified
+            if match.group(3) and unit != match.group(3):
+                # a unit was also found in the raw string; issue a warning
+                # if they don't agree, but keep the manually specified one
+                logger.warning(f'Unit mismatch: "{unit}" and "{match.group(3)}"')
+            quantity_value_kwargs["has_unit"] = unit
+        elif match.group(3):
+            # a unit was found in the raw string
+            quantity_value_kwargs["has_unit"] = match.group(3)
+        return nmdc.QuantityValue(**quantity_value_kwargs)

nmdc_runtime/site/util.py CHANGED Viewed

@@ -3,10 +3,11 @@ import os
 from functools import lru_cache
 from pymongo.database import Database as MongoDatabase
 from subprocess import Popen, PIPE, STDOUT, CalledProcessError
-from toolz import groupby
+from refscan.lib.helpers import get_collection_names_from_schema
-from nmdc_runtime.api.db.mongo import get_collection_names_from_schema
 from nmdc_runtime.site.resources import mongo_resource
+from nmdc_runtime.util import nmdc_schema_view
 mode_test = {
     "resource_defs": {"mongo": mongo_resource}
@@ -37,12 +38,16 @@ def run_and_log(shell_cmd, context):
 @lru_cache
 def schema_collection_has_index_on_id(mdb: MongoDatabase) -> dict:
+    """
+    TODO: Document this function.
+    """
+    schema_view = nmdc_schema_view()
     present_collection_names = set(mdb.list_collection_names())
     return {
         name: (
             name in present_collection_names and "id_1" in mdb[name].index_information()
         )
-        for name in get_collection_names_from_schema()
+        for name in get_collection_names_from_schema(schema_view)
     }

nmdc_runtime/site/validation/util.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from dagster import op, AssetMaterialization, AssetKey, EventMetadata
+from dagster import op, AssetMaterialization, AssetKey, MetadataValue
 from jsonschema import Draft7Validator
 from nmdc_runtime.util import get_nmdc_jsonschema_dict
 from toolz import dissoc
@@ -92,10 +92,15 @@ def announce_validation_report(context, report, api_object):
         asset_key=AssetKey(["validation", f"{collection_name}_validation"]),
         description=f"{collection_name} translation validation",
         metadata={
-            # https://docs.dagster.io/_apidocs/solids#event-metadata
-            # also .json, .md, .path, .url, .python_artifact, ...
-            "n_errors": EventMetadata.int(len(report["errors"])),
-            "object_id": EventMetadata.text(api_object["id"]),
+            # Note: When this code was originally written, it used Dagster's `EventMetadata` class,
+            #       which has since been replaced by Dagster's `MetadataValue` class.
+            #
+            #       Reference:
+            #       - https://docs.dagster.io/api/dagster/ops#dagster.MetadataValue
+            #       - https://docs.dagster.io/api/dagster/metadata#dagster.MetadataValue
+            #
+            "n_errors": MetadataValue.int(len(report["errors"])),
+            "object_id": MetadataValue.text(api_object["id"]),
         },
     )

nmdc_runtime/util.py CHANGED Viewed

@@ -14,8 +14,6 @@ from typing import Callable, List, Optional, Set, Dict
 import fastjsonschema
 import requests
 from frozendict import frozendict
-from linkml_runtime import linkml_model
-from linkml_runtime.utils.schemaview import SchemaView
 from nmdc_schema.get_nmdc_view import ViewGetter
 from pymongo.database import Database as MongoDatabase
 from pymongo.errors import OperationFailure
@@ -27,48 +25,6 @@ from nmdc_runtime.api.core.util import sha256hash_from_file
 from nmdc_runtime.api.models.object import DrsObjectIn
-def get_names_of_classes_in_effective_range_of_slot(
-    schema_view: SchemaView, slot_definition: linkml_model.SlotDefinition
-) -> List[str]:
-    r"""
-    Determine the slot's "effective" range, by taking into account its `any_of` constraints (if defined).
-    Note: The `any_of` constraints constrain the slot's "effective" range beyond that described by the
-          induced slot definition's `range` attribute. `SchemaView` does not seem to provide the result
-          of applying those additional constraints, so we do it manually here (if any are defined).
-          Reference: https://github.com/orgs/linkml/discussions/2101#discussion-6625646
-    Reference: https://linkml.io/linkml-model/latest/docs/any_of/
-    """
-    # Initialize the list to be empty.
-    names_of_eligible_target_classes = []
-    # If the `any_of` constraint is defined on this slot, use that instead of the `range`.
-    if "any_of" in slot_definition and len(slot_definition.any_of) > 0:
-        for slot_expression in slot_definition.any_of:
-            # Use the slot expression's `range` to get the specified eligible class name
-            # and the names of all classes that inherit from that eligible class.
-            if slot_expression.range in schema_view.all_classes():
-                own_and_descendant_class_names = schema_view.class_descendants(
-                    slot_expression.range
-                )
-                names_of_eligible_target_classes.extend(own_and_descendant_class_names)
-    else:
-        # Use the slot's `range` to get the specified eligible class name
-        # and the names of all classes that inherit from that eligible class.
-        if slot_definition.range in schema_view.all_classes():
-            own_and_descendant_class_names = schema_view.class_descendants(
-                slot_definition.range
-            )
-            names_of_eligible_target_classes.extend(own_and_descendant_class_names)
-    # Remove duplicate class names.
-    names_of_eligible_target_classes = list(set(names_of_eligible_target_classes))
-    return names_of_eligible_target_classes
 def get_class_names_from_collection_spec(
     spec: dict, prefix: Optional[str] = None
 ) -> List[str]:
@@ -324,9 +280,9 @@ def find_one(k_v: dict, entities: Iterable[dict]):
     """Find the first entity with key-value pair k_v, if any?
     >>> find_one({"id": "foo"}, [{"id": "foo"}])
+    {'id': 'foo'}
+    >>> find_one({"id": "foo"}, [{"id": "bar"}]) is None
     True
-    >>> find_one({"id": "foo"}, [{"id": "bar"}])
-    False
     """
     if len(k_v) > 1:
         raise Exception("Supports only one key-value pair")
@@ -370,7 +326,7 @@ def nmdc_database_collection_names():
     TODO: Document this function.
     TODO: Assuming this function was designed to return a list of names of all Database slots that represents database
-          collections, use the function named `get_collection_names_from_schema` in `nmdc_runtime/api/db/mongo.py`
+          collections, import/use the function named `get_collection_names_from_schema` from `refscan.lib.helpers`
           instead, since (a) it includes documentation and (b) it performs the additional checks the lead schema
           maintainer expects (e.g. checking whether a slot is `multivalued` and `inlined_as_list`).
     """

{nmdc_runtime-2.9.0.dist-info → nmdc_runtime-2.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nmdc_runtime
-Version: 2.9.0
+Version: 2.10.0
 Summary: A runtime system for NMDC data management and orchestration
 Home-page: https://github.com/microbiomedata/nmdc-runtime
 Author: Donny Winston
@@ -152,7 +152,7 @@ http://127.0.0.1:8000/redoc/.
 *  NOTE: Any time you add or change requirements in requirements/main.in or requirements/dev.in, you must run:
-```
+```bash
 pip-compile --build-isolation --allow-unsafe --resolver=backtracking --strip-extras --output-file requirements/[main|dev].txt requirements/[main|dev].in
 ```
 to generate main.txt and dev.txt files respectively. main.in is kind of like a poetry dependency stanza, dev.in is kind
@@ -160,9 +160,6 @@ of like poetry dev.dependencies stanza. main.txt and dev.txt are kind of like po
 versions of dependencies to use. main.txt and dev.txt are combined in the docker compose build process to create the
 final requirements.txt file and import the dependencies into the Docker image.
-```bash
 ## Local Testing
 Tests can be found in `tests` and are run with the following commands:
@@ -173,8 +170,9 @@ make test
 # Run a Specific test file eg. tests/test_api/test_endpoints.py
 make test ARGS="tests/test_api/test_endpoints.py"
-```
 docker compose --file docker-compose.test.yml run test
+```
 As you create Dagster solids and pipelines, add tests in `tests/` to check that your code behaves as
 desired and does not break over time.
@@ -182,6 +180,59 @@ desired and does not break over time.
 [For hints on how to write tests for solids and pipelines in Dagster, see their documentation
 tutorial on Testing](https://docs.dagster.io/guides/test/unit-testing-assets-and-ops).
+### Performance profiling
+We use a tool called [Pyinstrument](https://pyinstrument.readthedocs.io) to profile the performance of the Runtime API while processing an individual HTTP request.
+Here's how you can do that:
+1. In your `.env` file, set `IS_PROFILING_ENABLED` to `true`
+2. Start/restart your development stack: `$ make up-dev`
+3. Ensure the endpoint function whose performance you want to profile is defined using `async def` (as opposed to just `def`) ([reference](https://github.com/joerick/pyinstrument/issues/257))
+Then—with all of that done—submit an HTTP request that includes the URL query parameter: `profile=true`. Instructions for doing that are in the sections below.
+<details>
+<summary>Show/hide instructions for <code>GET</code> requests only (involves web browser)</summary>
+1. In your web browser, visit the endpoint's URL, but add the `profile=true` query parameter to the URL. Examples:
+   ```diff
+   A. If the URL doesn't already have query parameters, append `?profile=true`.
+   - http://127.0.0.1:8000/nmdcschema/biosample_set
+   + http://127.0.0.1:8000/nmdcschema/biosample_set?profile=true
+   B. If the URL already has query parameters, append `&profile=true`.
+   - http://127.0.0.1:8000/nmdcschema/biosample_set?filter={}
+   + http://127.0.0.1:8000/nmdcschema/biosample_set?filter={}&profile=true
+   ```
+2. Your web browser will display a performance profiling report.
+   > Note: The Runtime API will have responded with a performance profiling report web page, instead of its normal response (which the Runtime discards).
+That'll only work for `GET` requests, though, since you're limited to specifying the request via the address bar.
+</details>
+<details>
+<summary>Show/hide instructions for <strong>all</strong> kinds of requests (involves <code>curl</code> + web browser)</summary>
+1. At your terminal, type or paste the `curl` command you want to run (you can copy/paste one from Swagger UI).
+2. Append the `profile=true` query parameter to the URL in the command, and use the `-o` option to save the response to a file whose name ends with `.html`. For example:
+   ```diff
+     curl -X 'POST' \
+   -   'http://127.0.0.1:8000/metadata/json:validate' \
+   +   'http://127.0.0.1:8000/metadata/json:validate?profile=true' \
+   +    -o /tmp/profile.html \
+        -H 'accept: application/json' \
+        -H 'Content-Type: application/json' \
+        -d '{"biosample_set": []}'
+   ```
+3. Run the command.
+   > Note: The Runtime API will respond with a performance profiling report web page, instead of its normal response (which the Runtime discards). The performance profiling report web page will be saved to the `.html` file to which you redirected the command output.
+4. Double-click on the `.html` file to view it in your web browser.
+   1. Alternatively, open your web browser and navigate to the `.html` file; e.g., enter `file:///tmp/profile.html` into the address bar.
+</details>
 ### RAM usage
 The `dagster-daemon` and `dagster-dagit` containers can consume a lot of RAM. If tests are failing and the console of

nmdc-runtime 2.9.0__py3-none-any.whl → 2.10.0__py3-none-any.whl

Potentially problematic release.

nmdc-runtime 2.9.0__py3-none-any.whl → 2.10.0__py3-none-any.whl