deriva-ml 1.13.0__py3-none-any.whl → 1.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/__init__.py CHANGED
@@ -15,7 +15,7 @@ __all__ = [
     "UploadState",
     "MLVocab",
     "MLAsset",
-    "ExecMetadataVocab",
+    "ExecAssetType",
     "RID",
     "DerivaSystemColumns",
     "VersionPart",
@@ -34,7 +34,7 @@ from .deriva_definitions import (
     DerivaMLException,
     MLVocab,
     MLAsset,
-    ExecMetadataVocab,
+    ExecAssetType,
     DerivaSystemColumns,
 )
 from .deriva_ml_base import DerivaML
deriva_ml/dataset.py CHANGED
@@ -964,7 +964,8 @@ class Dataset:
         for the dataset.
         """
         if (
-            execution_rid != DRY_RUN_RID
+            execution_rid
+            and execution_rid != DRY_RUN_RID
             and self._model.catalog.resolve_rid(execution_rid).table.name != "Execution"
         ):
             raise DerivaMLException(f"RID {execution_rid} is not an execution")
@@ -1120,17 +1121,18 @@ class Dataset:

         def update_status(status: Status, msg: str) -> None:
             """Update the current status for this execution in the catalog"""
-            self._model.catalog.getPathBuilder().schemas[
-                self._ml_schema
-            ].Execution.update(
-                [
-                    {
-                        "RID": execution_rid,
-                        "Status": status.value,
-                        "Status_Detail": msg,
-                    }
-                ]
-            )
+            if execution_rid and execution_rid != DRY_RUN_RID:
+                self._model.catalog.getPathBuilder().schemas[
+                    self._ml_schema
+                ].Execution.update(
+                    [
+                        {
+                            "RID": execution_rid,
+                            "Status": status.value,
+                            "Status_Detail": msg,
+                        }
+                    ]
+                )
             self._logger.info(msg)

         def fetch_progress_callback(current, total):
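Note: both dataset.py hunks apply the same dry-run guard: catalog writes are skipped whenever execution_rid is empty or is the DRY_RUN_RID sentinel, while local logging still happens. A minimal standalone sketch of the pattern (the catalog and logger objects and the "deriva-ml" schema name are illustrative placeholders; DRY_RUN_RID is the real constant from deriva_ml.deriva_definitions):

    from deriva_ml.deriva_definitions import DRY_RUN_RID

    def update_status(execution_rid, status, msg, catalog, logger):
        # Only a real execution record is updated in the catalog; a dry run
        # (or a missing RID) skips the write but still logs locally.
        if execution_rid and execution_rid != DRY_RUN_RID:
            catalog.getPathBuilder().schemas["deriva-ml"].Execution.update(
                [{"RID": execution_rid, "Status": status.value, "Status_Detail": msg}]
            )
        logger.info(msg)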
deriva_ml/deriva_definitions.py CHANGED
@@ -197,7 +197,7 @@ class MLAsset(StrEnum):
     execution_asset = "Execution_Asset"


-class ExecMetadataVocab(StrEnum):
+class ExecMetadataType(StrEnum):
     """
     Predefined execution metadata types.
     """
@@ -206,6 +206,16 @@ class ExecMetadataVocab(StrEnum):
     runtime_env = "Runtime_Env"


+class ExecAssetType(StrEnum):
+    """
+    Predefined execution metadata types.
+    """
+
+    input_file = "Input_File"
+    output_file = "Output_File"
+    notebook_output = "Notebook_Output"
+
+
 class ColumnDefinition(BaseModel):
     """Pydantic model for deriva_py Column.define"""

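Note: ExecMetadataVocab has been renamed to ExecMetadataType, and the new ExecAssetType enum carries the predefined Asset_Type terms for execution assets. Since these are string-valued enums, members compare equal to their term strings and can be passed wherever a bare term string was used before. A small illustration (the StrEnum fallback shown is an assumption for interpreters older than 3.11, where enum.StrEnum does not exist):

    import sys

    if sys.version_info >= (3, 11):
        from enum import StrEnum
    else:
        from enum import Enum

        class StrEnum(str, Enum):  # minimal stand-in for enum.StrEnum
            pass

    class ExecAssetType(StrEnum):
        input_file = "Input_File"
        output_file = "Output_File"
        notebook_output = "Notebook_Output"

    # Members are strings, so .value is optional at most call sites:
    assert ExecAssetType.input_file == "Input_File"
    assert ExecAssetType.notebook_output.value == "Notebook_Output"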
deriva_ml/deriva_ml_base.py CHANGED
@@ -974,7 +974,7 @@ class DerivaML(Dataset):
     ) -> Workflow:
         """Identify current executing program and return a workflow RID for it

-        Determine the notebook or script that is currently being executed. Assume that this is
+        Determine the notebook or script that is currently being executed. Assume that this is
         being executed from a cloned GitHub repository. Determine the remote repository name for
         this object. Then either retrieve an existing workflow for this executable or create
         a new one.
@@ -983,6 +983,9 @@ class DerivaML(Dataset):
             name: The name of the workflow.
             workflow_type: The type of the workflow.
             description: The description of the workflow.
+
+        Returns:
+            A workflow object.
         """
         # Make sure type is correct.
         self.lookup_term(MLVocab.workflow_type, workflow_type)
@@ -1001,6 +1004,9 @@ class DerivaML(Dataset):
         1. The datasets specified in the configuration are downloaded and placed in the cache-dir. If a version is
         not specified in the configuration, then a new minor version number is created for the dataset and downloaded.

+        2. If any execution assets are provided in the configuration, they are downloaded and placed in the working directory.
+
+
         Args:
             configuration: ExecutionConfiguration:
             dry_run: Do not create an execution record or upload results.
deriva_ml/execution.py CHANGED
@@ -12,15 +12,12 @@ import os
 from pathlib import Path

 from pydantic import validate_call, ConfigDict
-import regex as re
 import sys
 import shutil
 from typing import Iterable, Any, Optional

 from deriva.core import format_exception
-from deriva.core.datapath import DataPathException
 from deriva.core.hatrac_store import HatracStore
-from .deriva_definitions import ExecMetadataVocab
 from .deriva_definitions import (
     RID,
     Status,
@@ -28,6 +25,8 @@ from .deriva_definitions import (
     DerivaMLException,
     MLVocab,
     MLAsset,
+    ExecMetadataType,
+    ExecAssetType,
     DRY_RUN_RID,
 )
 from .deriva_ml_base import DerivaML, FeatureRecord
@@ -65,29 +64,43 @@ except ImportError:
         return s


-class AssetFilePath(type(Path())):
-    """Derived class of Path that also includes information about a downloaded.
+# Platform-specific base class
+if sys.version_info >= (3, 12):

-    An AssetFilePath has all the methods associated with a pathlib.Path object. In addition, it defines additional
-    attributes associated with a DerviaML asset.
+    class AssetFilePath(Path):
+        """
+        Create a new Path object that has additional information related to the use of this path as an asset.

-    Attributes:
-        asset_types: A list of the types associated with this asset. From the Asset_Type controlled vocabulary.
-        asset_metadata: A dictionary of names and values of any additional columns associated with this asset.
-        asset_name: The name of the asset table
-        file_name: The name of the file in the local file system that has the asset contents
-        asset_rid: The RID of the asset if it has been uploaded into an asset table
-    """
+        Args:
+            asset_path: Local path to the location of the asset.
+            asset_name: The name of the asset in the catalog (e.g. the asset table name).
+            file_name: Name of the local file that contains the contents of the asset.
+            asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
+            asset_types: A list of terms from the Asset_Type controlled vocabulary.
+            asset_rid: The RID of the asset if it has been uploaded into an asset table
+        """

-    def __new__(
-        cls,
-        asset_path,
-        asset_name: str,
-        file_name: str,
-        asset_metadata: dict[str, Any],
-        asset_types: list[str] | str,
-        asset_rid: Optional[RID] = None,
-    ):
+        def __init__(
+            self,
+            asset_path: str | Path,
+            asset_name: str,
+            file_name: str,
+            asset_metadata: dict[str, Any],
+            asset_types: list[str] | str,
+            asset_rid: Optional["RID"] = None,
+        ):
+            super().__init__(asset_path)
+            # These assignments happen after __new__ returns the instance
+            self.asset_name = asset_name
+            self.file_name = file_name
+            self.asset_metadata = asset_metadata
+            self.asset_types = (
+                asset_types if isinstance(asset_types, list) else [asset_types]
+            )
+            self.asset_rid = asset_rid
+else:
+
+    class AssetFilePath(type(Path())):
         """
         Create a new Path object that has additional information related to the use of this path as an asset.

@@ -99,15 +112,26 @@ class AssetFilePath(type(Path())):
             asset_types: A list of terms from the Asset_Type controlled vocabulary.
             asset_rid: The RID of the asset if it has been uploaded into an asset table
         """
-        obj = super().__new__(cls, asset_path)
-        obj.asset_types = (
-            asset_types if isinstance(asset_types, list) else [asset_types]
-        )
-        obj.asset_metadata = asset_metadata
-        obj.asset_name = asset_name
-        obj.file_name = file_name
-        obj.asset_rid = asset_rid
-        return obj
+
+        def __new__(
+            cls,
+            asset_path: str | Path,
+            asset_name: str,
+            file_name: str,
+            asset_metadata: dict[str, Any],
+            asset_types: list[str] | str,
+            asset_rid: Optional["RID"] = None,
+        ):
+            # Only pass the path to the base Path class
+            obj = super().__new__(cls, asset_path)
+            obj.asset_name = asset_name
+            obj.file_name = file_name
+            obj.asset_metadata = asset_metadata
+            obj.asset_types = (
+                asset_types if isinstance(asset_types, list) else [asset_types]
+            )
+            obj.asset_rid = asset_rid
+            return obj


 class Execution:
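Note: the version split above tracks a real pathlib change. Since Python 3.12, pathlib.Path is designed for subclassing and __init__ receives the path segments; before 3.12, Path.__new__ dispatches to PosixPath or WindowsPath, so a subclass must derive from type(Path()) (the concrete flavour) and attach extra attributes in __new__. A self-contained sketch of the same idiom (TaggedPath and tag are illustrative names, not part of deriva-ml):

    import sys
    from pathlib import Path

    if sys.version_info >= (3, 12):
        class TaggedPath(Path):
            def __init__(self, *args, tag: str = ""):
                super().__init__(*args)  # 3.12+: Path.__init__ consumes the segments
                self.tag = tag           # plain attribute assignment now works
    else:
        class TaggedPath(type(Path())):  # PosixPath or WindowsPath, per platform
            def __new__(cls, *args, tag: str = ""):
                obj = super().__new__(cls, *args)  # only path segments go to Path
                obj.tag = tag
                return obj

    p = TaggedPath("/tmp/example.txt", tag="demo")
    print(p.name, p.tag)  # -> example.txt demo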
@@ -154,7 +178,7 @@ class Execution:
         Args:
             configuration: Execution configuration object that describes the execution.
             ml_object: The DerivaML instance that created the execution.
-            reload: RID of previously initialized execution object.
+            reload: RID of a previously initialized execution object.
         """
         self.asset_paths: list[AssetFilePath] = []
         self.configuration = configuration
@@ -237,9 +261,9 @@ class Execution:

     def _save_runtime_environment(self):
         runtime_env_path = self.asset_file_path(
-            asset_name="Execution_Metadata",
-            file_name=f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
-            asset_types=ExecMetadataVocab.runtime_env.value,
+            "Execution_Metadata",
+            f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+            ExecMetadataType.runtime_env.value,
         )
         with open(runtime_env_path, "w") as fp:
             json.dump(get_execution_environment(), fp)
@@ -296,15 +320,19 @@ class Execution:
         # Save configuration details for later upload
         if not reload:
             cfile = self.asset_file_path(
-                asset_name=MLAsset.execution_metadata,
-                file_name="configuration.json",
-                asset_types=ExecMetadataVocab.execution_config.value,
+                MLAsset.execution_metadata,
+                "configuration.json",
+                ExecMetadataType.execution_config.value,
             )
             with open(cfile.as_posix(), "w", encoding="utf-8") as config_file:
                 json.dump(self.configuration.model_dump(), config_file)

             for parameter_file in self.configuration.parameters:
-                self.asset_file_path(MLAsset.execution_assets, parameter_file)
+                self.asset_file_path(
+                    MLAsset.execution_asset,
+                    parameter_file,
+                    ExecAssetType.input_file.value,
+                )

         # save runtime env
         self._save_runtime_environment()
@@ -471,7 +499,7 @@ class Execution:
         """Download an asset from a URL and place it in a local directory.

         Args:
-            asset_rid: URL of the asset.
+            asset_rid: RID of the asset.
             dest_dir: Destination directory for the asset.
             update_catalog: Whether to update the catalog execution information after downloading.

@@ -651,20 +679,9 @@ class Execution:
         with open(feature_file, "r") as feature_values:
             entities = [json.loads(line.strip()) for line in feature_values]
         # Update the asset columns in the feature and add to the catalog.
-        try:
-            self._ml_object.domain_path.tables[feature_table].insert(
-                [map_path(e) for e in entities]
-            )
-        except DataPathException as e:
-            if re.match(
-                rf'DETAIL: +Key +\("Execution", +"{target_table}", +"Feature_Name"\)=\(.*\) already exists',
-                e.message,
-            ):
-                self._logger.info(
-                    f"Skipping reload of feature values for {feature_table}"
-                )
-            else:
-                raise e
+        self._ml_object.domain_path.tables[feature_table].insert(
+            [map_path(e) for e in entities], on_conflict_skip=True
+        )

     def _update_asset_execution_table(
         self,
@@ -689,27 +706,17 @@ class Execution:
         asset_exe = self._model.find_association(asset_table_name, "Execution")
         asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]

-        try:
-            asset_exe_path.insert(
-                [
-                    {
-                        asset_table_name: asset_path.asset_rid,
-                        "Execution": self.execution_rid,
-                        "Asset_Role": asset_role,
-                    }
-                    for asset_path in asset_list
-                ]
-            )
-        except DataPathException as e:
-            if re.match(
-                rf'DETAIL: +Key +\("{asset_table_name}", +"Execution"\)=\(.*\) already exists',
-                e.message,
-            ):
-                self._logger.info(
-                    f"Skipping reload of execution assocations for {asset_table_name}"
-                )
-            else:
-                raise e
+        asset_exe_path.insert(
+            [
+                {
+                    asset_table_name: asset_path.asset_rid,
+                    "Execution": self.execution_rid,
+                    "Asset_Role": asset_role,
+                }
+                for asset_path in asset_list
+            ],
+            on_conflict_skip=True,
+        )

         # Now add in the type names via the asset_asset_type association table.
         # Get the list of types for each file in the asset.
@@ -735,24 +742,15 @@ class Execution:
         type_path = pb.schemas[asset_asset_type.schema.name].tables[
             asset_asset_type.name
         ]
-        try:
-            type_path.insert(
-                [
-                    {asset_table_name: asset.asset_rid, "Asset_Type": t}
-                    for asset in asset_list
-                    for t in asset_type_map[asset.file_name]
-                ]
-            )
-        except DataPathException as e:
-            if re.match(
-                rf'DETAIL: +Key +\("{asset_table_name}", +"Asset_Type"\)=\(.*\) already exists',
-                e.message,
-            ):
-                self._logger.info(
-                    f"Skipping reload of execution asset types for {asset_table_name}"
-                )
-            else:
-                raise e
+
+        type_path.insert(
+            [
+                {asset_table_name: asset.asset_rid, "Asset_Type": t}
+                for asset in asset_list
+                for t in asset_type_map[asset.file_name]
+            ],
+            on_conflict_skip=True,
+        )

     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def asset_file_path(
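Note: the three try/except blocks removed above all recognized duplicate-key failures by regex-matching the server's error text. They are replaced by the datapath insert option on_conflict_skip=True, which skips rows that violate an existing key instead of raising, so re-running an upload is idempotent. A hedged sketch of the new call shape (the catalog, schema, and table names below are made up for illustration):

    # `catalog` is assumed to be a connected deriva ErmrestCatalog.
    pb = catalog.getPathBuilder()
    assoc = pb.schemas["deriva-ml"].tables["Execution_Asset_Execution"]
    rows = [{"Execution_Asset": "1-ABC", "Execution": "1-XYZ", "Asset_Role": "Output"}]
    assoc.insert(rows, on_conflict_skip=True)  # duplicate rows are skipped, not fatal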
deriva_ml/execution_configuration.py CHANGED
@@ -264,7 +264,7 @@ class Workflow(BaseModel):
             checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
             is_notebook = True
         else:
-            path, is_notebook = Workflow._get_notebook_path()
+            path, is_notebook = Workflow._get_python_script()
         github_url, checksum = Workflow.get_url_and_checksum(path)

         return Workflow(
@@ -325,14 +325,15 @@ class ExecutionConfiguration(BaseModel):
             should be materialized.
         assets: List of assets to be downloaded prior to execution. The values must be RIDs in an asset table
         parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
-        workflow: A RID for a workflow instance. Must have a name, URI to the workflow instance, and a type.
+        workflow: Either a Workflow object, or a RID for a workflow instance.
+        parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
         description: A description of the execution. Can use Markdown format.
     """

     datasets: conlist(DatasetSpec) = []
     assets: list[RID | str] = []  # List of RIDs to model files.
     workflow: RID | Workflow
-    parameters: dict[str, Any] = {}
+    parameters: dict[str, Any] | Path = {}
     description: str = ""
     argv: conlist(str) = Field(default_factory=lambda: sys.argv)
@@ -341,7 +342,7 @@ class ExecutionConfiguration(BaseModel):
     @field_validator("parameters", mode="before")
     @classmethod
     def validate_parameters(cls, value: Any) -> Any:
-        """If parameter is a file, assume that it has JSON contents for configuration parameters"""
+        """If a parameter is a file, assume that it has JSON contents for configuration parameters"""
         if isinstance(value, str) or isinstance(value, Path):
             with open(value, "r") as f:
                 return json.load(f)
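Note: with parameters now typed dict[str, Any] | Path and validated in mode="before", a string or Path value is replaced by the parsed JSON before Pydantic checks the field, so callers can pass either form. A standalone sketch of the validator pattern (the Config model is illustrative, not the deriva-ml class):

    import json
    from pathlib import Path
    from typing import Any

    from pydantic import BaseModel, field_validator

    class Config(BaseModel):
        parameters: dict[str, Any] | Path = {}

        @field_validator("parameters", mode="before")
        @classmethod
        def load_parameter_file(cls, value: Any) -> Any:
            # A str or Path is treated as a JSON file holding the parameter dict.
            if isinstance(value, (str, Path)):
                with open(value, "r") as f:
                    return json.load(f)
            return value

    # Config(parameters="test-files/execution-parameters.json").parameters
    # -> {"local-file": "My local file.txt"}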
deriva_ml/run_notebook.py CHANGED
@@ -1,5 +1,6 @@
 """Module to run a notebook using papermill"""

+from datetime import datetime
 import json
 import os
 import papermill as pm
@@ -7,9 +8,9 @@ from pathlib import Path
 import regex as re
 import tempfile

-from deriva_ml import Workflow, DerivaML, MLVocab
+from deriva_ml import Workflow, DerivaML
 from deriva.core import BaseCLI
-from deriva_ml import MLAsset
+from deriva_ml import MLAsset, ExecAssetType


 class DerivaMLRunNotebookCLI(BaseCLI):
@@ -36,6 +37,19 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             help="Display parameters information for the given notebook path.",
         )

+        self.parser.add_argument(
+            "--log-output",
+            action="store_false",
+            help="Display logging output from notebook.",
+        )
+
+        self.parser.add_argument(
+            "--catalog",
+            metavar="<1>",
+            default=1,
+            help="Catalog number. Default 1",
+        )
+
         self.parser.add_argument(
             "--parameter",
             "-p",
@@ -43,7 +57,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             action="append",
             metavar=("KEY", "VALUE"),
             default=[],
-            help="Provide a parameter name band value to inject into the notebook.",
+            help="Provide a parameter name and value to inject into the notebook.",
         )

         self.parser.add_argument(
@@ -84,19 +98,24 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             print("Notebook file must be an ipynb file.")
             exit(1)

+        os.environ["DERIVA_HOST"] = args.host
+        os.environ["DERIVA_CATALOG_ID"] = args.catalog
+
         # Create a workflow instance for this specific version of the script. Return an existing workflow if one is found.
         notebook_parameters = pm.inspect_notebook(notebook_file)
         if args.inspect:
-            for param, value in notebook_parameters:
+            for param, value in notebook_parameters.items():
                 print(
                     f"{param}:{value['inferred_type_name']} (default {value['default']})"
                 )
             return
         else:
-            notebook_parameters = {
-                k: v["default"] for k, v in notebook_parameters.items()
-            } | parameters
-            print(f"Running notebook {notebook_file.name} with paremeters:")
+            notebook_parameters = (
+                {"host": args.host, "catalog": args.catalog}
+                | {k: v["default"] for k, v in notebook_parameters.items()}
+                | parameters
+            )
+            print(f"Running notebook {notebook_file.name} with parameters:")
             for param, value in notebook_parameters.items():
                 print(f"  {param}:{value}")
         self.run_notebook(notebook_file.resolve(), parameters, args.kernel)
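Note: parameter resolution now layers three dictionaries with the | merge operator, and the right-hand operand wins on key collisions: host/catalog seeds first, then the defaults papermill inspected from the notebook, then explicit -p overrides. For example:

    seed = {"host": "demo.derivacloud.org", "catalog": 1}  # hypothetical CLI values
    defaults = {"foo": 1, "bar": "hello"}                  # inspected notebook defaults
    overrides = {"foo": 42}                                # from repeated -p KEY VALUE
    merged = seed | defaults | overrides
    print(merged)  # {'host': 'demo.derivacloud.org', 'catalog': 1, 'foo': 42, 'bar': 'hello'}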
@@ -121,7 +140,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             r"Execution RID: https://(?P<host>.*)/id/(?P<catalog_id>.*)/(?P<execution_rid>[\w-]+)",
             line,
         ):
-            host = m["host"]
+            hostname = m["host"]
             catalog_id = m["catalog_id"]
             execution_rid = m["execution_rid"]
         if not execution_rid:
@@ -129,19 +148,24 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             exit(1)
         print("Uploaded notebook output for Execution RID:", execution_rid)

-        ml_instance = DerivaML(hostname=host, catalog_id=catalog_id)
-        ml_instance.add_term(
-            MLVocab.asset_type,
-            "Notebook_Output",
-            description="Jupyter Notebook Output",
-        )
+        ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id)
+
         execution = ml_instance.restore_execution(execution_rid)
         execution.asset_file_path(
             asset_name=MLAsset.execution_asset,
             file_name=notebook_output,
-            asset_types=["Notebook_Output"],
+            asset_types=ExecAssetType.notebook_output,
         )
+        parameter_file = execution.asset_file_path(
+            asset_name=MLAsset.execution_asset,
+            file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
+            asset_types=ExecAssetType.input_file.value,
+        )
+        with open(parameter_file, "w") as f:
+            json.dump(parameters, f)
+
         execution.upload_execution_outputs()
+        print(ml_instance.cite(execution_rid))


 def main():
deriva_ml/test-files/execution-parameters.json ADDED
@@ -0,0 +1 @@
+{"local-file": "My local file.txt"}
deriva_ml/test-files/notebook-parameters.json ADDED
@@ -0,0 +1,5 @@
+{
+  "assets": ["2-7J8M"],
+  "datasets": ["2-7K8W"],
+  "parameters": "test-files/execution-parameters.json"
+}
deriva_ml/test_notebook.ipynb CHANGED
@@ -3,15 +3,21 @@
   {
    "cell_type": "code",
    "id": "0",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:49.930351Z",
+     "start_time": "2025-04-18T22:52:48.926842Z"
+    }
+   },
    "source": [
     "import builtins\n",
+    "import os\n",
+    "\n",
     "from deriva.core.utils.globus_auth_utils import GlobusNativeLogin\n",
-    "from deriva_ml import ExecutionConfiguration, MLVocab, DerivaSystemColumns, DatasetSpec, DerivaML, Workflow\n",
-    "from deriva_ml.demo_catalog import create_demo_catalog, DemoML"
+    "from deriva_ml import ExecutionConfiguration, MLVocab, DerivaML, DatasetSpec"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": 1
   },
   {
    "cell_type": "code",
@@ -19,36 +25,61 @@
    "metadata": {
     "tags": [
      "parameters"
-    ]
+    ],
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:49.988873Z",
+     "start_time": "2025-04-18T22:52:49.986713Z"
+    }
    },
    "source": [
     "foo: int = 1\n",
-    "bar: str = \"hello\"\n",
-    "list_parameter: list[float] = [1, 2, 3]"
+    "assets = []\n",
+    "datasets = []\n",
+    "parameters = None"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": 2
  },
  {
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:50.002808Z",
+     "start_time": "2025-04-18T22:52:49.999450Z"
+    }
+   },
    "cell_type": "code",
-   "outputs": [],
-   "execution_count": null,
    "source": [
-    "print('foo', foo)\n",
-    "print('bar', bar)\n",
-    "print('list_parameter', list_parameter)"
+    "print(\"foo\", foo)\n",
+    "print(\"assets\", assets)\n",
+    "print(\"datasets\", datasets)\n",
+    "print(\"parameters\", parameters)"
    ],
-   "id": "70b23cdd933ce669"
+   "id": "70b23cdd933ce669",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "foo 1\n",
+      "assets []\n",
+      "datasets []\n",
+      "parameters None\n"
+     ]
+    }
+   ],
+   "execution_count": 3
  },
  {
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:50.344660Z",
+     "start_time": "2025-04-18T22:52:50.013816Z"
+    }
+   },
    "cell_type": "code",
-   "outputs": [],
-   "execution_count": null,
    "source": [
-    "hostname = 'dev.eye-ai.org'\n",
-    "domain_schema = 'eye-ai'\n",
+    "hostname = os.environ.get(\"DERIVA_HOST\") #or \"dev.eye-ai.org\"\n",
+    "catalog_id = os.environ.get(\"DERIVA_CATALOG_ID\") #or 'eye-ai'\n",
     "\n",
     "gnl = GlobusNativeLogin(host=hostname)\n",
     "if gnl.is_logged_in([hostname]):\n",
@@ -57,15 +88,31 @@
     "    gnl.login([hostname], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)\n",
     "    print(\"Login Successful\")\n"
    ],
-   "id": "2"
+   "id": "2",
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'NoneType' object has no attribute 'lower'",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[4], line 5\u001B[0m\n\u001B[1;32m 2\u001B[0m catalog_id \u001B[38;5;241m=\u001B[39m os\u001B[38;5;241m.\u001B[39menviron\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mDERIVA_CATALOG_ID\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;66;03m#or 'eye-ai'\u001B[39;00m\n\u001B[1;32m 4\u001B[0m gnl \u001B[38;5;241m=\u001B[39m GlobusNativeLogin(host\u001B[38;5;241m=\u001B[39mhostname)\n\u001B[0;32m----> 5\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[43mgnl\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mis_logged_in\u001B[49m\u001B[43m(\u001B[49m\u001B[43m[\u001B[49m\u001B[43mhostname\u001B[49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m:\n\u001B[1;32m 6\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mYou are already logged in.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n",
      "File \u001B[0;32m~/opt/anaconda3/envs/deriva-test/lib/python3.10/site-packages/deriva/core/utils/globus_auth_utils.py:582\u001B[0m, in \u001B[0;36mGlobusNativeLogin.is_logged_in\u001B[0;34m(self, hosts, requested_scopes, hosts_to_scope_map, exclude_defaults)\u001B[0m\n\u001B[1;32m 576\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21mis_logged_in\u001B[39m(\u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 577\u001B[0m hosts\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 578\u001B[0m requested_scopes\u001B[38;5;241m=\u001B[39m(),\n\u001B[1;32m 579\u001B[0m hosts_to_scope_map\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 580\u001B[0m exclude_defaults\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m):\n\u001B[1;32m 581\u001B[0m scopes \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mset\u001B[39m(requested_scopes)\n\u001B[0;32m--> 582\u001B[0m scope_map \u001B[38;5;241m=\u001B[39m hosts_to_scope_map \u001B[38;5;28;01mif\u001B[39;00m hosts_to_scope_map \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mhosts_to_scope_map\u001B[49m\u001B[43m(\u001B[49m\u001B[43mhosts\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mhosts\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 583\u001B[0m scopes\u001B[38;5;241m.\u001B[39mupdate(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscope_set_from_scope_map(scope_map))\n\u001B[1;32m 584\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m exclude_defaults:\n",
      "File \u001B[0;32m~/opt/anaconda3/envs/deriva-test/lib/python3.10/site-packages/deriva/core/utils/globus_auth_utils.py:607\u001B[0m, in \u001B[0;36mGlobusNativeLogin.hosts_to_scope_map\u001B[0;34m(self, hosts, match_scope_tag, all_tagged_scopes, force_refresh, warn_on_discovery_failure)\u001B[0m\n\u001B[1;32m 605\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m host \u001B[38;5;129;01min\u001B[39;00m hosts:\n\u001B[1;32m 606\u001B[0m scope_map\u001B[38;5;241m.\u001B[39mupdate({host: []})\n\u001B[0;32m--> 607\u001B[0m scopes \u001B[38;5;241m=\u001B[39m \u001B[43mget_oauth_scopes_for_host\u001B[49m\u001B[43m(\u001B[49m\u001B[43mhost\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 608\u001B[0m \u001B[43m \u001B[49m\u001B[43mconfig_file\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconfig_file\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 609\u001B[0m \u001B[43m \u001B[49m\u001B[43mforce_refresh\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mforce_refresh\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 610\u001B[0m \u001B[43m \u001B[49m\u001B[43mwarn_on_discovery_failure\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mwarn_on_discovery_failure\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 611\u001B[0m scope_list \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m()\n\u001B[1;32m 612\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m scopes:\n",
      "File \u001B[0;32m~/opt/anaconda3/envs/deriva-test/lib/python3.10/site-packages/deriva/core/utils/core_utils.py:300\u001B[0m, in \u001B[0;36mget_oauth_scopes_for_host\u001B[0;34m(host, config_file, force_refresh, warn_on_discovery_failure)\u001B[0m\n\u001B[1;32m 298\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m required_scopes:\n\u001B[1;32m 299\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m hostname, scopes \u001B[38;5;129;01min\u001B[39;00m required_scopes\u001B[38;5;241m.\u001B[39mitems():\n\u001B[0;32m--> 300\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[43mhost\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mlower\u001B[49m() \u001B[38;5;241m==\u001B[39m hostname\u001B[38;5;241m.\u001B[39mlower():\n\u001B[1;32m 301\u001B[0m result \u001B[38;5;241m=\u001B[39m scopes\n\u001B[1;32m 302\u001B[0m \u001B[38;5;28;01mbreak\u001B[39;00m\n",
      "\u001B[0;31mAttributeError\u001B[0m: 'NoneType' object has no attribute 'lower'"
+     ]
+    }
+   ],
+   "execution_count": 4
  },
  {
   "cell_type": "code",
   "id": "3",
   "metadata": {},
   "source": [
-    "ml_instance = DemoML(hostname, domain_schema)\n",
-    "print(f'Creating catalog at {ml_instance.catalog_id}')\n",
+    "ml_instance = DerivaML(hostname, catalog_id)\n",
     "\n",
     "ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"Initial setup of Model File\")\n",
     "ml_instance.add_term(MLVocab.asset_type, \"API_Model\", description=\"Model for our API workflow\")"
@@ -92,12 +139,38 @@
   "id": "6",
   "metadata": {},
   "source": [
-    "manual_execution = ml_instance.create_execution(ExecutionConfiguration( description=\"Sample Execution\", workflow=api_workflow))\n",
-    "manual_execution.upload_execution_outputs()\n",
-    "# Now lets create model configuration for our program."
+    "manual_execution = ml_instance.create_execution(\n",
+    "    ExecutionConfiguration(\n",
+    "        description=\"Sample Execution\",\n",
+    "        workflow=api_workflow,\n",
+    "        datasets=[DatasetSpec(rid=ds, version=ml_instance.dataset_version(ds)) for ds in datasets],\n",
+    "        assets=assets,\n",
+    "        parameters=parameters\n",
+    "    )\n",
+    ")"
   ],
   "outputs": [],
   "execution_count": null
+ },
+ {
+  "metadata": {},
+  "cell_type": "code",
+  "source": [
+   "print(f'parameters: {manual_execution.parameters}')\n",
+   "print(f'datasets: {manual_execution.datasets}')\n",
+   "print(f'assets: {manual_execution.asset_paths}')"
+  ],
+  "id": "4b2a3b8c16333645",
+  "outputs": [],
+  "execution_count": null
+ },
+ {
+  "metadata": {},
+  "cell_type": "code",
+  "source": "manual_execution.upload_execution_outputs()",
+  "id": "efa8cb1b0ed438bb",
+  "outputs": [],
+  "execution_count": null
  }
 ],
 "metadata": {
deriva_ml-1.13.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.13.0
+Version: 1.13.2
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
deriva_ml-1.13.2.dist-info/RECORD CHANGED
@@ -1,29 +1,31 @@
-deriva_ml/__init__.py,sha256=2sRcX2s72Guo4M7IGW_0_ZyKokZNCbVE6de65tvHBlw,1109
+deriva_ml/__init__.py,sha256=GfneBq7xDphMqUQY96sW9ixRj74M3UTUCmD4KMIRSaM,1101
 deriva_ml/database_model.py,sha256=lMbAEqn4n0m7h_JstMX_LX9gbvBIEydG3sRilPn3eLU,14885
-deriva_ml/dataset.py,sha256=OyWUKWnYeP0ctimSBQ4em-uJrzCNOohx4GPT2uIl6R4,60649
+deriva_ml/dataset.py,sha256=W1TSHgkdXNw2v5hC0UBrivCKadMK1LaFd6YIjHE9jZA,60786
 deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
 deriva_ml/dataset_bag.py,sha256=yS8oYVshfFtRDyhGPRqtbvxjyd3ZFF29lrB783OP4vM,11849
 deriva_ml/demo_catalog.py,sha256=9Qo3JD4bUIwnL3ngPctc2QBeWApvMR_5UyaK9ockTrY,11536
-deriva_ml/deriva_definitions.py,sha256=MZl3c23gArbS-0HZ24VDAyb8HI2Kcb8hFdhSnBLOLfo,9030
-deriva_ml/deriva_ml_base.py,sha256=JYTG_a8SURhrPQBTz6OaGMk0D0sSPWpXqCnoVnSNViI,38501
+deriva_ml/deriva_definitions.py,sha256=avdOgxtB60yb8XsWm-AYtCdvg2QkQbyfkZuA9xx9t2U,9221
+deriva_ml/deriva_ml_base.py,sha256=FYSTQl4mNePC8IxC70rS5D0VmLNPccfFkkiVneDxJpY,38678
 deriva_ml/deriva_model.py,sha256=wytGCAHutiUaRfnRKr80Ks_P6ci0_wXRU3vq3lthfYU,13260
-deriva_ml/execution.py,sha256=t20sGqPRcUaG-5LLHPaQ01pPP8XpqiCveS1h-Fw_XbQ,38093
-deriva_ml/execution_configuration.py,sha256=WiA4PPijNZUftExN6Qm1YScVD1OY3depNKTutIwOfUg,14063
+deriva_ml/execution.py,sha256=otMkdjF15SEWg99mvWrTpnKz7-BWp9b8XbFf6iwfmtg,37697
+deriva_ml/execution_configuration.py,sha256=7fiIbtzz9nmkxA9-GTiN6Ln2twfaOLivwJwGZb8gAL0,14163
 deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
 deriva_ml/feature.py,sha256=07g0uSrhumdopJluWuWSRMrzagaikAOihqB09bzXBP4,5475
 deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
-deriva_ml/run_notebook.py,sha256=XzI38WNsu9CKDYbWMt8b5ODtlp27dsWsSuMkKwfeWOE,5484
+deriva_ml/run_notebook.py,sha256=vhmij4P1Va52MIj8hOc-WmjLRp3sTmK6p7LXCWrzejc,6308
 deriva_ml/test_functions.py,sha256=-eqLHjjCQCLBNAr1ofbZekNiCOfMISSACRxT_YHER8I,4396
-deriva_ml/test_notebook.ipynb,sha256=CatQIh9whsmYWGpwuyw9XMggQ9-TlCueTyH3Wiv4aBc,3116
+deriva_ml/test_notebook.ipynb,sha256=_5D6rkSGbmENPJZbDgfZ6-yt94BNEwxytVUDmG3RE3w,10166
 deriva_ml/upload.py,sha256=gHTGXAVlf56EwNzmw5zY0gbBf8h08eU2q2GBbb2FdVc,16087
 deriva_ml/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deriva_ml/schema_setup/annotations.py,sha256=v0gTpmWYxRqsQ-bcnQzsr8WowGv2pi9pZUsO3WWnu1U,9528
 deriva_ml/schema_setup/create_schema.py,sha256=hNMc-v5tferd0UjfdB6nBw7Rc_o-Mg6NkPqQGie9YOw,11700
 deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
 deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
-deriva_ml-1.13.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deriva_ml-1.13.0.dist-info/METADATA,sha256=YxPB1VnpB-Y8KL4Yp3VKAYq7F5EUp-R7MfZ1uhWpRZs,999
-deriva_ml-1.13.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-deriva_ml-1.13.0.dist-info/entry_points.txt,sha256=cJnALMa6pjdk6RQCt4HFbKHqALpVa0k6wPeQDPedLJI,295
-deriva_ml-1.13.0.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
-deriva_ml-1.13.0.dist-info/RECORD,,
+deriva_ml/test-files/execution-parameters.json,sha256=1vBqXlaMa0cysonE20TweVDfTGRdSi9CUuAkW1xiYNo,36
+deriva_ml/test-files/notebook-parameters.json,sha256=7uEE2sLQSrSc9cEGQ_RKE7t5dwkEYv0qLo5mRbzo8Og,108
+deriva_ml-1.13.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deriva_ml-1.13.2.dist-info/METADATA,sha256=uuvCztFgxOwWM34egjr65pW8-2pYGCtV_xofT5TmcLg,999
+deriva_ml-1.13.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+deriva_ml-1.13.2.dist-info/entry_points.txt,sha256=cJnALMa6pjdk6RQCt4HFbKHqALpVa0k6wPeQDPedLJI,295
+deriva_ml-1.13.2.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
+deriva_ml-1.13.2.dist-info/RECORD,,
deriva_ml-1.13.2.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (79.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any