deriva-ml 1.6.7__tar.gz → 1.6.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {deriva_ml-1.6.7/src/deriva_ml.egg-info → deriva_ml-1.6.8}/PKG-INFO +1 -1
  2. deriva_ml-1.6.8/src/deriva_ml/VERSION.py +1 -0
  3. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/deriva_definitions.py +4 -1
  4. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/deriva_ml_base.py +99 -8
  5. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution.py +64 -245
  6. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution_configuration.py +1 -1
  7. {deriva_ml-1.6.7 → deriva_ml-1.6.8/src/deriva_ml.egg-info}/PKG-INFO +1 -1
  8. deriva_ml-1.6.8/tests/test_execution.py +148 -0
  9. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_upload.py +9 -7
  10. deriva_ml-1.6.7/src/deriva_ml/VERSION.py +0 -1
  11. deriva_ml-1.6.7/tests/test_execution.py +0 -118
  12. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/LICENSE +0 -0
  13. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/README.md +0 -0
  14. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/pyproject.toml +0 -0
  15. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/setup.cfg +0 -0
  16. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/__init__.py +0 -0
  17. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/__init__.py +0 -0
  18. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/alter_annotation.py +0 -0
  19. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/annotation_temp.py +0 -0
  20. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/create_schema.py +0 -0
  21. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/table_comments_utils.py +0 -0
  22. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/database_model.py +0 -0
  23. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/dataset.py +0 -0
  24. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/dataset_aux_classes.py +0 -0
  25. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/dataset_bag.py +0 -0
  26. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/demo_catalog.py +0 -0
  27. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/deriva_model.py +0 -0
  28. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution_environment.py +0 -0
  29. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/feature.py +0 -0
  30. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/history.py +0 -0
  31. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/__init__.py +0 -0
  32. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/alter_annotation.py +0 -0
  33. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/annotations.py +0 -0
  34. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/create_schema.py +0 -0
  35. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/policy.json +0 -0
  36. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/table_comments_utils.py +0 -0
  37. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/test_functions.py +0 -0
  38. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/upload.py +0 -0
  39. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  40. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  41. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  42. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/requires.txt +0 -0
  43. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/top_level.txt +0 -0
  44. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_basic_tables.py +0 -0
  45. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_dataset.py +0 -0
  46. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_download.py +0 -0
  47. {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_features.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deriva-ml
3
- Version: 1.6.7
3
+ Version: 1.6.8
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -0,0 +1 @@
1
+ __version__ = "1.6.8"
@@ -70,8 +70,11 @@ class Status(StrEnum):
70
70
 
71
71
  """
72
72
 
73
- running = "Running"
73
+ initializing = "Initializing"
74
+ created = "Created"
74
75
  pending = "Pending"
76
+ running = "Running"
77
+ aborted = "Aborted"
75
78
  completed = "Completed"
76
79
  failed = "Failed"
77
80
 
@@ -11,14 +11,17 @@ relationships that follow a specific data model.
11
11
  import getpass
12
12
  import logging
13
13
  from datetime import datetime
14
+ import hashlib
14
15
  from itertools import chain
15
16
  from pathlib import Path
17
+ import requests
16
18
  from typing import Optional, Any, Iterable, TYPE_CHECKING
17
19
  from deriva.core import (
18
20
  ErmrestCatalog,
19
21
  get_credential,
20
22
  urlquote,
21
23
  DEFAULT_SESSION_CONFIG,
24
+ format_exception,
22
25
  )
23
26
  import deriva.core.datapath as datapath
24
27
  from deriva.core.datapath import DataPathException
@@ -27,7 +30,7 @@ from deriva.core.ermrest_model import Key, Table
27
30
  from deriva.core.hatrac_store import HatracStore
28
31
  from pydantic import validate_call, ConfigDict
29
32
 
30
- from .execution_configuration import ExecutionConfiguration
33
+ from .execution_configuration import ExecutionConfiguration, Workflow
31
34
  from .feature import Feature, FeatureRecord
32
35
  from .dataset import Dataset
33
36
  from .deriva_model import DerivaModel
@@ -47,6 +50,7 @@ from .deriva_definitions import (
47
50
  DerivaMLException,
48
51
  ML_SCHEMA,
49
52
  VocabularyTerm,
53
+ MLVocab,
50
54
  )
51
55
 
52
56
  if TYPE_CHECKING:
@@ -122,6 +126,7 @@ class DerivaML(Dataset):
122
126
  self.ml_schema = ml_schema
123
127
  self.version = model_version
124
128
  self.configuration = None
129
+ self._execution: Optional[Execution] = None
125
130
 
126
131
  self.domain_schema = self.model.domain_schema
127
132
  self.project_name = project_name or self.domain_schema
@@ -145,6 +150,10 @@ class DerivaML(Dataset):
145
150
  f"Loading dirty model. Consider commiting and tagging: {self.version}"
146
151
  )
147
152
 
153
+ def __del__(self):
154
+ if self._execution and self._execution.status != Status.completed:
155
+ self._execution.update_status(Status.aborted, f"Execution Aborted")
156
+
148
157
  @staticmethod
149
158
  def _get_session_config():
150
159
  """ """
@@ -187,7 +196,7 @@ class DerivaML(Dataset):
187
196
  return table_path(
188
197
  self.working_dir,
189
198
  schema=self.domain_schema,
190
- table=self.model.namne_to_table(table).name,
199
+ table=self.model.name_to_table(table).name,
191
200
  )
192
201
 
193
202
  def asset_dir(
@@ -688,19 +697,29 @@ class DerivaML(Dataset):
688
697
  for v in pb.schemas[table.schema.name].tables[table.name].entities().fetch()
689
698
  ]
690
699
 
691
- def download_asset(self, asset_url: str, dest_filename: str) -> Path:
700
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
701
+ def download_asset(self, asset_rid: RID, dest_dir: Path) -> Path:
692
702
  """Download an asset from a URL and place it in a local directory.
693
703
 
694
704
  Args:
695
- asset_url: URL of the asset.
696
- dest_filename: Destination filename.
705
+ asset_rid: URL of the asset.
706
+ dest_dir: Destination directory for the asset.
697
707
 
698
708
  Returns:
699
709
  A Path object to the downloaded asset.
700
710
  """
711
+ table = self.resolve_rid(asset_rid).table
712
+ if not self.model.is_asset(table):
713
+ raise DerivaMLException(f"RID {asset_rid} is not for an asset table.")
714
+
715
+ tpath = self.pathBuilder.schemas[table.schema.name].tables[table.name]
716
+ asset_metadata = list(tpath.filter(tpath.RID == asset_rid).entities())[0]
717
+ asset_url = asset_metadata["URL"]
718
+ asset_filename = dest_dir / asset_metadata["Filename"]
719
+
701
720
  hs = HatracStore("https", self.host_name, self.credential)
702
- hs.get_obj(path=asset_url, destfilename=dest_filename)
703
- return Path(dest_filename)
721
+ hs.get_obj(path=asset_url, destfilename=asset_filename.as_posix())
722
+ return Path(asset_filename)
704
723
 
705
724
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
706
725
  def upload_assets(
@@ -761,6 +780,72 @@ class DerivaML(Dataset):
761
780
  ]
762
781
  )
763
782
 
783
+ def list_workflows(self) -> list[Workflow]:
784
+ workflow_path = self.pathBuilder.schemas[self.ml_schema].Workflow
785
+ return [
786
+ Workflow(
787
+ name=w["Name"],
788
+ url=w["URL"],
789
+ workflow_type=w["Workflow_Type"],
790
+ version=w["Version"],
791
+ description=w["Description"],
792
+ )
793
+ for w in workflow_path.entities().fetch()
794
+ ]
795
+
796
+ def add_workflow(self, workflow: Workflow) -> RID:
797
+ """Add a workflow to the Workflow table.
798
+
799
+ Args:
800
+ - url(str): URL of the workflow.
801
+ - workflow_type(str): Type of the workflow.
802
+ - version(str): Version of the workflow.
803
+ - description(str): Description of the workflow.
804
+
805
+ Returns:
806
+ - str: Resource Identifier (RID) of the added workflow.
807
+
808
+ """
809
+
810
+ # Check to make sure that the workflow is not already in the table. If it's not, add it.
811
+ def get_checksum(url) -> str:
812
+ """Get the checksum of a file from a URL."""
813
+ try:
814
+ response = requests.get(url)
815
+ response.raise_for_status()
816
+ except Exception:
817
+ raise DerivaMLException(f"Invalid URL: {url}")
818
+ else:
819
+ sha256_hash = hashlib.sha256()
820
+ sha256_hash.update(response.content)
821
+ checksum = "SHA-256: " + sha256_hash.hexdigest()
822
+ return checksum
823
+
824
+ ml_schema_path = self.pathBuilder.schemas[self.ml_schema]
825
+ try:
826
+ url_column = ml_schema_path.Workflow.URL
827
+ workflow_record = list(
828
+ ml_schema_path.Workflow.filter(url_column == workflow.url).entities()
829
+ )[0]
830
+ workflow_rid = workflow_record["RID"]
831
+ except IndexError:
832
+ # Record doesn't exist already
833
+ workflow_record = {
834
+ "URL": workflow.url,
835
+ "Name": workflow.name,
836
+ "Description": workflow.description,
837
+ "Checksum": get_checksum(workflow.url),
838
+ "Version": workflow.version,
839
+ MLVocab.workflow_type: self.lookup_term(
840
+ MLVocab.workflow_type, workflow.workflow_type
841
+ ).name,
842
+ }
843
+ workflow_rid = ml_schema_path.Workflow.insert([workflow_record])[0]["RID"]
844
+ except Exception as e:
845
+ error = format_exception(e)
846
+ raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
847
+ return workflow_rid
848
+
764
849
  # @validate_call
765
850
  def create_execution(self, configuration: ExecutionConfiguration) -> "Execution":
766
851
  """Create an execution object
@@ -779,7 +864,13 @@ class DerivaML(Dataset):
779
864
  """
780
865
  from .execution import Execution
781
866
 
782
- return Execution(configuration, self)
867
+ if self._execution:
868
+ DerivaMLException(
869
+ f"Only one execution can be created for a Deriva ML instance."
870
+ )
871
+ else:
872
+ self._execution = Execution(configuration, self)
873
+ return self._execution
783
874
 
784
875
  # @validate_call
785
876
  def restore_execution(self, execution_rid: Optional[RID] = None) -> "Execution":
@@ -1,13 +1,13 @@
1
+ from __future__ import annotations
2
+
1
3
  from collections import defaultdict
2
4
  import csv
3
- import hashlib
4
5
  import json
5
6
  import logging
6
7
  import os
7
8
  import shutil
8
9
  from datetime import datetime
9
10
  from pathlib import Path
10
- import requests
11
11
  from tempfile import NamedTemporaryFile
12
12
  from typing import Iterable, Any, Optional
13
13
  from deriva.core import format_exception
@@ -96,7 +96,8 @@ class Execution:
96
96
  self.configuration = configuration
97
97
  self._ml_object = ml_object
98
98
  self.start_time = None
99
- self.status = Status.pending
99
+ self.status = Status.created
100
+ self.uploaded_assets: list[Path] = []
100
101
 
101
102
  self.dataset_rids: list[RID] = []
102
103
  self.datasets: list[DatasetBag] = []
@@ -104,7 +105,27 @@ class Execution:
104
105
  self._working_dir = self._ml_object.working_dir
105
106
  self._cache_dir = self._ml_object.cache_dir
106
107
 
107
- self.workflow_rid = self._add_workflow()
108
+ self.workflow_rid = self.configuration.workflow
109
+
110
+ if self._ml_object.resolve_rid(configuration.workflow).table.name != "Workflow":
111
+ raise DerivaMLException(
112
+ f"Workflow specified in execution configuration is not a Workflow"
113
+ )
114
+
115
+ for d in self.configuration.datasets:
116
+ if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
117
+ raise DerivaMLException(
118
+ f"Dataset specified in execution configuration is not a dataset"
119
+ )
120
+
121
+ for a in self.configuration.assets:
122
+ if not self._ml_object.model.is_asset(
123
+ self._ml_object.resolve_rid(a).table.name
124
+ ):
125
+ raise DerivaMLException(
126
+ f"Asset specified in execution configuration is not a asset table"
127
+ )
128
+
108
129
  schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
109
130
  if reload:
110
131
  self.execution_rid = reload
@@ -117,47 +138,10 @@ class Execution:
117
138
  }
118
139
  ]
119
140
  )[0]["RID"]
120
- self._initialize_execution(reload)
121
-
122
- def _add_workflow(self) -> RID:
123
- """Add a workflow to the Workflow table.
124
-
125
- Args:
126
- - url(str): URL of the workflow.
127
- - workflow_type(str): Type of the workflow.
128
- - version(str): Version of the workflow.
129
- - description(str): Description of the workflow.
130
141
 
131
- Returns:
132
- - str: Resource Identifier (RID) of the added workflow.
133
-
134
- """
135
- workflow = self.configuration.workflow
136
- # Check to make sure that the workflow is not already in the table. If it's not, add it.
137
- ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
138
- try:
139
- url_column = ml_schema_path.Workflow.URL
140
- workflow_record = list(
141
- ml_schema_path.Workflow.filter(url_column == workflow.url).entities()
142
- )[0]
143
- workflow_rid = workflow_record["RID"]
144
- except IndexError:
145
- # Record doesn't exist already
146
- workflow_record = {
147
- "URL": workflow.url,
148
- "Name": workflow.name,
149
- "Description": workflow.description,
150
- "Checksum": self._get_checksum(workflow.url),
151
- "Version": workflow.version,
152
- MLVocab.workflow_type: self._ml_object.lookup_term(
153
- MLVocab.workflow_type, workflow.workflow_type
154
- ).name,
155
- }
156
- workflow_rid = ml_schema_path.Workflow.insert([workflow_record])[0]["RID"]
157
- except Exception as e:
158
- error = format_exception(e)
159
- raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
160
- return workflow_rid
142
+ # Create a directory for execution rid so we can recover state in case of a crash.
143
+ execution_root(prefix=self._ml_object.working_dir, exec_rid=self.execution_rid)
144
+ self._initialize_execution(reload)
161
145
 
162
146
  def _save_runtime_environment(self):
163
147
 
@@ -186,7 +170,9 @@ class Execution:
186
170
  """
187
171
  # Materialize bdbag
188
172
  for dataset in self.configuration.datasets:
189
- self.update_status(Status.running, f"Materialize bag {dataset.rid}... ")
173
+ self.update_status(
174
+ Status.initializing, f"Materialize bag {dataset.rid}... "
175
+ )
190
176
  self.datasets.append(self.download_dataset_bag(dataset))
191
177
  self.dataset_rids.append(dataset.rid)
192
178
  # Update execution info
@@ -199,13 +185,14 @@ class Execution:
199
185
  ]
200
186
  )
201
187
 
202
- # Download model
188
+ # Download assets....
203
189
  self.update_status(Status.running, "Downloading assets ...")
204
- asset_path = self._asset_dir().as_posix()
205
190
  self.asset_paths = [
206
- self._download_execution_file(file_rid=m, dest_dir=asset_path)
207
- for m in self.configuration.assets
191
+ self._ml_object.download_asset(asset_rid=a, dest_dir=self._asset_dir())
192
+ for a in self.configuration.assets
208
193
  ]
194
+ if self.asset_paths and not reload:
195
+ self._update_execution_asset_table(self.configuration.assets)
209
196
 
210
197
  # Save configuration details for later upload
211
198
  exec_config_path = ExecMetadataVocab.execution_config.value
@@ -217,29 +204,7 @@ class Execution:
217
204
  self._save_runtime_environment()
218
205
 
219
206
  self.start_time = datetime.now()
220
- self.update_status(Status.running, "Initialize status finished.")
221
-
222
- @staticmethod
223
- def _get_checksum(url) -> str:
224
- """Get the checksum of a file from a URL.
225
-
226
- Args:
227
- url:
228
-
229
- Returns:
230
- str: Checksum of the file.
231
-
232
- """
233
- try:
234
- response = requests.get(url)
235
- response.raise_for_status()
236
- except Exception:
237
- raise DerivaMLException(f"Invalid URL: {url}")
238
- else:
239
- sha256_hash = hashlib.sha256()
240
- sha256_hash.update(response.content)
241
- checksum = "SHA-256: " + sha256_hash.hexdigest()
242
- return checksum
207
+ self.update_status(Status.pending, "Initialize status finished.")
243
208
 
244
209
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
245
210
  def download_dataset_bag(self, dataset: DatasetSpec) -> DatasetBag:
@@ -276,8 +241,10 @@ class Execution:
276
241
 
277
242
  def execution_start(self) -> None:
278
243
  """ """
244
+
279
245
  self.start_time = datetime.now()
280
- self.update_status(Status.running, f"Start ML algorithm ...")
246
+ self.uploaded_assets = None
247
+ self.update_status(Status.initializing, f"Start ML algorithm ...")
281
248
 
282
249
  def execution_stop(self) -> None:
283
250
  """Finish the execution and update the duration and status of execution."""
@@ -318,7 +285,15 @@ class Execution:
318
285
  for r in results.values()
319
286
  if r.state == UploadState.success and "Execution_Asset_Type" in r.result
320
287
  ]
288
+ execution_metadata = [
289
+ r.result["RID"]
290
+ for r in results.values()
291
+ if r.state == UploadState.success
292
+ and "Execution_Metadata_Type" in r.result
293
+ ]
321
294
  self._update_execution_asset_table(execution_assets)
295
+ self._update_execution_metadata_table(execution_metadata)
296
+
322
297
  except Exception as e:
323
298
  error = format_exception(e)
324
299
  self.update_status(Status.failed, error)
@@ -413,53 +388,6 @@ class Execution:
413
388
  path.mkdir(parents=True, exist_ok=True)
414
389
  return path
415
390
 
416
- def _download_execution_file(self, file_rid: RID, dest_dir: str = "") -> Path:
417
- """Download execution assets.
418
-
419
- Args:
420
- file_rid(str): Resource Identifier (RID) of the file.
421
- dest_dir(str): Destination directory for the downloaded assets.
422
-
423
- Returns:
424
- - Path: Path to the downloaded asset.
425
-
426
- Raises:
427
- - DerivaMLException: If there is an issue downloading the assets.
428
- """
429
- table = self._ml_object.resolve_rid(file_rid).table
430
- if not self._ml_object.model.is_asset(table):
431
- raise DerivaMLException(f"Table {table} is not an asset table.")
432
-
433
- pb = self._ml_object.pathBuilder
434
- ml_schema_path = pb.schemas[self._ml_object.ml_schema]
435
- tpath = pb.schemas[table.schema.name].tables[table.name]
436
- file_metadata = list(tpath.filter(tpath.RID == file_rid).entities())[0]
437
- file_url = file_metadata["URL"]
438
- file_name = file_metadata["Filename"]
439
- try:
440
- self.update_status(Status.running, f"Downloading {table.name}...")
441
- file_path = self._ml_object.download_asset(
442
- file_url, str(dest_dir) + "/" + file_name
443
- )
444
- except Exception as e:
445
- error = format_exception(e)
446
- self.update_status(Status.failed, error)
447
- raise DerivaMLException(
448
- f"Failed to download the file {file_rid}. Error: {error}"
449
- )
450
-
451
- ass_table = table.name + "_Execution"
452
- ass_table_path = ml_schema_path.tables[ass_table]
453
- exec_file_exec_entities = ass_table_path.filter(
454
- ass_table_path.columns[table.name] == file_rid
455
- ).entities()
456
- exec_list = [e["Execution"] for e in exec_file_exec_entities]
457
- if self.execution_rid not in exec_list:
458
- tpath = pb.schemas[self._ml_object.ml_schema].tables[ass_table]
459
- tpath.insert([{table.name: file_rid, "Execution": self.execution_rid}])
460
- self.update_status(Status.running, f"Successfully download {table.name}...")
461
- return Path(file_path)
462
-
463
391
  def _clean_folder_contents(self, folder_path: Path):
464
392
  """
465
393
 
@@ -477,47 +405,6 @@ class Execution:
477
405
  error = format_exception(e)
478
406
  self.update_status(Status.failed, error)
479
407
 
480
- # def _update_execution_metadata_table(
481
- # self, assets: dict[str, FileUploadState]
482
- # ) -> None:
483
- # """Upload execution metadata at _working_dir/Execution_metadata.
484
- #
485
- # Args:
486
- # assets: dict[str:
487
- # FileUploadState]:
488
- #
489
- # Raises:
490
- # - DerivaMLException: If there is an issue uploading the metadata.
491
- # """
492
- # ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
493
- # a_table = list(
494
- # self._ml_object.model.schemas[self._ml_object.ml_schema]
495
- # .tables["Execution_Metadata"]
496
- # .find_associations()
497
- # )[0].name
498
- #
499
- # def asset_rid(asset) -> str:
500
- # """
501
- #
502
- # Args:
503
- # asset:
504
- #
505
- # Returns:
506
- #
507
- # """
508
- # return (
509
- # asset.state == UploadState.success
510
- # and asset.result
511
- # and asset.result["RID"]
512
- # )
513
- #
514
- # entities = [
515
- # {"Execution_Metadata": rid, "Execution": self.execution_rid}
516
- # for asset in assets.values()
517
- # if (rid := asset_rid(asset))
518
- # ]
519
- # ml_schema_path.tables[a_table].insert(entities)
520
-
521
408
  def _update_feature_table(
522
409
  self,
523
410
  target_table: str,
@@ -568,6 +455,15 @@ class Execution:
568
455
  entities = [map_path(e) for e in csv.DictReader(feature_values)]
569
456
  self._ml_object.domain_path.tables[feature_table].insert(entities)
570
457
 
458
+ def _update_execution_metadata_table(self, assets: list[RID]) -> None:
459
+ """Upload execution metadata at _working_dir/Execution_metadata."""
460
+ ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
461
+ entities = [
462
+ {"Execution_Metadata": metadata_rid, "Execution": self.execution_rid}
463
+ for metadata_rid in assets
464
+ ]
465
+ ml_schema_path.Execution_Metadata_Execution.insert(entities)
466
+
571
467
  def _update_execution_asset_table(self, assets: list[RID]) -> None:
572
468
  """Assets associated with an execution must be linked to an execution entity after they are uploaded into
573
469
  the catalog. This routine takes a list of uploaded assets and makes that association.
@@ -576,17 +472,9 @@ class Execution:
576
472
  assets: list of RIDS for execution assets.:
577
473
  """
578
474
  ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
579
- asset_exec_entities = ml_schema_path.Execution_Asset_Execution.filter(
580
- ml_schema_path.Execution_Asset_Execution.Execution == self.execution_rid
581
- ).entities()
582
- existing_assets = {e["Execution_Asset"] for e in asset_exec_entities}
583
-
584
- # Now got through the list of recently added assets, and add an entry for this asset if it
585
- # doesn't already exist.
586
475
  entities = [
587
476
  {"Execution_Asset": asset_rid, "Execution": self.execution_rid}
588
477
  for asset_rid in assets
589
- if asset_rid not in existing_assets
590
478
  ]
591
479
  ml_schema_path.Execution_Asset_Execution.insert(entities)
592
480
 
@@ -741,13 +629,9 @@ class Execution:
741
629
  self._working_dir, schema=self._ml_object.domain_schema, table=table
742
630
  )
743
631
 
744
- def execute(self) -> "DerivaMLExec":
745
- """Generate a context manager for a DerivaML execution.
746
-
747
- Returns:
748
- A DerivaMLExec object
749
- """
750
- return DerivaMLExec(self)
632
+ def execute(self) -> Execution:
633
+ """Initiate an execution with provided configuration. Can be used in a context manager."""
634
+ return self
751
635
 
752
636
  @validate_call
753
637
  def write_feature_file(self, features: Iterable[FeatureRecord]) -> None:
@@ -801,20 +685,6 @@ class Execution:
801
685
  ]
802
686
  return "\n".join(items)
803
687
 
804
-
805
- class DerivaMLExec:
806
- """Context manager for managing DerivaML execution.
807
-
808
- Provides status updates. For convenience, asset discovery and creation functions from the
809
- Execution object are provided.
810
- """
811
-
812
- def __init__(self, execution: Execution):
813
- self.execution = execution
814
- self.execution_rid = execution.execution_rid
815
- self.start_time = datetime.now()
816
- self.uploaded_assets = None
817
-
818
688
  def __enter__(self):
819
689
  """
820
690
  Method invoked when entering the context.
@@ -823,7 +693,7 @@ class DerivaMLExec:
823
693
  - self: The instance itself.
824
694
 
825
695
  """
826
- self.execution.execution_start()
696
+ self.execution_start()
827
697
  return self
828
698
 
829
699
  def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> bool:
@@ -839,11 +709,11 @@ class DerivaMLExec:
839
709
  bool: True if execution completed successfully, False otherwise.
840
710
  """
841
711
  if not exc_type:
842
- self.execution.update_status(Status.running, "Successfully run Ml.")
843
- self.execution.execution_stop()
712
+ self.update_status(Status.running, "Successfully run Ml.")
713
+ self.execution_stop()
844
714
  return True
845
715
  else:
846
- self.execution.update_status(
716
+ self.update_status(
847
717
  Status.failed,
848
718
  f"Exception type: {exc_type}, Exception value: {exc_value}",
849
719
  )
@@ -851,54 +721,3 @@ class DerivaMLExec:
851
721
  f"Exception type: {exc_type}, Exception value: {exc_value}, Exception traceback: {exc_tb}"
852
722
  )
853
723
  return False
854
-
855
- def execution_asset_path(self, asset_type: str) -> Path:
856
- """Return path to where execution assets of specified type should be placed.
857
-
858
- Args:
859
- asset_type: str:
860
-
861
- Returns:
862
- Path to the directory in which to place asset files.
863
- """
864
- return self.execution.execution_asset_path(asset_type)
865
-
866
- def execution_metadata_path(self, metadata_type: str) -> Path:
867
- """Return path to where execution metadata of specified type should be placed.
868
-
869
- Args:
870
- metadata_type: Term from metadata type vocabulary.
871
-
872
- Returns:
873
- Path to the directory in which to place metadata files.
874
- """
875
- return self.execution.execution_metadata_path(metadata_type)
876
-
877
- def feature_paths(
878
- self, table: Table | str, feature_name: str
879
- ) -> tuple[Path, dict[str, Path]]:
880
- """Return the file path of where to place feature values, and assets for the named feature and table.
881
-
882
- A side effect of calling this routine is that the directories in which to place the feature values and assets
883
- will be created
884
-
885
- Args:
886
- table: The table with which the feature is associated.
887
- feature_name: Name of the feature
888
-
889
- Returns:
890
- A tuple whose first element is the path for the feature values and whose second element is a dictionary
891
- of associated asset table names and corresponding paths.
892
- """
893
- return self.execution.feature_paths(table, feature_name)
894
-
895
- def table_path(self, table: Table | str) -> Path:
896
- """Path in the local file system for tables to be uploaded as part of the execution.
897
-
898
- Args:
899
- table: Table|str:
900
-
901
- Returns:
902
-
903
- """
904
- return self.execution.table_path(table)
@@ -49,7 +49,7 @@ class ExecutionConfiguration(BaseModel):
49
49
 
50
50
  datasets: conlist(DatasetSpec) = []
51
51
  assets: list[RID | str] = [] # List of RIDs to model files.
52
- workflow: Workflow
52
+ workflow: RID
53
53
  description: str = ""
54
54
 
55
55
  model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deriva-ml
3
- Version: 1.6.7
3
+ Version: 1.6.8
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -0,0 +1,148 @@
1
+ from idlelib.run import manage_socket
2
+
3
+ from derivaml_test import TestDerivaML
4
+ from deriva_ml import (
5
+ MLVocab as vc,
6
+ Workflow,
7
+ ExecutionConfiguration,
8
+ DatasetSpec,
9
+ DerivaML,
10
+ )
11
+
12
+
13
+ class TestExecution(TestDerivaML):
14
+ def test_execution_no_download(self):
15
+ self.ml_instance.add_term(
16
+ vc.workflow_type,
17
+ "Manual Workflow",
18
+ description="Initial setup of Model File",
19
+ )
20
+ self.ml_instance.add_term(
21
+ vc.execution_asset_type,
22
+ "API_Model",
23
+ description="Model for our API workflow",
24
+ )
25
+ self.ml_instance.add_term(
26
+ vc.workflow_type,
27
+ "ML Demo",
28
+ description="A ML Workflow that uses Deriva ML API",
29
+ )
30
+
31
+ api_workflow = self.ml_instance.add_workflow(
32
+ Workflow(
33
+ name="Manual Workflow",
34
+ url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/tests/test_execution.py",
35
+ workflow_type="Manual Workflow",
36
+ description="A manual operation",
37
+ )
38
+ )
39
+
40
+ manual_execution = self.ml_instance.create_execution(
41
+ ExecutionConfiguration(
42
+ description="Sample Execution", workflow=api_workflow
43
+ )
44
+ )
45
+ with manual_execution as e:
46
+ pass
47
+ manual_execution.upload_execution_outputs()
48
+
49
+ def test_execution_download(self):
50
+ self.populate_catalog()
51
+ double_nested, nested, datasets = self.create_nested_dataset()
52
+
53
+ self.ml_instance.add_term(
54
+ vc.execution_asset_type,
55
+ "API_Model",
56
+ description="Model for our API workflow",
57
+ )
58
+ self.ml_instance.add_term(
59
+ vc.workflow_type,
60
+ "ML Demo",
61
+ description="A ML Workflow that uses Deriva ML API",
62
+ )
63
+ api_workflow = self.ml_instance.add_workflow(
64
+ Workflow(
65
+ name="ML Demo",
66
+ url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
67
+ workflow_type="ML Demo",
68
+ description="A workflow that uses Deriva ML",
69
+ )
70
+ )
71
+ execution_model = self.create_execution_asset(api_workflow)
72
+
73
+ config = ExecutionConfiguration(
74
+ datasets=[
75
+ DatasetSpec(
76
+ rid=nested[0],
77
+ version=self.ml_instance.dataset_version(nested[0]),
78
+ ),
79
+ DatasetSpec(
80
+ rid=nested[1],
81
+ version=self.ml_instance.dataset_version(nested[1]),
82
+ ),
83
+ ],
84
+ assets=[execution_model],
85
+ description="Sample Execution",
86
+ workflow=api_workflow,
87
+ )
88
+ exec = self.ml_instance.create_execution(config)
89
+ with exec as e:
90
+ print(e.asset_paths)
91
+ print(e.datasets)
92
+ self.assertEqual(1, len(e.asset_paths))
93
+ self.assertEqual(2, len(e.datasets))
94
+ exec.upload_execution_outputs()
95
+ pb = self.ml_instance.pathBuilder.schemas[self.ml_instance.ml_schema]
96
+ execution_asset_execution = pb.Execution_Asset_Execution
97
+ execution_metadata_execution = pb.Execution_Metadata_Execution
98
+ execution_asset = pb.Execution_Asset
99
+ execution_metadata = pb.Execution_Metadata
100
+
101
+ assets_execution = [
102
+ {
103
+ "RID": a["RID"],
104
+ "Execution_Asset": a["Execution_Asset"],
105
+ "Execution": a["Execution"],
106
+ }
107
+ for a in execution_asset_execution.entities().fetch()
108
+ if a["Execution"] == exec.execution_rid
109
+ ]
110
+ metadata_execution = [
111
+ {
112
+ "RID": a["RID"],
113
+ "Execution": a["Execution"],
114
+ "Execution_Metadata": a["Execution_Metadata"],
115
+ }
116
+ for a in execution_metadata_execution.entities().fetch()
117
+ if a["Execution"] == exec.execution_rid
118
+ ]
119
+ execution_assets = [
120
+ {"RID": a["RID"], "Filename": a["Filename"]}
121
+ for a in execution_asset.entities().fetch()
122
+ ]
123
+ execution_metadata = [
124
+ {"RID": a["RID"], "Filename": a["Filename"]}
125
+ for a in execution_metadata.entities().fetch()
126
+ ]
127
+ print(assets_execution)
128
+ print(metadata_execution)
129
+ print(execution_assets)
130
+ print(execution_metadata)
131
+ self.assertEqual(1, len(assets_execution))
132
+ self.assertEqual(2, len(metadata_execution))
133
+
134
+ def create_execution_asset(self, api_workflow):
135
+ manual_execution = self.ml_instance.create_execution(
136
+ ExecutionConfiguration(
137
+ description="Sample Execution", workflow=api_workflow
138
+ )
139
+ )
140
+ model_file = (
141
+ manual_execution.execution_asset_path("API_Model") / "modelfile.txt"
142
+ )
143
+ with open(model_file, "w") as fp:
144
+ fp.write(f"My model")
145
+ # Now upload the file and retrieve the RID of the new asset from the returned results.
146
+ uploaded_assets = manual_execution.upload_execution_outputs()
147
+ self.ml_instance._execution = None
148
+ return uploaded_assets["API_Model/modelfile.txt"].result["RID"]
@@ -71,13 +71,14 @@ class TestUpload(TestDerivaML):
71
71
  description="Model for our API workflow",
72
72
  )
73
73
 
74
- api_workflow = Workflow(
75
- name="Manual Workflow",
76
- url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/tests/test_upload.py",
77
- workflow_type="Manual Workflow",
78
- description="A manual operation",
74
+ api_workflow = self.ml_instance.add_workflow(
75
+ Workflow(
76
+ name="Manual Workflow",
77
+ url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/tests/test_upload.py",
78
+ workflow_type="Manual Workflow",
79
+ description="A manual operation",
80
+ )
79
81
  )
80
-
81
82
  manual_execution = self.ml_instance.create_execution(
82
83
  ExecutionConfiguration(
83
84
  description="Sample Execution", workflow=api_workflow
@@ -95,6 +96,7 @@ class TestUpload(TestDerivaML):
95
96
  uploaded_assets = manual_execution.upload_execution_outputs()
96
97
  path = self.ml_instance.catalog.getPathBuilder().schemas["deriva-ml"]
97
98
  self.assertEqual(1, len(list(path.Execution_Asset.entities().fetch())))
99
+
98
100
  execution_metadata = list(path.Execution_Metadata.entities().fetch())
99
- print([m["Filename"] for m in execution_metadata])
101
+ print([m for m in execution_metadata])
100
102
  self.assertEqual(2, len(execution_metadata))
@@ -1 +0,0 @@
1
- __version__ = "1.6.7"
@@ -1,118 +0,0 @@
1
- from derivaml_test import TestDerivaML
2
- from deriva_ml import MLVocab as vc, Workflow, ExecutionConfiguration, DatasetSpec
3
- from deriva_ml.demo_catalog import (
4
- reset_demo_catalog,
5
- populate_demo_catalog,
6
- create_demo_datasets,
7
- )
8
-
9
-
10
- class TestExecution(TestDerivaML):
11
- def test_execution_no_download(self):
12
- self.ml_instance.add_term(
13
- vc.workflow_type,
14
- "Manual Workflow",
15
- description="Initial setup of Model File",
16
- )
17
- self.ml_instance.add_term(
18
- vc.execution_asset_type,
19
- "API_Model",
20
- description="Model for our API workflow",
21
- )
22
- self.ml_instance.add_term(
23
- vc.workflow_type,
24
- "ML Demo",
25
- description="A ML Workflow that uses Deriva ML API",
26
- )
27
-
28
- api_workflow = Workflow(
29
- name="Manual Workflow",
30
- url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/tests/test_execution.py",
31
- workflow_type="Manual Workflow",
32
- description="A manual operation",
33
- )
34
-
35
- manual_execution = self.ml_instance.create_execution(
36
- ExecutionConfiguration(
37
- description="Sample Execution", workflow=api_workflow
38
- )
39
- )
40
- manual_execution.upload_execution_outputs()
41
-
42
- def test_execution_download(self):
43
- populate_demo_catalog(self.ml_instance, self.domain_schema)
44
- create_demo_datasets(self.ml_instance)
45
- exec_config = execution_test(self.ml_instance)
46
- exec = self.ml_instance.create_execution(exec_config)
47
-
48
-
49
- def execution_test(ml_instance):
50
- training_dataset_rid = [
51
- ds["RID"]
52
- for ds in ml_instance.find_datasets()
53
- if "Training" in ds["Dataset_Type"]
54
- ][0]
55
- testing_dataset_rid = [
56
- ds["RID"]
57
- for ds in ml_instance.find_datasets()
58
- if "Testing" in ds["Dataset_Type"]
59
- ][0]
60
-
61
- nested_dataset_rid = [
62
- ds["RID"]
63
- for ds in ml_instance.find_datasets()
64
- if "Partitioned" in ds["Dataset_Type"]
65
- ][0]
66
-
67
- ml_instance.add_term(
68
- vc.workflow_type, "Manual Workflow", description="Initial setup of Model File"
69
- )
70
- ml_instance.add_term(
71
- vc.execution_asset_type, "API_Model", description="Model for our API workflow"
72
- )
73
- ml_instance.add_term(
74
- vc.workflow_type, "ML Demo", description="A ML Workflow that uses Deriva ML API"
75
- )
76
- api_workflow = Workflow(
77
- name="Manual Workflow",
78
- url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/Notebooks/DerivaML%20Execution.ipynb",
79
- workflow_type="Manual Workflow",
80
- description="A manual operation",
81
- )
82
-
83
- manual_execution = ml_instance.create_execution(
84
- ExecutionConfiguration(description="Sample Execution", workflow=api_workflow)
85
- )
86
-
87
- # Now lets create model configuration for our program.
88
- model_file = manual_execution.execution_asset_path("API_Model") / "modelfile.txt"
89
- with open(model_file, "w") as fp:
90
- fp.write(f"My model")
91
-
92
- # Now upload the file and retrieve the RID of the new asset from the returned results.
93
- uploaded_assets = manual_execution.upload_execution_outputs()
94
-
95
- training_model_rid = uploaded_assets["API_Model/modelfile.txt"].result["RID"]
96
- api_workflow = Workflow(
97
- name="ML Demo",
98
- url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
99
- workflow_type="ML Demo",
100
- description="A workflow that uses Deriva ML",
101
- )
102
-
103
- config = ExecutionConfiguration(
104
- datasets=[
105
- DatasetSpec(
106
- rid=nested_dataset_rid,
107
- version=ml_instance.dataset_version(nested_dataset_rid),
108
- ),
109
- DatasetSpec(
110
- rid=testing_dataset_rid,
111
- version=ml_instance.dataset_version(testing_dataset_rid),
112
- ),
113
- ],
114
- assets=[training_model_rid],
115
- description="Sample Execution",
116
- workflow=api_workflow,
117
- )
118
- return config
File without changes
File without changes
File without changes
File without changes