PyPI - deriva-ml - Versions diffs - 1.8.11__tar.gz → 1.9.1__tar.gz - Mend

deriva-ml 1.8.11tar.gz → 1.9.1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

{deriva_ml-1.8.11/src/deriva_ml.egg-info → deriva_ml-1.9.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.8.11
+Version: 1.9.1
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
@@ -16,5 +16,14 @@ Requires-Dist: setuptools-scm<=6.0
 Requires-Dist: nbstripout
 Dynamic: license-file
-Deriva-ML is a python libary to simplify the process of creating and executing reproducible machine learning workflows
+# DerivaML
+Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
 using a deriva catalog.
+## Installing the GitHub CLI
+The script release.sh will create a new release tag in GitHub.  This script requires that the
+GitHub CLI be installed.
+See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.

deriva_ml-1.9.1/README.md ADDED Viewed

@@ -0,0 +1,11 @@
+# DerivaML
+Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
+using a deriva catalog.
+## Installing the GitHub CLI
+The script release.sh will create a new release tag in GitHub.  This script requires that the
+GitHub CLI be installed.
+See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/docs/Notebooks/DerivaML Execution.ipynb RENAMED Viewed

@@ -28,7 +28,7 @@
    "source": [
     "import builtins\n",
     "from deriva.core.utils.globus_auth_utils import GlobusNativeLogin\n",
-    "from deriva_ml import ExecutionConfiguration, Workflow, MLVocab, DerivaSystemColumns\n",
+    "from deriva_ml import ExecutionConfiguration, MLVocab, DerivaSystemColumns\n",
     "from deriva_ml.demo_catalog import create_demo_catalog, DemoML\n",
     "from IPython.display import display, Markdown, JSON\n",
     "import itertools\n",
@@ -166,12 +166,11 @@
    "metadata": {},
    "cell_type": "code",
    "source": [
-    "ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"Inital setup of Model File\")\n",
+    "ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"Initial setup of Model File\")\n",
     "ml_instance.add_term(MLVocab.execution_asset_type, \"API_Model\", description=\"Model for our API workflow\")\n",
     "\n",
-    "api_workflow = Workflow(\n",
+    "api_workflow = ml_instance.create_workflow(\n",
     "    name=\"Manual Workflow\",\n",
-    "    url='https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/Notebooks/DerivaML%20Execution.ipynb',\n",
     "    workflow_type=\"Manual Workflow\",\n",
     "    description=\"A manual operation\"\n",
     ")\n",
@@ -207,13 +206,6 @@
    "source": [
     "ml_instance.add_term(MLVocab.workflow_type, \"ML Demo\", description=\"A ML Workflow that uses Deriva ML API\")\n",
     "\n",
-    "api_workflow = Workflow(\n",
-    "    name=\"ML Demo\",\n",
-    "    url=\"https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml\",\n",
-    "    workflow_type=\"ML Demo\",\n",
-    "    description=\"A workflow that uses Deriva ML\"\n",
-    ")\n",
-    "\n",
     "config = ExecutionConfiguration(\n",
     "    datasets=[training_dataset_rid, {'rid':testing_dataset_rid, 'materialize':False}],\n",
     "    assets = [training_model_rid],\n",

deriva_ml-1.9.1/docs/user-guide/execution-configuration.md ADDED Viewed

@@ -0,0 +1,26 @@
+# Configuring an execution
+One of the essential functions of DerivaML is to help keep track of how ML model results are created so that they can be shared and reproduced.
+Every execution in DerivaML is represented by an Execution object, which keeps track of all of the parameters associated with an execution and
+provides a number of functions that enable a program to help keep track of the configuration and results of a model execution.
+The first step in creating a DerivaML execution is to create an `ExecutionConfiguration`.
+The `ExecutionConfiguration` class is used to specify the inputs that are to be used by an Execution.
+These inputs include
+* A list of datasets that are used
+* A list of other files (assets) that are to be used. This can include existing models, or any other information that the execution might need.
+* The actual code that is being executed.
+[`ExecutionConfiguration`][deriva_ml.execution_configuration.ExecutionConfiguration]  is a Pydantic dataclass.
+As part of initializing an execution, the assets and datasets in the configuration object are downloaded and cached.
+The datasets are provided as a list of `DatasetSpec` objects, which have the form
+```DatasetSpec(dataset_rid:RID, version:DatasetVersion, materialize:bool)```
+It will be common to just want to use the latest version of the dataset, in which case you would use:
+```
+deriva_ml = DerivaML(...)
+dataset_rid = ...
+datasets = [DatasetSpec(dataset_rid, version=deriva_ml.dataset_version(dataset_rid))]
+```
+If a dataset is large, downloading from the catalog might take a significant amount of time.

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/release.sh RENAMED Viewed

@@ -9,11 +9,12 @@ fi
 # Default version bump is patch unless specified (patch, minor, or major)
 VERSION_TYPE=${1:-patch}
 echo "Bumping version: $VERSION_TYPE"
 # Bump the version using bump-my-version.
 # This command should update version files, commit the changes, and create a Git tag.
-bump-my-version bump $VERSION_TYPE --verbose
+bump-my-version bump "$VERSION_TYPE" --verbose
 # Push commits and tags to the remote repository.
 echo "Pushing changes to remote repository..."
@@ -32,6 +33,6 @@ python -m build
 NEW_TAG=$(git describe --tags --abbrev=0)
 echo "New version tag: $NEW_TAG"
-twine upload dist/*${NEW_TAG}
+twine upload dist/*${NEW_TAG/v/}
 echo "Release process complete!"

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/database_model.py RENAMED Viewed

@@ -1,12 +1,15 @@
-"""Ths module constains the definition of the DatabaseModel class.  The role of this class is to provide an nterface between the BDBag representation
+"""Ths module contains the definition of the DatabaseModel class.  The role of this class is to provide an nterface between the BDBag representation
 of a dataset and a sqllite database in which the contents of the bag are stored.
 """
+from __future__ import annotations
 import logging
 import sqlite3
 from csv import reader
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Generator
 from urllib.parse import urlparse
 from deriva.core.ermrest_model import Model
@@ -20,7 +23,7 @@ from .dataset_bag import DatasetBag
 class DatabaseModelMeta(type):
     """Use metaclass to ensure that there is onl one instance per path"""
-    _paths_loaded: dict[Path:"DatabaseModel"] = {}
+    _paths_loaded: dict[Path, "DatabaseModel"] = {}
     def __call__(cls, *args, **kwargs):
         logger = logging.getLogger("deriva_ml")
@@ -47,7 +50,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
     Because of nested datasets, it's possible that more than one dataset rid is in a bag, or that a dataset rid might
     appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
     into DatabaseModels, is kept in the class variable `_rid_map`.
     Because you can load diffent versions of a dataset simultaniously, the dataset RID and version number are tracked, and a new
     sqllite instance is created for every new dataset version present.
@@ -81,7 +84,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
         except KeyError:
             raise DerivaMLException(f"Dataset {dataset_rid} not found")
-    def __init__(self, minid: DatasetMinid, bag_path: Path):
+    def __init__(self, minid: DatasetMinid, bag_path: Path, dbase_path: Path):
         """Create a new DatabaseModel.
         Args:
@@ -92,8 +95,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
         self.bag_path = bag_path
         self.minid = minid
         self.dataset_rid = minid.dataset_rid
-        dir_path = bag_path.parent
-        self.dbase_file = dir_path / f"{minid.version_rid}.db"
+        self.dbase_file = dbase_path / f"{minid.version_rid}.db"
         self.dbase = sqlite3.connect(self.dbase_file)
         super().__init__(
@@ -315,6 +317,26 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
             )
         return datasets
+    def get_table_as_dict(self, table: str) -> Generator[dict[str, Any], None, None]:
+        """Retrieve the contents of the specified table as a dictionary.
+        Args:
+            table: Table to retrieve data from. f schema is not provided as part of the table name,
+                the method will attempt to locate the schema for the table.
+        Returns:
+          A generator producing dictionaries containing the contents of the specified table as name/value pairs.
+        """
+        table_name = self.normalize_table_name(table)
+        with self.dbase as dbase:
+            col_names = [
+                c[1]
+                for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()
+            ]
+            result = self.dbase.execute(f'SELECT * FROM "{table_name}"')
+            while row := result.fetchone():
+                yield dict(zip(col_names, row))
     def normalize_table_name(self, table: str) -> str:
         """Attempt to insert the schema into a table name if it's not provided.

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/dataset.py RENAMED Viewed

@@ -67,11 +67,12 @@ class Dataset:
     _Logger = logging.getLogger("deriva_ml")
-    def __init__(self, model: DerivaModel, cache_dir: Path):
+    def __init__(self, model: DerivaModel, cache_dir: Path, working_dir: Path):
         self._model = model
         self._ml_schema = ML_SCHEMA
         self.dataset_table = self._model.schemas[self._ml_schema].tables["Dataset"]
         self._cache_dir = cache_dir
+        self._working_dir = working_dir
         self._logger = logging.getLogger("deriva_ml")
     def _is_dataset_rid(self, dataset_rid: RID, deleted: bool = False) -> bool:
@@ -92,7 +93,7 @@ class Dataset:
         dataset_list: list[DatasetSpec],
         description: Optional[str] = "",
         execution_rid: Optional[RID] = None,
-    ) -> RID:
+    ) -> list[dict[str, Any]]:
         schema_path = self._model.catalog.getPathBuilder().schemas[self._ml_schema]
         # Construct version records for insert
@@ -245,7 +246,7 @@ class Dataset:
           DerivaMLException: if provided RID is not to a dataset_table.
         """
-        # Find all of the datasets that are reachable from this dataset and determine their new version numbers.
+        # Find all the datasets that are reachable from this dataset and determine their new version numbers.
         related_datasets = list(self._build_dataset_graph(dataset_rid=dataset_rid))
         version_update_list = [
             DatasetSpec(
@@ -254,7 +255,7 @@ class Dataset:
             )
             for ds_rid in related_datasets
         ]
-        updated_versions = self._insert_dataset_versions(
+        self._insert_dataset_versions(
             version_update_list, description=description, execution_rid=execution_rid
         )
         return [d.version for d in version_update_list if d.rid == dataset_rid][0]
@@ -751,9 +752,10 @@ class Dataset:
         ]
     def _table_paths(
-        self, dataset: DatasetSpec = None, snapshot_catalog: Optional[DerivaML] = None
+        self,
+        dataset: Optional[DatasetSpec] = None,
+        snapshot_catalog: Optional[DerivaML] = None,
     ) -> Iterator[tuple[str, str, Table]]:
         paths = self._collect_paths(dataset and dataset.rid, snapshot_catalog)
         def source_path(path: tuple[Table, ...]):
@@ -779,17 +781,19 @@ class Dataset:
     def _collect_paths(
         self,
         dataset_rid: Optional[RID] = None,
-        snapshot_catalog: Optional[DerivaML] = None,
+        snapshot: Optional[Dataset] = None,
         dataset_nesting_depth: Optional[int] = None,
     ) -> set[tuple[Table, ...]]:
+        snapshot_catalog = snapshot if snapshot else self
-        snapshot_catalog = snapshot_catalog or self
         dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
             "Dataset"
         ]
         dataset_dataset = snapshot_catalog._model.schemas[self._ml_schema].tables[
             "Dataset_Dataset"
         ]
+        # Figure out what types of elements the dataset contains.
         dataset_associations = [
             a
             for a in self.dataset_table.find_associations()
@@ -812,7 +816,8 @@ class Dataset:
             ]
         else:
             included_associations = dataset_associations
-        # Get the paths through the schema and filter out all of dataset paths not used by this dataset.
+        # Get the paths through the schema and filter out all the dataset paths not used by this dataset.
         paths = {
             tuple(p)
             for p in snapshot_catalog._model._schema_to_paths()
@@ -826,9 +831,7 @@ class Dataset:
         nested_paths = set()
         if dataset_rid:
             for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
-                nested_paths |= self._collect_paths(
-                    c, snapshot_catalog=snapshot_catalog
-                )
+                nested_paths |= self._collect_paths(c, snapshot=snapshot_catalog)
         else:
             # Initialize nesting depth if not already provided.
             dataset_nesting_depth = (
@@ -974,7 +977,7 @@ class Dataset:
             if dataset.materialize
             else self._download_dataset_minid(minid)
         )
-        return DatabaseModel(minid, bag_path).get_dataset()
+        return DatabaseModel(minid, bag_path, self._working_dir).get_dataset()
     def _version_snapshot(self, dataset: DatasetSpec) -> str:
         """Return a catalog with snapshot for the specified dataset version"""

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/dataset_bag.py RENAMED Viewed

@@ -109,7 +109,7 @@ class DatasetBag:
         for ts, on in paths:
             tables = " JOIN ".join(ts)
             on_expression = " and ".join(
-                [f"{column_name(l)}={column_name(r)}" for l, r in on]
+                [f"{column_name(left)}={column_name(right)}" for left, right in on]
             )
             sql.append(
                 f"SELECT {select_args} FROM {tables} ON {on_expression} WHERE {dataset_table_name}.RID IN ({datasets})"

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/demo_catalog.py RENAMED Viewed

@@ -5,6 +5,7 @@ import logging
 from random import random, randint
 import tempfile
 from tempfile import TemporaryDirectory
+from typing import Optional
 import itertools
 from deriva.config.acl_config import AclConfig
@@ -18,7 +19,6 @@ from requests import HTTPError
 from deriva_ml import (
     DerivaML,
     ExecutionConfiguration,
-    Workflow,
     MLVocab,
     BuiltinTypes,
     ColumnDefinition,
@@ -169,12 +169,9 @@ def create_demo_features(ml_instance):
         description="Model for our API workflow",
     )
-    api_workflow = ml_instance.add_workflow(
-        Workflow(
-            name="API Workflow",
-            url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
-            workflow_type="API Workflow",
-        )
+    api_workflow = ml_instance.create_workflow(
+        name="API Workflow",
+        workflow_type="API Workflow",
     )
     api_execution = ml_instance.create_execution(
@@ -322,7 +319,11 @@ def create_demo_catalog(
 class DemoML(DerivaML):
     def __init__(
-        self, hostname, catalog_id, cache_dir: str = None, working_dir: str = None
+        self,
+        hostname,
+        catalog_id,
+        cache_dir: Optional[str] = None,
+        working_dir: Optional[str] = None,
     ):
         super().__init__(
             hostname=hostname,

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/deriva_definitions.py RENAMED Viewed

@@ -8,7 +8,7 @@ from enum import Enum
 from typing import Any, Iterable, Optional, Annotated
 import deriva.core.ermrest_model as em
-from urllib.parse import urlparse, urljoin
+from urllib.parse import urlparse
 from deriva.core.ermrest_model import builtin_types
 from pydantic import (
     BaseModel,
@@ -139,13 +139,18 @@ class FileSpec(BaseModel):
         if url_parts.scheme == "tag":
             return v
         elif not url_parts.scheme:
-            return f'tag://{gethostname()},{date.today()}:file://{v}'
+            return f"tag://{gethostname()},{date.today()}:file://{v}"
         else:
             raise ValidationError("url is not a file URL")
     @model_serializer()
     def serialize_filespec(self):
-        return {'URL': self.url, 'Description': self.description, 'MD5': self.md5, 'Length': self.length}
+        return {
+            "URL": self.url,
+            "Description": self.description,
+            "MD5": self.md5,
+            "Length": self.length,
+        }
 class VocabularyTerm(BaseModel):

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/deriva_ml_base.py RENAMED Viewed

@@ -32,6 +32,7 @@ from deriva.core.deriva_server import DerivaServer
 from deriva.core.ermrest_catalog import ResolveRidResult
 from deriva.core.ermrest_model import Key, Table
 from deriva.core.hatrac_store import HatracStore
+from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
 from pydantic import validate_call, ConfigDict
 from requests import RequestException
@@ -70,17 +71,29 @@ except ImportError:  # Graceful fallback if IceCream isn't installed.
 try:
     from IPython import get_ipython
 except ImportError:  # Graceful fallback if IPython isn't installed.
-    get_ipython = lambda: None
+    def get_ipython():
+        """Dummy routine in case you are not running in IPython."""
+        return None
 try:
     from jupyter_server.serverapp import list_running_servers
 except ImportError:
-    list_running_servers = lambda: []
+    def list_running_servers():
+        """Dummy routine in case you are not running in Jupyter."""
+        return []
 try:
     from ipykernel import get_connection_file
 except ImportError:
-    get_connection_file = lambda: ""
+    def get_connection_file():
+        """Dummy routine in case you are not running in Jupyter."""
+        return ""
 if TYPE_CHECKING:
     from .execution import Execution
@@ -102,8 +115,8 @@ class DerivaML(Dataset):
         self,
         hostname: str,
         catalog_id: str | int,
-        domain_schema: str = None,
-        project_name: str = None,
+        domain_schema: Optional[str] = None,
+        project_name: Optional[str] = None,
         cache_dir: Optional[str] = None,
         working_dir: Optional[str] = None,
         model_version: str = "1",
@@ -150,7 +163,7 @@ class DerivaML(Dataset):
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         # Initialize dataset class.
-        super().__init__(self.model, self.cache_dir)
+        super().__init__(self.model, self.cache_dir, self.working_dir)
         self._logger = logging.getLogger("deriva_ml")
         self._logger.setLevel(logging_level)
@@ -205,9 +218,8 @@ class DerivaML(Dataset):
         except subprocess.CalledProcessError:
             self._logger.error("nbstripout is not found.")
-    def _get_notebook_session(
-        self,
-    ) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
+    @staticmethod
+    def _get_notebook_session() -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
         """Return the absolute path of the current notebook."""
         # Get the kernel's connection file and extract the kernel ID
         try:
@@ -245,7 +257,7 @@ class DerivaML(Dataset):
     def _get_notebook_path(self) -> Path | None:
         """Return the absolute path of the current notebook."""
-        server, session = self._get_notebook_session()
+        server, session = DerivaML._get_notebook_session()
         if server and session:
             self._check_nbstrip_status()
             relative_path = session["notebook"]["path"]
@@ -267,7 +279,7 @@ class DerivaML(Dataset):
                 )  # Get the caller's filename, which is two up the stack from here.
             else:
                 raise DerivaMLException(
-                    f"Looking for caller failed"
+                    "Looking for caller failed"
                 )  # Stack is too shallow
         return filename, is_notebook
@@ -335,7 +347,7 @@ class DerivaML(Dataset):
         )
     def asset_dir(
-        self, table: str | Table, prefix: str | Path = None
+        self, table: str | Table, prefix: Optional[str | Path] = None
     ) -> UploadAssetDirectory:
         """Return a local file path in which to place a files for an asset table.  T
@@ -369,6 +381,29 @@ class DerivaML(Dataset):
         """
         return self.cache_dir if cached else self.working_dir
+    @staticmethod
+    def globus_login(host: str) -> None:
+        """Log  into the specified host using Globus.
+        Args:
+            host:
+        Returns:
+        """
+        gnl = GlobusNativeLogin(host=host)
+        if gnl.is_logged_in([host]):
+            print("You are already logged in.")
+        else:
+            gnl.login(
+                [host],
+                no_local_server=True,
+                no_browser=True,
+                refresh_tokens=True,
+                update_bdbag_keychain=True,
+            )
+            print("Login Successful")
     def chaise_url(self, table: RID | Table) -> str:
         """Return a Chaise URL to the specified table.
@@ -379,15 +414,15 @@ class DerivaML(Dataset):
         Returns:
             URL to the table in Chaise format.
         """
+        table_obj = self.model.name_to_table(table)
         try:
-            table = self.model.name_to_table(table)
             uri = self.catalog.get_server_uri().replace(
                 "ermrest/catalog/", "chaise/recordset/#"
             )
         except DerivaMLException:
             # Perhaps we have a RID....
             uri = self.cite(table)
-        return f"{uri}/{urlquote(table.schema.name)}:{urlquote(table.name)}"
+        return f"{uri}/{urlquote(table_obj.schema.name)}:{urlquote(table_obj.name)}"
     def cite(self, entity: dict | str) -> str:
         """Return a citation URL for the provided entity.
@@ -401,7 +436,9 @@ class DerivaML(Dataset):
         Raises:
             DerivaMLException: if provided RID does not exist.
         """
-        if entity.startswith(f"https://{self.host_name}/id/{self.catalog_id}/"):
+        if isinstance(entity, str) and entity.startswith(
+            f"https://{self.host_name}/id/{self.catalog_id}/"
+        ):
             # Already got a citation...
             return entity
         try:
@@ -498,9 +535,9 @@ class DerivaML(Dataset):
     def create_asset(
         self,
         asset_name: str,
-        column_defs: Iterable[ColumnDefinition] = None,
+        column_defs: Optional[Iterable[ColumnDefinition]] = None,
         comment: str = "",
-        schema: str = None,
+        schema: Optional[str] = None,
     ) -> Table:
         """Create an asset table with the given asset name.
@@ -532,9 +569,9 @@ class DerivaML(Dataset):
         self,
         target_table: Table | str,
         feature_name: str,
-        terms: list[Table | str] = None,
-        assets: list[Table | str] = None,
-        metadata: Iterable[ColumnDefinition | Table | Key | str] = None,
+        terms: Optional[list[Table | str]] = None,
+        assets: Optional[list[Table | str]] = None,
+        metadata: Optional[Iterable[ColumnDefinition | Table | Key | str]] = None,
         optional: Optional[list[str]] = None,
         comment: str = "",
     ) -> type[FeatureRecord]:
@@ -899,6 +936,7 @@ class DerivaML(Dataset):
         """
         def path_to_asset(path: str) -> str:
+            """Pull the asset name out of a path to that asset in the filesystem"""
             components = path.split("/")
             return components[
                 components.index("asset") + 2
@@ -963,6 +1001,7 @@ class DerivaML(Dataset):
             )
         def check_file_type(dtype: str) -> bool:
+            """Make sure that the specified string is either the name or synonym for a file type term."""
             for term in defined_types:
                 if dtype == term.name or (term.synonyms and file_type in term.synonyms):
                     return True
@@ -1098,7 +1137,7 @@ class DerivaML(Dataset):
     def create_workflow(
         self, name: str, workflow_type: str, description: str = "", create: bool = True
-    ) -> RID:
+    ) -> RID | None:
         """Identify current executing program and return a workflow RID for it
         Determine the notebook or script that is currently being executed. Assume that  this is
@@ -1166,7 +1205,7 @@ class DerivaML(Dataset):
             )
             github_url = result.stdout.strip().removesuffix(".git")
         except subprocess.CalledProcessError:
-            raise DerivaMLException(f"No GIT remote found")
+            raise DerivaMLException("No GIT remote found")
         # Find the root directory for the repository
         repo_root = self._get_git_root()
@@ -1188,7 +1227,7 @@ class DerivaML(Dataset):
         """Get SHA-1 hash of latest commit of the file in the repository"""
         result = subprocess.run(
-            ["git", "log", "-n", "1", "--pretty=format:%H" "--", self.executable_path],
+            ["git", "log", "-n", "1", "--pretty=format:%H--", self.executable_path],
             cwd=self.executable_path.parent,
             capture_output=True,
             text=True,

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/deriva_model.py RENAMED Viewed

@@ -21,7 +21,7 @@ from .deriva_definitions import (
 from collections import Counter
 from pydantic import validate_call, ConfigDict
-from typing import Iterable
+from typing import Iterable, Optional
 class DerivaModel:
@@ -267,7 +267,7 @@ class DerivaModel:
     def _schema_to_paths(
         self,
         root: Table = None,
-        path: list[Table] = None,
+        path: Optional[list[Table]] = None,
     ) -> list[list[Table]]:
         """Recursively walk over the domain schema graph and extend the current path.

{deriva_ml-1.8.11 → deriva_ml-1.9.1}/src/deriva_ml/execution.py RENAMED Viewed

@@ -54,7 +54,9 @@ except ImportError:  # Graceful fallback if IceCream isn't installed.
 try:
     from jupyter_server.serverapp import list_running_servers
 except ImportError:
-    list_running_servers = lambda: []
+    def list_running_servers():
+        return []
 class Execution:
@@ -155,7 +157,6 @@ class Execution:
         self._initialize_execution(reload)
     def _save_runtime_environment(self):
         runtime_env_path = ExecMetadataVocab.runtime_env.value
         runtime_env_dir = self.execution_metadata_path(runtime_env_path)
         with NamedTemporaryFile(
@@ -267,7 +268,7 @@ class Execution:
             # Execution metadata cannot be in a directory, so map path into filename.
             checkpoint_path = (
                 self.execution_metadata_path(ExecMetadataVocab.runtime_env.value)
-                / f"{notebook_name.replace('/','_')}.checkpoint"
+                / f"{notebook_name.replace('/', '_')}.checkpoint"
             )
             with open(checkpoint_path, "w", encoding="utf-8") as f:
                 json.dump(notebook_content, f)
@@ -359,7 +360,7 @@ class Execution:
             if m := is_feature_asset_dir(p):
                 try:
                     self.update_status(
-                        Status.running, f'Uploading feature {m["feature_name"]}...'
+                        Status.running, f"Uploading feature {m['feature_name']}..."
                     )
                     feature_assets[m["target_table"], m["feature_name"]] = (
                         self._ml_object.upload_assets(p)

deriva-ml 1.8.11__tar.gz → 1.9.1__tar.gz

deriva-ml 1.8.11tar.gz → 1.9.1tar.gz