PyPI - deriva-ml - Versions diffs - 1.8.11__py3-none-any.whl → 1.9.1__py3-none-any.whl - Mend

deriva-ml 1.8.11__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

deriva_ml/database_model.py +29 -7
deriva_ml/dataset.py +16 -13
deriva_ml/dataset_bag.py +1 -1
deriva_ml/demo_catalog.py +9 -8
deriva_ml/deriva_definitions.py +8 -3
deriva_ml/deriva_ml_base.py +62 -23
deriva_ml/deriva_model.py +2 -2
deriva_ml/execution.py +5 -4
deriva_ml/execution_configuration.py +20 -23
deriva_ml/schema_setup/annotations.py +1 -1
deriva_ml/schema_setup/create_schema.py +3 -2
deriva_ml/upload.py +1 -1
{deriva_ml-1.8.11.dist-info → deriva_ml-1.9.1.dist-info}/METADATA +11 -2
deriva_ml-1.9.1.dist-info/RECORD +27 -0
deriva_ml/build/lib/schema_setup/__init__.py +0 -0
deriva_ml/build/lib/schema_setup/alter_annotation.py +0 -36
deriva_ml/build/lib/schema_setup/annotation_temp.py +0 -255
deriva_ml/build/lib/schema_setup/create_schema.py +0 -165
deriva_ml/build/lib/schema_setup/table_comments_utils.py +0 -56
deriva_ml/schema_setup/alter_annotation.py +0 -55
deriva_ml-1.8.11.dist-info/RECORD +0 -33
{deriva_ml-1.8.11.dist-info → deriva_ml-1.9.1.dist-info}/WHEEL +0 -0
{deriva_ml-1.8.11.dist-info → deriva_ml-1.9.1.dist-info}/entry_points.txt +0 -0
{deriva_ml-1.8.11.dist-info → deriva_ml-1.9.1.dist-info}/licenses/LICENSE +0 -0
{deriva_ml-1.8.11.dist-info → deriva_ml-1.9.1.dist-info}/top_level.txt +0 -0

deriva_ml/database_model.py CHANGED Viewed

@@ -1,12 +1,15 @@
-"""Ths module constains the definition of the DatabaseModel class.  The role of this class is to provide an nterface between the BDBag representation
+"""Ths module contains the definition of the DatabaseModel class.  The role of this class is to provide an nterface between the BDBag representation
 of a dataset and a sqllite database in which the contents of the bag are stored.
 """
+from __future__ import annotations
 import logging
 import sqlite3
 from csv import reader
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Generator
 from urllib.parse import urlparse
 from deriva.core.ermrest_model import Model
@@ -20,7 +23,7 @@ from .dataset_bag import DatasetBag
 class DatabaseModelMeta(type):
     """Use metaclass to ensure that there is onl one instance per path"""
-    _paths_loaded: dict[Path:"DatabaseModel"] = {}
+    _paths_loaded: dict[Path, "DatabaseModel"] = {}
     def __call__(cls, *args, **kwargs):
         logger = logging.getLogger("deriva_ml")
@@ -47,7 +50,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
     Because of nested datasets, it's possible that more than one dataset rid is in a bag, or that a dataset rid might
     appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
     into DatabaseModels, is kept in the class variable `_rid_map`.
     Because you can load diffent versions of a dataset simultaniously, the dataset RID and version number are tracked, and a new
     sqllite instance is created for every new dataset version present.
@@ -81,7 +84,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
         except KeyError:
             raise DerivaMLException(f"Dataset {dataset_rid} not found")
-    def __init__(self, minid: DatasetMinid, bag_path: Path):
+    def __init__(self, minid: DatasetMinid, bag_path: Path, dbase_path: Path):
         """Create a new DatabaseModel.
         Args:
@@ -92,8 +95,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
         self.bag_path = bag_path
         self.minid = minid
         self.dataset_rid = minid.dataset_rid
-        dir_path = bag_path.parent
-        self.dbase_file = dir_path / f"{minid.version_rid}.db"
+        self.dbase_file = dbase_path / f"{minid.version_rid}.db"
         self.dbase = sqlite3.connect(self.dbase_file)
         super().__init__(
@@ -315,6 +317,26 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
             )
         return datasets
+    def get_table_as_dict(self, table: str) -> Generator[dict[str, Any], None, None]:
+        """Retrieve the contents of the specified table as a dictionary.
+        Args:
+            table: Table to retrieve data from. f schema is not provided as part of the table name,
+                the method will attempt to locate the schema for the table.
+        Returns:
+          A generator producing dictionaries containing the contents of the specified table as name/value pairs.
+        """
+        table_name = self.normalize_table_name(table)
+        with self.dbase as dbase:
+            col_names = [
+                c[1]
+                for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()
+            ]
+            result = self.dbase.execute(f'SELECT * FROM "{table_name}"')
+            while row := result.fetchone():
+                yield dict(zip(col_names, row))
     def normalize_table_name(self, table: str) -> str:
         """Attempt to insert the schema into a table name if it's not provided.

deriva_ml/dataset.py CHANGED Viewed

@@ -67,11 +67,12 @@ class Dataset:
     _Logger = logging.getLogger("deriva_ml")
-    def __init__(self, model: DerivaModel, cache_dir: Path):
+    def __init__(self, model: DerivaModel, cache_dir: Path, working_dir: Path):
         self._model = model
         self._ml_schema = ML_SCHEMA
         self.dataset_table = self._model.schemas[self._ml_schema].tables["Dataset"]
         self._cache_dir = cache_dir
+        self._working_dir = working_dir
         self._logger = logging.getLogger("deriva_ml")
     def _is_dataset_rid(self, dataset_rid: RID, deleted: bool = False) -> bool:
@@ -92,7 +93,7 @@ class Dataset:
         dataset_list: list[DatasetSpec],
         description: Optional[str] = "",
         execution_rid: Optional[RID] = None,
-    ) -> RID:
+    ) -> list[dict[str, Any]]:
         schema_path = self._model.catalog.getPathBuilder().schemas[self._ml_schema]
         # Construct version records for insert
@@ -245,7 +246,7 @@ class Dataset:
           DerivaMLException: if provided RID is not to a dataset_table.
         """
-        # Find all of the datasets that are reachable from this dataset and determine their new version numbers.
+        # Find all the datasets that are reachable from this dataset and determine their new version numbers.
         related_datasets = list(self._build_dataset_graph(dataset_rid=dataset_rid))
         version_update_list = [
             DatasetSpec(
@@ -254,7 +255,7 @@ class Dataset:
             )
             for ds_rid in related_datasets
         ]
-        updated_versions = self._insert_dataset_versions(
+        self._insert_dataset_versions(
             version_update_list, description=description, execution_rid=execution_rid
         )
         return [d.version for d in version_update_list if d.rid == dataset_rid][0]
@@ -751,9 +752,10 @@ class Dataset:
         ]
     def _table_paths(
-        self, dataset: DatasetSpec = None, snapshot_catalog: Optional[DerivaML] = None
+        self,
+        dataset: Optional[DatasetSpec] = None,
+        snapshot_catalog: Optional[DerivaML] = None,
     ) -> Iterator[tuple[str, str, Table]]:
         paths = self._collect_paths(dataset and dataset.rid, snapshot_catalog)
         def source_path(path: tuple[Table, ...]):
@@ -779,17 +781,19 @@ class Dataset:
     def _collect_paths(
         self,
         dataset_rid: Optional[RID] = None,
-        snapshot_catalog: Optional[DerivaML] = None,
+        snapshot: Optional[Dataset] = None,
         dataset_nesting_depth: Optional[int] = None,
     ) -> set[tuple[Table, ...]]:
+        snapshot_catalog = snapshot if snapshot else self
-        snapshot_catalog = snapshot_catalog or self
         dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
             "Dataset"
         ]
         dataset_dataset = snapshot_catalog._model.schemas[self._ml_schema].tables[
             "Dataset_Dataset"
         ]
+        # Figure out what types of elements the dataset contains.
         dataset_associations = [
             a
             for a in self.dataset_table.find_associations()
@@ -812,7 +816,8 @@ class Dataset:
             ]
         else:
             included_associations = dataset_associations
-        # Get the paths through the schema and filter out all of dataset paths not used by this dataset.
+        # Get the paths through the schema and filter out all the dataset paths not used by this dataset.
         paths = {
             tuple(p)
             for p in snapshot_catalog._model._schema_to_paths()
@@ -826,9 +831,7 @@ class Dataset:
         nested_paths = set()
         if dataset_rid:
             for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
-                nested_paths |= self._collect_paths(
-                    c, snapshot_catalog=snapshot_catalog
-                )
+                nested_paths |= self._collect_paths(c, snapshot=snapshot_catalog)
         else:
             # Initialize nesting depth if not already provided.
             dataset_nesting_depth = (
@@ -974,7 +977,7 @@ class Dataset:
             if dataset.materialize
             else self._download_dataset_minid(minid)
         )
-        return DatabaseModel(minid, bag_path).get_dataset()
+        return DatabaseModel(minid, bag_path, self._working_dir).get_dataset()
     def _version_snapshot(self, dataset: DatasetSpec) -> str:
         """Return a catalog with snapshot for the specified dataset version"""

deriva_ml/dataset_bag.py CHANGED Viewed

@@ -109,7 +109,7 @@ class DatasetBag:
         for ts, on in paths:
             tables = " JOIN ".join(ts)
             on_expression = " and ".join(
-                [f"{column_name(l)}={column_name(r)}" for l, r in on]
+                [f"{column_name(left)}={column_name(right)}" for left, right in on]
             )
             sql.append(
                 f"SELECT {select_args} FROM {tables} ON {on_expression} WHERE {dataset_table_name}.RID IN ({datasets})"

deriva_ml/demo_catalog.py CHANGED Viewed

@@ -5,6 +5,7 @@ import logging
 from random import random, randint
 import tempfile
 from tempfile import TemporaryDirectory
+from typing import Optional
 import itertools
 from deriva.config.acl_config import AclConfig
@@ -18,7 +19,6 @@ from requests import HTTPError
 from deriva_ml import (
     DerivaML,
     ExecutionConfiguration,
-    Workflow,
     MLVocab,
     BuiltinTypes,
     ColumnDefinition,
@@ -169,12 +169,9 @@ def create_demo_features(ml_instance):
         description="Model for our API workflow",
     )
-    api_workflow = ml_instance.add_workflow(
-        Workflow(
-            name="API Workflow",
-            url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
-            workflow_type="API Workflow",
-        )
+    api_workflow = ml_instance.create_workflow(
+        name="API Workflow",
+        workflow_type="API Workflow",
     )
     api_execution = ml_instance.create_execution(
@@ -322,7 +319,11 @@ def create_demo_catalog(
 class DemoML(DerivaML):
     def __init__(
-        self, hostname, catalog_id, cache_dir: str = None, working_dir: str = None
+        self,
+        hostname,
+        catalog_id,
+        cache_dir: Optional[str] = None,
+        working_dir: Optional[str] = None,
     ):
         super().__init__(
             hostname=hostname,

deriva_ml/deriva_definitions.py CHANGED Viewed

@@ -8,7 +8,7 @@ from enum import Enum
 from typing import Any, Iterable, Optional, Annotated
 import deriva.core.ermrest_model as em
-from urllib.parse import urlparse, urljoin
+from urllib.parse import urlparse
 from deriva.core.ermrest_model import builtin_types
 from pydantic import (
     BaseModel,
@@ -139,13 +139,18 @@ class FileSpec(BaseModel):
         if url_parts.scheme == "tag":
             return v
         elif not url_parts.scheme:
-            return f'tag://{gethostname()},{date.today()}:file://{v}'
+            return f"tag://{gethostname()},{date.today()}:file://{v}"
         else:
             raise ValidationError("url is not a file URL")
     @model_serializer()
     def serialize_filespec(self):
-        return {'URL': self.url, 'Description': self.description, 'MD5': self.md5, 'Length': self.length}
+        return {
+            "URL": self.url,
+            "Description": self.description,
+            "MD5": self.md5,
+            "Length": self.length,
+        }
 class VocabularyTerm(BaseModel):

deriva_ml/deriva_ml_base.py CHANGED Viewed

@@ -32,6 +32,7 @@ from deriva.core.deriva_server import DerivaServer
 from deriva.core.ermrest_catalog import ResolveRidResult
 from deriva.core.ermrest_model import Key, Table
 from deriva.core.hatrac_store import HatracStore
+from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
 from pydantic import validate_call, ConfigDict
 from requests import RequestException
@@ -70,17 +71,29 @@ except ImportError:  # Graceful fallback if IceCream isn't installed.
 try:
     from IPython import get_ipython
 except ImportError:  # Graceful fallback if IPython isn't installed.
-    get_ipython = lambda: None
+    def get_ipython():
+        """Dummy routine in case you are not running in IPython."""
+        return None
 try:
     from jupyter_server.serverapp import list_running_servers
 except ImportError:
-    list_running_servers = lambda: []
+    def list_running_servers():
+        """Dummy routine in case you are not running in Jupyter."""
+        return []
 try:
     from ipykernel import get_connection_file
 except ImportError:
-    get_connection_file = lambda: ""
+    def get_connection_file():
+        """Dummy routine in case you are not running in Jupyter."""
+        return ""
 if TYPE_CHECKING:
     from .execution import Execution
@@ -102,8 +115,8 @@ class DerivaML(Dataset):
         self,
         hostname: str,
         catalog_id: str | int,
-        domain_schema: str = None,
-        project_name: str = None,
+        domain_schema: Optional[str] = None,
+        project_name: Optional[str] = None,
         cache_dir: Optional[str] = None,
         working_dir: Optional[str] = None,
         model_version: str = "1",
@@ -150,7 +163,7 @@ class DerivaML(Dataset):
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         # Initialize dataset class.
-        super().__init__(self.model, self.cache_dir)
+        super().__init__(self.model, self.cache_dir, self.working_dir)
         self._logger = logging.getLogger("deriva_ml")
         self._logger.setLevel(logging_level)
@@ -205,9 +218,8 @@ class DerivaML(Dataset):
         except subprocess.CalledProcessError:
             self._logger.error("nbstripout is not found.")
-    def _get_notebook_session(
-        self,
-    ) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
+    @staticmethod
+    def _get_notebook_session() -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
         """Return the absolute path of the current notebook."""
         # Get the kernel's connection file and extract the kernel ID
         try:
@@ -245,7 +257,7 @@ class DerivaML(Dataset):
     def _get_notebook_path(self) -> Path | None:
         """Return the absolute path of the current notebook."""
-        server, session = self._get_notebook_session()
+        server, session = DerivaML._get_notebook_session()
         if server and session:
             self._check_nbstrip_status()
             relative_path = session["notebook"]["path"]
@@ -267,7 +279,7 @@ class DerivaML(Dataset):
                 )  # Get the caller's filename, which is two up the stack from here.
             else:
                 raise DerivaMLException(
-                    f"Looking for caller failed"
+                    "Looking for caller failed"
                 )  # Stack is too shallow
         return filename, is_notebook
@@ -335,7 +347,7 @@ class DerivaML(Dataset):
         )
     def asset_dir(
-        self, table: str | Table, prefix: str | Path = None
+        self, table: str | Table, prefix: Optional[str | Path] = None
     ) -> UploadAssetDirectory:
         """Return a local file path in which to place a files for an asset table.  T
@@ -369,6 +381,29 @@ class DerivaML(Dataset):
         """
         return self.cache_dir if cached else self.working_dir
+    @staticmethod
+    def globus_login(host: str) -> None:
+        """Log  into the specified host using Globus.
+        Args:
+            host:
+        Returns:
+        """
+        gnl = GlobusNativeLogin(host=host)
+        if gnl.is_logged_in([host]):
+            print("You are already logged in.")
+        else:
+            gnl.login(
+                [host],
+                no_local_server=True,
+                no_browser=True,
+                refresh_tokens=True,
+                update_bdbag_keychain=True,
+            )
+            print("Login Successful")
     def chaise_url(self, table: RID | Table) -> str:
         """Return a Chaise URL to the specified table.
@@ -379,15 +414,15 @@ class DerivaML(Dataset):
         Returns:
             URL to the table in Chaise format.
         """
+        table_obj = self.model.name_to_table(table)
         try:
-            table = self.model.name_to_table(table)
             uri = self.catalog.get_server_uri().replace(
                 "ermrest/catalog/", "chaise/recordset/#"
             )
         except DerivaMLException:
             # Perhaps we have a RID....
             uri = self.cite(table)
-        return f"{uri}/{urlquote(table.schema.name)}:{urlquote(table.name)}"
+        return f"{uri}/{urlquote(table_obj.schema.name)}:{urlquote(table_obj.name)}"
     def cite(self, entity: dict | str) -> str:
         """Return a citation URL for the provided entity.
@@ -401,7 +436,9 @@ class DerivaML(Dataset):
         Raises:
             DerivaMLException: if provided RID does not exist.
         """
-        if entity.startswith(f"https://{self.host_name}/id/{self.catalog_id}/"):
+        if isinstance(entity, str) and entity.startswith(
+            f"https://{self.host_name}/id/{self.catalog_id}/"
+        ):
             # Already got a citation...
             return entity
         try:
@@ -498,9 +535,9 @@ class DerivaML(Dataset):
     def create_asset(
         self,
         asset_name: str,
-        column_defs: Iterable[ColumnDefinition] = None,
+        column_defs: Optional[Iterable[ColumnDefinition]] = None,
         comment: str = "",
-        schema: str = None,
+        schema: Optional[str] = None,
     ) -> Table:
         """Create an asset table with the given asset name.
@@ -532,9 +569,9 @@ class DerivaML(Dataset):
         self,
         target_table: Table | str,
         feature_name: str,
-        terms: list[Table | str] = None,
-        assets: list[Table | str] = None,
-        metadata: Iterable[ColumnDefinition | Table | Key | str] = None,
+        terms: Optional[list[Table | str]] = None,
+        assets: Optional[list[Table | str]] = None,
+        metadata: Optional[Iterable[ColumnDefinition | Table | Key | str]] = None,
         optional: Optional[list[str]] = None,
         comment: str = "",
     ) -> type[FeatureRecord]:
@@ -899,6 +936,7 @@ class DerivaML(Dataset):
         """
         def path_to_asset(path: str) -> str:
+            """Pull the asset name out of a path to that asset in the filesystem"""
             components = path.split("/")
             return components[
                 components.index("asset") + 2
@@ -963,6 +1001,7 @@ class DerivaML(Dataset):
             )
         def check_file_type(dtype: str) -> bool:
+            """Make sure that the specified string is either the name or synonym for a file type term."""
             for term in defined_types:
                 if dtype == term.name or (term.synonyms and file_type in term.synonyms):
                     return True
@@ -1098,7 +1137,7 @@ class DerivaML(Dataset):
     def create_workflow(
         self, name: str, workflow_type: str, description: str = "", create: bool = True
-    ) -> RID:
+    ) -> RID | None:
         """Identify current executing program and return a workflow RID for it
         Determine the notebook or script that is currently being executed. Assume that  this is
@@ -1166,7 +1205,7 @@ class DerivaML(Dataset):
             )
             github_url = result.stdout.strip().removesuffix(".git")
         except subprocess.CalledProcessError:
-            raise DerivaMLException(f"No GIT remote found")
+            raise DerivaMLException("No GIT remote found")
         # Find the root directory for the repository
         repo_root = self._get_git_root()
@@ -1188,7 +1227,7 @@ class DerivaML(Dataset):
         """Get SHA-1 hash of latest commit of the file in the repository"""
         result = subprocess.run(
-            ["git", "log", "-n", "1", "--pretty=format:%H" "--", self.executable_path],
+            ["git", "log", "-n", "1", "--pretty=format:%H--", self.executable_path],
             cwd=self.executable_path.parent,
             capture_output=True,
             text=True,

deriva_ml/deriva_model.py CHANGED Viewed

@@ -21,7 +21,7 @@ from .deriva_definitions import (
 from collections import Counter
 from pydantic import validate_call, ConfigDict
-from typing import Iterable
+from typing import Iterable, Optional
 class DerivaModel:
@@ -267,7 +267,7 @@ class DerivaModel:
     def _schema_to_paths(
         self,
         root: Table = None,
-        path: list[Table] = None,
+        path: Optional[list[Table]] = None,
     ) -> list[list[Table]]:
         """Recursively walk over the domain schema graph and extend the current path.

deriva_ml/execution.py CHANGED Viewed

@@ -54,7 +54,9 @@ except ImportError:  # Graceful fallback if IceCream isn't installed.
 try:
     from jupyter_server.serverapp import list_running_servers
 except ImportError:
-    list_running_servers = lambda: []
+    def list_running_servers():
+        return []
 class Execution:
@@ -155,7 +157,6 @@ class Execution:
         self._initialize_execution(reload)
     def _save_runtime_environment(self):
         runtime_env_path = ExecMetadataVocab.runtime_env.value
         runtime_env_dir = self.execution_metadata_path(runtime_env_path)
         with NamedTemporaryFile(
@@ -267,7 +268,7 @@ class Execution:
             # Execution metadata cannot be in a directory, so map path into filename.
             checkpoint_path = (
                 self.execution_metadata_path(ExecMetadataVocab.runtime_env.value)
-                / f"{notebook_name.replace('/','_')}.checkpoint"
+                / f"{notebook_name.replace('/', '_')}.checkpoint"
             )
             with open(checkpoint_path, "w", encoding="utf-8") as f:
                 json.dump(notebook_content, f)
@@ -359,7 +360,7 @@ class Execution:
             if m := is_feature_asset_dir(p):
                 try:
                     self.update_status(
-                        Status.running, f'Uploading feature {m["feature_name"]}...'
+                        Status.running, f"Uploading feature {m['feature_name']}..."
                     )
                     feature_assets[m["target_table"], m["feature_name"]] = (
                         self._ml_object.upload_assets(p)

deriva_ml/execution_configuration.py CHANGED Viewed

@@ -1,12 +1,12 @@
 from __future__ import annotations
 import json
-from typing import Optional, Any
+from typing import Optional
 from pydantic import (
     BaseModel,
     conlist,
-    ConfigDict, field_validator,
+    ConfigDict,
 )
 from pathlib import Path
@@ -36,7 +36,6 @@ class Workflow(BaseModel):
     checksum: Optional[str]
 class ExecutionConfiguration(BaseModel):
     """Define the parameters that are used to configure a specific execution.
@@ -69,23 +68,21 @@ class ExecutionConfiguration(BaseModel):
             config = json.load(fd)
         return ExecutionConfiguration.model_validate(config)
-    def download_execution_configuration(
-        self, configuration_rid: RID
-    ) -> ExecutionConfiguration:
-        """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
-        configuration in hatrac
-        Args:
-            configuration_rid: RID that should be to an asset table that refers to an execution configuration
-        Returns:
-            A ExecutionConfiguration object for configured by the parameters in the configuration file.
-        """
-        AssertionError("Not Implemented")
-        return ExecutionConfiguration.load_configuration(configuration_rid)
-        # configuration = self.retrieve_rid(configuration_rid)
-        # with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
-        #    hs = HatracStore("https", self.host_name, self.credential)
-        #    hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
-        #    return ExecutionConfiguration.load_configuration(Path(dest_file.name))
+    # def download_execution_configuration(
+    #     self, configuration_rid: RID
+    # ) -> ExecutionConfiguration:
+    #     """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
+    #     configuration in hatrac
+    #
+    #     Args:
+    #         configuration_rid: RID that should be to an asset table that refers to an execution configuration
+    #
+    #     Returns:
+    #         A ExecutionConfiguration object for configured by the parameters in the configuration file.
+    #     """
+    #     AssertionError("Not Implemented")
+    #     configuration = self.retrieve_rid(configuration_rid)
+    #     with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
+    #         hs = HatracStore("https", self.host_name, self.credential)
+    #         hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
+    #         return ExecutionConfiguration.load_configuration(Path(dest_file.name))

deriva_ml/schema_setup/annotations.py CHANGED Viewed

@@ -240,7 +240,7 @@ def main():
     parser.add_argument("--catalog_id", type=str, required=True)
     parser.add_argument("--schema_name", type=str, required=True)
     args = parser.parse_args()
-    generate_annotation(args.catalog_id, args.schema_name)
+    generate_annotation(args.catalog_id)
 if __name__ == "__main__":

deriva_ml/schema_setup/create_schema.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import argparse
 import sys
+from typing import Optional
 from deriva.core import DerivaServer, get_credential
 from deriva.core.ermrest_model import Model
@@ -32,7 +33,7 @@ def define_table_workflow(workflow_annotation: dict):
     )
-def define_table_dataset(dataset_annotation: dict = None):
+def define_table_dataset(dataset_annotation: Optional[dict] = None):
     return Table.define(
         tname="Dataset",
         column_defs=[
@@ -154,7 +155,7 @@ def create_www_schema(model: Model):
 def create_ml_schema(
-    model: Model, schema_name: str = "deriva-ml", project_name: str = None
+    model: Model, schema_name: str = "deriva-ml", project_name: Optional[str] = None
 ):
     if model.schemas.get(schema_name):
         model.schemas[schema_name].drop(cascade=True)

deriva_ml/upload.py CHANGED Viewed

@@ -483,7 +483,7 @@ def upload_directory(
 @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
 def upload_asset(
-    model: DerivaModel, file: Path | str, table: Table | str, **kwargs: Any
+    model: DerivaModel, file: Path | str, table: Table, **kwargs: Any
 ) -> dict:
     """Upload the specified file into Hatrac and update the associated asset table.

deriva-ml 1.8.11__py3-none-any.whl → 1.9.1__py3-none-any.whl

deriva-ml 1.8.11__py3-none-any.whl → 1.9.1__py3-none-any.whl