deriva-ml 1.7.0__tar.gz → 1.8.1__tar.gz

This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
Files changed (46)
  1. {deriva_ml-1.7.0/src/deriva_ml.egg-info → deriva_ml-1.8.1}/PKG-INFO +1 -1
  2. deriva_ml-1.8.1/src/deriva_ml/VERSION.py +1 -0
  3. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/__init__.py +2 -0
  4. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/dataset.py +19 -25
  5. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/dataset_aux_classes.py +8 -0
  6. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/dataset_bag.py +2 -2
  7. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/demo_catalog.py +2 -2
  8. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/deriva_definitions.py +42 -1
  9. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/deriva_ml_base.py +102 -25
  10. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/execution.py +6 -6
  11. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/execution_configuration.py +2 -2
  12. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/execution_environment.py +2 -2
  13. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/schema_setup/create_schema.py +33 -7
  14. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/test_functions.py +5 -9
  15. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/upload.py +0 -1
  16. {deriva_ml-1.7.0 → deriva_ml-1.8.1/src/deriva_ml.egg-info}/PKG-INFO +1 -1
  17. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/tests/test_basic_tables.py +1 -1
  18. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/tests/test_dataset.py +16 -5
  19. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/tests/test_execution.py +2 -5
  20. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/tests/test_upload.py +2 -2
  21. deriva_ml-1.7.0/src/deriva_ml/VERSION.py +0 -1
  22. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/LICENSE +0 -0
  23. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/README.md +0 -0
  24. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/pyproject.toml +0 -0
  25. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/setup.cfg +0 -0
  26. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/build/lib/schema_setup/__init__.py +0 -0
  27. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/build/lib/schema_setup/alter_annotation.py +0 -0
  28. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/build/lib/schema_setup/annotation_temp.py +0 -0
  29. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/build/lib/schema_setup/create_schema.py +0 -0
  30. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/build/lib/schema_setup/table_comments_utils.py +0 -0
  31. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/database_model.py +0 -0
  32. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/deriva_model.py +0 -0
  33. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/feature.py +0 -0
  34. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/history.py +0 -0
  35. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/schema_setup/__init__.py +0 -0
  36. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/schema_setup/alter_annotation.py +0 -0
  37. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/schema_setup/annotations.py +0 -0
  38. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/schema_setup/policy.json +0 -0
  39. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml/schema_setup/table_comments_utils.py +0 -0
  40. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  41. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  42. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  43. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml.egg-info/requires.txt +0 -0
  44. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/src/deriva_ml.egg-info/top_level.txt +0 -0
  45. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/tests/test_download.py +0 -0
  46. {deriva_ml-1.7.0 → deriva_ml-1.8.1}/tests/test_features.py +0 -0
deriva_ml-1.8.1/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: deriva-ml
- Version: 1.7.0
+ Version: 1.8.1
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
  Author-email: ISRD <isrd-dev@isi.edu>
  Requires-Python: >=3.10
deriva_ml-1.8.1/src/deriva_ml/VERSION.py
@@ -0,0 +1 @@
+ __version__ = "1.8.1"
deriva_ml-1.8.1/src/deriva_ml/__init__.py
@@ -2,6 +2,7 @@ __all__ = [
  "DerivaML",
  "DerivaMLException",
  "FileUploadState",
+ "FileSpec",
  "ExecutionConfiguration",
  "Workflow",
  "DatasetBag",
@@ -26,6 +27,7 @@ from .deriva_definitions import (
  BuiltinTypes,
  UploadState,
  FileUploadState,
+ FileSpec,
  RID,
  DerivaMLException,
  MLVocab,
deriva_ml-1.8.1/src/deriva_ml/dataset.py
@@ -75,9 +75,10 @@ class Dataset:
  rid_info = self._model.catalog.resolve_rid(dataset_rid, self._model.model)
  except KeyError as _e:
  raise DerivaMLException(f"Invalid RID {dataset_rid}")
-
- # Got a dataset rid. Now check to see if its deleted or not.
- if deleted:
+ if rid_info.table != self.dataset_table:
+ return False
+ elif deleted:
+ # Got a dataset rid. Now check to see if its deleted or not.
  return True
  else:
  return not list(rid_info.datapath.entities().fetch())[0]["Deleted"]
@@ -293,7 +294,7 @@ class Dataset:
  pb = self._model.catalog.getPathBuilder()
  for ds_type in ds_types:
  if not check_dataset_type(ds_type):
- raise DerivaMLException(f"Dataset type must be a vocabulary term.")
+ raise DerivaMLException("Dataset type must be a vocabulary term.")
  dataset_table_path = pb.schemas[self.dataset_table.schema.name].tables[
  self.dataset_table.name
  ]
@@ -444,7 +445,7 @@ class Dataset:
  self._model.model.apply()
  return table

- @validate_call
+ # @validate_call
  def list_dataset_members(
  self, dataset_rid: RID, recurse: bool = False
  ) -> dict[str, list[dict[str, Any]]]:
@@ -469,34 +470,27 @@ class Dataset:
  pb = self._model.catalog.getPathBuilder()
  for assoc_table in self.dataset_table.find_associations():
  other_fkey = assoc_table.other_fkeys.pop()
- self_fkey = assoc_table.self_fkey
  target_table = other_fkey.pk_table
  member_table = assoc_table.table

+ # Look at domain tables and nested datasets.
  if (
  target_table.schema.name != self._model.domain_schema
  and target_table != self.dataset_table
  ):
- # Look at domain tables and nested datasets.
  continue
- if target_table == self.dataset_table:
- # find_assoc gives us the keys in the wrong position, so swap.
- self_fkey, other_fkey = other_fkey, self_fkey
+ member_column = (
+ "Nested_Dataset"
+ if target_table == self.dataset_table
+ else other_fkey.foreign_key_columns[0].name
+ )

  target_path = pb.schemas[target_table.schema.name].tables[target_table.name]
  member_path = pb.schemas[member_table.schema.name].tables[member_table.name]
- # Get the names of the columns that we are going to need for linking
- member_link = tuple(
- c.name for c in next(iter(other_fkey.column_map.items()))
- )
- path = pb.schemas[member_table.schema.name].tables[member_table.name].path
- path.filter(member_path.Dataset == dataset_rid)
- path.link(
+
+ path = member_path.filter(member_path.Dataset == dataset_rid).link(
  target_path,
- on=(
- member_path.columns[member_link[0]]
- == target_path.columns[member_link[1]]
- ),
+ on=(member_path.columns[member_column] == target_path.columns["RID"]),
  )
  target_entities = list(path.entities().fetch())
  members[target_table.name].extend(target_entities)
@@ -747,9 +741,9 @@ class Dataset:
  p = [f"{self._model.ml_schema}:Dataset/RID={{Dataset_RID}}"]
  for table in path[1:]:
  if table == dataset_dataset:
- p.append(f"(RID)=(deriva-ml:Dataset_Dataset:Dataset)")
+ p.append("(RID)=(deriva-ml:Dataset_Dataset:Dataset)")
  elif table == self.dataset_table:
- p.append(f"(Nested_Dataset)=(deriva-ml:Dataset:RID)")
+ p.append("(Nested_Dataset)=(deriva-ml:Dataset:RID)")
  elif table.name == "Dataset_Version":
  p.append(f"(RID)=({self._model.ml_schema}:Dataset_Version:Dataset)")
  else:
@@ -898,7 +892,7 @@ class Dataset:
  config_file=spec_file,
  output_dir=tmp_dir,
  defer_download=True,
- timeout=(10, 300),
+ timeout=(10, 610),
  envars={"Dataset_RID": dataset.rid},
  )
  minid_page_url = exporter.export()[0]  # Get the MINID launch page
@@ -1111,7 +1105,7 @@ class Dataset:
  return [
  {
  "processor": "json",
- "processor_params": {"query_path": f"/schema", "output_path": "schema"},
+ "processor_params": {"query_path": "/schema", "output_path": "schema"},
  }
  ] + self._dataset_specification(writer)
deriva_ml-1.8.1/src/deriva_ml/dataset_aux_classes.py
@@ -187,6 +187,14 @@ class DatasetSpec(BaseModel):

  model_config = ConfigDict(arbitrary_types_allowed=True)

+ @field_validator("version", mode="before")
+ @classmethod
+ def version_field_validator(cls, v: Any) -> Any:
+ if isinstance(v, dict):
+ return DatasetVersion(**v)
+ else:
+ return v
+
  @model_validator(mode="before")
  @classmethod
  def _check_bare_rid(cls, data: Any) -> dict[str, str | bool]:
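Note: the new before-mode validator lets a version arrive as a plain dict (for example, parsed from a JSON execution configuration) and still come out as a DatasetVersion. A self-contained sketch of the same pattern, with simplified stand-ins rather than the library's actual class bodies; the ConfigDict(arbitrary_types_allowed=True) in the hunk suggests DatasetVersion is not itself a pydantic model, which is why pydantic cannot coerce the dict on its own:

    from typing import Any
    from pydantic import BaseModel, ConfigDict, field_validator

    class DatasetVersion:
        """Stand-in for the library's version type (not a pydantic model)."""
        def __init__(self, major: int, minor: int, patch: int):
            self.major, self.minor, self.patch = major, minor, patch

    class DatasetSpec(BaseModel):
        model_config = ConfigDict(arbitrary_types_allowed=True)
        rid: str
        version: DatasetVersion

        @field_validator("version", mode="before")
        @classmethod
        def version_field_validator(cls, v: Any) -> Any:
            # Convert a raw dict before the isinstance check pydantic applies
            # to arbitrary (non-pydantic) annotated types.
            return DatasetVersion(**v) if isinstance(v, dict) else v

    spec = DatasetSpec(rid="1-abc", version={"major": 1, "minor": 0, "patch": 0})
    print(type(spec.version).__name__)  # DatasetVersion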
deriva_ml-1.8.1/src/deriva_ml/dataset_bag.py
@@ -79,7 +79,7 @@ class DatasetBag:
  with self.database as dbase:
  select_args = ",".join(
  [
- f'"{table_name}".{c[1]}'
+ f'"{table_name}"."{c[1]}"'
  for c in dbase.execute(
  f'PRAGMA table_info("{table_name}")'
  ).fetchall()
@@ -104,7 +104,7 @@ class DatasetBag:
  )

  def column_name(col: Column) -> str:
- return f'"{self.model.normalize_table_name(col.table.name)}".{col.name}'
+ return f'"{self.model.normalize_table_name(col.table.name)}"."{col.name}"'

  for ts, on in paths:
  tables = " JOIN ".join(ts)
deriva_ml-1.8.1/src/deriva_ml/demo_catalog.py
@@ -26,7 +26,6 @@ from deriva_ml import (
  RID,
  )

- from deriva_ml.execution import Execution
  from deriva_ml.schema_setup.create_schema import initialize_ml_schema, create_ml_schema
  from deriva_ml.dataset import Dataset

@@ -114,7 +113,7 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RI

  double_nested_dataset = ml_instance.create_dataset(
  type_rid.name,
- description=f"Double nested dataset",
+ description="Double nested dataset",
  version=DatasetVersion(1, 0, 0),
  )
  ml_instance.add_dataset_members(double_nested_dataset, nested_datasets)
@@ -295,6 +294,7 @@ def create_demo_catalog(
  project_name=project_name,
  logging_level=logging.WARN,
  )
+ working_dir = deriva_ml.working_dir
  dataset_table = deriva_ml.dataset_table
  dataset_table.annotations.update(
  Dataset(
deriva_ml-1.8.1/src/deriva_ml/deriva_definitions.py
@@ -3,12 +3,22 @@ Shared definitions that are used in different DerivaML modules.
  """

  import warnings
+ from datetime import date
  from enum import Enum
  from typing import Any, Iterable, Optional, Annotated

  import deriva.core.ermrest_model as em
+ from urllib.parse import urlparse, urljoin
  from deriva.core.ermrest_model import builtin_types
- from pydantic import BaseModel, model_serializer, Field, computed_field, field_validator
+ from pydantic import (
+ BaseModel,
+ model_serializer,
+ Field,
+ computed_field,
+ field_validator,
+ ValidationError,
+ )
+ from socket import gethostname

  ML_SCHEMA = "deriva-ml"

@@ -109,6 +119,36 @@ class BuiltinTypes(Enum):
  serial8 = builtin_types.serial8.typename


+ class FileSpec(BaseModel):
+ """An entry into the File table
+
+ Attributes:
+ url: The File url to the url.
+ description: The description of the file.
+ """
+
+ url: str
+ description: Optional[str] = ""
+ md5: str
+ length: int
+
+ @field_validator("url")
+ @classmethod
+ def validate_file_url(cls, v):
+ url_parts = urlparse(v)
+ if url_parts.scheme == "tag":
+ return v
+ elif not url_parts.scheme:
+ print(v)
+ return f'tag://{gethostname()},{date.today()}:file://{v}'
+ else:
+ raise ValidationError("url is not a file URL")
+
+ @model_serializer()
+ def serialize_filespec(self):
+ return {'URL': self.url, 'Description': self.description, 'MD5': self.md5, 'Length': self.length}
+
+

  class VocabularyTerm(BaseModel):
  """An entry in a vocabulary table.
@@ -144,6 +184,7 @@ class MLVocab(StrEnum):
  workflow_type = "Workflow_Type"
  execution_asset_type = "Execution_Asset_Type"
  execution_metadata_type = "Execution_Metadata_Type"
+ file_type = "File_Type"


  class ExecMetadataVocab(StrEnum):
deriva_ml-1.8.1/src/deriva_ml/deriva_ml_base.py
@@ -8,6 +8,8 @@ relationships that follow a specific data model.

  """

+ from __future__ import annotations
+
  import getpass
  import logging
  from datetime import datetime
@@ -51,6 +53,7 @@ from .deriva_definitions import (
  ML_SCHEMA,
  VocabularyTerm,
  MLVocab,
+ FileSpec,
  )

  if TYPE_CHECKING:
@@ -112,10 +115,12 @@ class DerivaML(Dataset):
  if working_dir
  else Path.home() / "deriva-ml"
  ) / default_workdir
+
  self.working_dir.mkdir(parents=True, exist_ok=True)
  self.cache_dir = (
  Path(cache_dir) if cache_dir else Path.home() / "deriva-ml" / "cache"
  )
+
  self.cache_dir.mkdir(parents=True, exist_ok=True)

  # Initialize dataset class.
@@ -151,11 +156,11 @@ class DerivaML(Dataset):
  )

  def __del__(self):
- if self._execution and self._execution.status != Status.completed:
- try:
- self._execution.update_status(Status.aborted, f"Execution Aborted")
- except requests.HTTPError as e:
- pass
+ try:
+ if self._execution and self._execution.status != Status.completed:
+ self._execution.update_status(Status.aborted, "Execution Aborted")
+ except (AttributeError, requests.HTTPError):
+ pass

  @staticmethod
  def _get_session_config():
@@ -450,9 +455,9 @@ class DerivaML(Dataset):

  # Make sure that the provided assets or terms are actually assets or terms.
  if not all(map(self.model.is_asset, assets)):
- raise DerivaMLException(f"Invalid create_feature asset table.")
+ raise DerivaMLException("Invalid create_feature asset table.")
  if not all(map(self.model.is_vocabulary, terms)):
- raise DerivaMLException(f"Invalid create_feature asset table.")
+ raise DerivaMLException("Invalid create_feature asset table.")

  # Get references to the necessary tables and make sure that the
  # provided feature name exists.
@@ -785,7 +790,77 @@ class DerivaML(Dataset):
  ]
  )

- def list_files(self) -> list[dict[str, Any]]:
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
+ def add_files(
+ self,
+ files: Iterable[FileSpec],
+ file_types: str | list[str],
+ execution_rid: Optional[RID] = None,
+ ) -> Iterable[RID]:
+ """Add a new file to the File table in the catalog.
+
+ Args:
+ file_types: One or more file types. Must be a term from the File_Type controlled vocabulary.
+ files: A sequence of file specifications that describe the files to add.
+ execution_rid: Resource Identifier (RID) of the execution to associate with the file.
+
+ Returns:
+ Iterable of the RIDs of the files that were added.
+ """
+ defined_types = self.list_vocabulary_terms(MLVocab.file_type)
+ if execution_rid and self.resolve_rid(execution_rid).table.name != 'Execution':
+ raise DerivaMLException(f'RID {execution_rid} is not for an execution table.')
+
+ def check_file_type(dtype: str) -> bool:
+ for term in defined_types:
+ if dtype == term.name or (term.synonyms and file_type in term.synonyms):
+ return True
+ return False
+
+ # Create the entry for the new dataset_table and get its RID.
+ file_types = [file_types] if isinstance(file_types, str) else file_types
+ pb = self._model.catalog.getPathBuilder()
+ for file_type in file_types:
+ if not check_file_type(file_type):
+ raise DerivaMLException("File type must be a vocabulary term.")
+ file_table_path = pb.schemas[self.ml_schema].tables["File"]
+ file_rids = [
+ e["RID"] for e in file_table_path.insert([f.model_dump() for f in files])
+ ]
+
+ # Get the name of the association table between file_table and file_type.
+ atable = next(
+ self._model.schemas[self._ml_schema]
+ .tables[MLVocab.file_type]
+ .find_associations()
+ ).name
+ pb.schemas[self._ml_schema].tables[atable].insert(
+ [
+ {"File_Type": file_type, "File": file_rid}
+ for file_rid in file_rids
+ for file_type in file_types
+ ]
+ )
+
+ if execution_rid:
+ # Get the name of the association table between file_table and execution.
+ exec_table = next(
+ self._model.schemas[self._ml_schema]
+ .tables["Execution"]
+ .find_associations()
+ ).name
+ pb.schemas[self._ml_schema].tables[exec_table].insert(
+ [
+ {"File": file_rid, "Execution": execution_rid}
+ for file_rid in file_rids
+ ]
+ )
+
+ return file_rids
+
+ def list_files(
+ self, file_types: Optional[list[str]] = None
+ ) -> list[dict[str, Any]]:
  """Return the contents of the file table. Denormalized file types into the file record."""
  atable = next(
  self._model.schemas[self._ml_schema]
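Note: a hypothetical usage sketch of the new add_files API. It assumes an existing connected DerivaML instance named ml_instance; the term name, path, and checksum are invented, and the File_Type vocabulary term is registered first via add_term, as the tests in this release do for other vocabularies:

    from deriva_ml import FileSpec, MLVocab

    # Register the controlled-vocabulary term the files will be tagged with.
    ml_instance.add_term(MLVocab.file_type, "CSV", description="Comma-separated data file")

    file_rids = ml_instance.add_files(
        files=[FileSpec(url="/data/train.csv", md5="d41d8cd98f00b204e9800998ecf8427e", length=20480)],
        file_types="CSV",
    )
    print(list(file_rids))  # RIDs of the newly inserted File records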
@@ -795,26 +870,28 @@ class DerivaML(Dataset):
  ml_path = self.pathBuilder.schemas[self._ml_schema]
  atable_path = ml_path.tables[atable]
  file_path = ml_path.File
+ type_path = ml_path.File_File_Type
+
  # Get a list of all the dataset_type values associated with this dataset_table.
  files = []
- for file in file_path.entities().fetch():
- file_types = (
- atable_path.filter(file_path.Dataset == file["RID"])
- .attributes(atable_path.Dataset_Type)
- .fetch()
- )
- files.append(
- file
- | {
- MLVocab.dataset_type: [
- ds[MLVocab.dataset_type] for ft in file_types
- ]
- }
- )
- return files
+ path = file_path.link(type_path)
+ path = path.attributes(
+ path.File.RID,
+ path.File.URL,
+ path.File.MD5,
+ path.File.Length,
+ path.File.Description,
+ path.File_File_Type.File_Type,
+ )
+ file_map = {}
+ for f in path.fetch():
+ file_map.setdefault(f['RID'], f | {'File_Types': []})['File_Types'].append(f['File_Type'])
+
+ # Now get rid of the File_Type key and return the result
+ return [ (f, f.pop('File_Type'))[0] for f in file_map.values()]

  def list_workflows(self) -> list[Workflow]:
- """Return a list of all of the workflows in the catalog."""
+ """Return a list of all the workflows in the catalog."""
  workflow_path = self.pathBuilder.schemas[self.ml_schema].Workflow
  return [
  Workflow(
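Note: the rewritten list_files replaces a per-file query loop with one linked fetch plus in-memory grouping. The grouping idiom in isolation, on illustrative rows rather than real catalog output:

    rows = [
        {"RID": "1-abc", "URL": "tag://...", "File_Type": "CSV"},
        {"RID": "1-abc", "URL": "tag://...", "File_Type": "Training"},
        {"RID": "1-def", "URL": "tag://...", "File_Type": "CSV"},
    ]
    file_map = {}
    for f in rows:
        # One output record per RID; each join row contributes its File_Type.
        file_map.setdefault(f["RID"], f | {"File_Types": []})["File_Types"].append(f["File_Type"])
    print([fm["File_Types"] for fm in file_map.values()])  # [['CSV', 'Training'], ['CSV']]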
@@ -898,7 +975,7 @@ class DerivaML(Dataset):

  if self._execution:
  DerivaMLException(
- f"Only one execution can be created for a Deriva ML instance."
+ "Only one execution can be created for a Deriva ML instance."
  )
  else:
  self._execution = Execution(configuration, self)
deriva_ml-1.8.1/src/deriva_ml/execution.py
@@ -113,13 +113,13 @@ class Execution:

  if self._ml_object.resolve_rid(configuration.workflow).table.name != "Workflow":
  raise DerivaMLException(
- f"Workflow specified in execution configuration is not a Workflow"
+ "Workflow specified in execution configuration is not a Workflow"
  )

  for d in self.configuration.datasets:
  if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
  raise DerivaMLException(
- f"Dataset specified in execution configuration is not a dataset"
+ "Dataset specified in execution configuration is not a dataset"
  )

  for a in self.configuration.assets:
@@ -127,7 +127,7 @@ class Execution:
  self._ml_object.resolve_rid(a).table.name
  ):
  raise DerivaMLException(
- f"Asset specified in execution configuration is not a asset table"
+ "Asset specified in execution configuration is not a asset table"
  )

  schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
@@ -248,7 +248,7 @@ class Execution:

  self.start_time = datetime.now()
  self.uploaded_assets = None
- self.update_status(Status.initializing, f"Start ML algorithm ...")
+ self.update_status(Status.initializing, "Start ML algorithm ...")

  def execution_stop(self) -> None:
  """Finish the execution and update the duration and status of execution."""
@@ -303,7 +303,7 @@ class Execution:
  self.update_status(Status.failed, error)
  raise DerivaMLException(f"Fail to upload execution_assets. Error: {error}")

- self.update_status(Status.running, f"Updating features...")
+ self.update_status(Status.running, "Updating features...")

  feature_assets = defaultdict(dict)

@@ -350,7 +350,7 @@ class Execution:
  ],
  )

- self.update_status(Status.running, f"Upload assets complete")
+ self.update_status(Status.running, "Upload assets complete")
  return results

  def upload_execution_outputs(
deriva_ml-1.8.1/src/deriva_ml/execution_configuration.py
@@ -1,12 +1,12 @@
  from __future__ import annotations

  import json
- from typing import Optional
+ from typing import Optional, Any

  from pydantic import (
  BaseModel,
  conlist,
- ConfigDict,
+ ConfigDict, field_validator,
  )
  from pathlib import Path

deriva_ml-1.8.1/src/deriva_ml/execution_environment.py
@@ -47,7 +47,7 @@ def get_platform_info():
  for attr in attributes:
  try:
  platform_info[attr] = getattr(platform, attr)()
- except Exception as exc:
+ except Exception:
  # Not all attributes are available on all platforms.
  continue
  return platform_info
@@ -67,7 +67,7 @@ def get_os_info():
  ]:
  try:
  values[func] = getattr(os, "get" + func)()
- except (OSError, AttributeError) as exc:
+ except (OSError, AttributeError):
  pass
  values["umask"] = oct(get_umask())
  values["name"] = os.name
deriva_ml-1.8.1/src/deriva_ml/schema_setup/create_schema.py
@@ -1,7 +1,7 @@
  import argparse
  import sys

- from deriva.core import DerivaServer, ErmrestCatalog, get_credential
+ from deriva.core import DerivaServer, get_credential
  from deriva.core.ermrest_model import Model
  from deriva.core.ermrest_model import (
  builtin_types,
@@ -32,7 +32,7 @@ def define_table_workflow(workflow_annotation: dict):
  )


- def define_table_dataset(sname, dataset_annotation: dict = None):
+ def define_table_dataset(dataset_annotation: dict = None):
  return Table.define(
  tname="Dataset",
  column_defs=[
@@ -43,7 +43,7 @@ def define_table_dataset(sname, dataset_annotation: dict = None):
  )


- def define_table_dataset_version(sname: str, dataset_version_annotation: dict = None):
+ def define_table_dataset_version(sname: str):
  return Table.define(
  tname="Dataset_Version",
  column_defs=[
@@ -100,6 +100,14 @@ def define_asset_execution_asset(sname: str, execution_asset_annotation: dict):
  return table_def


+ def define_table_file(sname):
+ """Define files table structure"""
+ return Table.define_asset(
+ sname=sname,
+ tname="File",
+ )
+
+
  def create_www_schema(model: Model):
  """
  Set up a new schema and tables to hold web-page like content. The tables include a page table, and an asset
@@ -142,15 +150,12 @@ def create_www_schema(model: Model):
  },
  )
  )
-
  return www_schema


  def create_ml_schema(
  model: Model, schema_name: str = "deriva-ml", project_name: str = None
  ):
- ml_catalog: ErmrestCatalog = model.catalog
-
  if model.schemas.get(schema_name):
  model.schemas[schema_name].drop(cascade=True)
  # get annotations
@@ -195,7 +200,7 @@ def create_ml_schema(
  )

  dataset_table = schema.create_table(
- define_table_dataset(schema_name, annotations["dataset_annotation"])
+ define_table_dataset(annotations["dataset_annotation"])
  )
  dataset_type = schema.create_table(
  Table.define_vocabulary(MLVocab.dataset_type, f"{project_name}:{{RID}}")
@@ -263,6 +268,27 @@ def create_ml_schema(
  )
  )

+ # File table
+ file_table = schema.create_table(define_table_file(schema_name))
+ file_type = schema.create_table(
+ Table.define_vocabulary(MLVocab.file_type, f"{project_name}:{{RID}}")
+ )
+ schema.create_table(
+ Table.define_association(
+ associates=[
+ ("File", file_table),
+ (MLVocab.file_type, file_type),
+ ]
+ )
+ )
+ schema.create_table(
+ Table.define_association(
+ [
+ ("File", file_table),
+ ("Execution", execution_table),
+ ]
+ )
+ )
  create_www_schema(model)
  initialize_ml_schema(model, schema_name)

deriva_ml-1.8.1/src/deriva_ml/test_functions.py
@@ -4,16 +4,13 @@ catalog_id = "eye-ai"
  # source_dataset = '2-7K8W'
  source_dataset = "3R6"
  create_catalog = False
- import logging
- from deriva_ml.demo_catalog import create_demo_catalog, DemoML, populate_demo_catalog
+ from deriva_ml.demo_catalog import create_demo_catalog, DemoML
  from deriva_ml import (
  Workflow,
  ExecutionConfiguration,
  MLVocab as vc,
  DerivaML,
  DatasetSpec,
- DatasetVersion,
- RID,
  )

@@ -23,8 +20,7 @@ def setup_demo_ml():
  host, "test-schema", create_features=True, create_datasets=True
  )
  ml_instance = DemoML(host, test_catalog.catalog_id)
- config = execution_test(ml_instance)
- return ml_instance, config
+ return ml_instance


  def setup_dev():
@@ -100,12 +96,12 @@ def execution_test(ml_instance):
  vc.workflow_type, "ML Demo", description="A ML Workflow that uses Deriva ML API"
  )

- api_workflow = Workflow(
+ api_workflow = ml_instance.add_workflow(Workflow(
  name="Manual Workflow",
  url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/Notebooks/DerivaML%20Execution.ipynb",
  workflow_type="Manual Workflow",
  description="A manual operation",
- )
+ ))

  manual_execution = ml_instance.create_execution(
  ExecutionConfiguration(description="Sample Execution", workflow=api_workflow)
@@ -114,7 +110,7 @@ def execution_test(ml_instance):
  # Now lets create model configuration for our program.
  model_file = manual_execution.execution_asset_path("API_Model") / "modelfile.txt"
  with open(model_file, "w") as fp:
- fp.write(f"My model")
+ fp.write("My model")

  # Now upload the file and retrieve the RID of the new asset from the returned results.
  uploaded_assets = manual_execution.upload_execution_outputs()
deriva_ml-1.8.1/src/deriva_ml/upload.py
@@ -48,7 +48,6 @@ from deriva.core.ermrest_model import Table
  from deriva.core.hatrac_store import HatracStore
  from deriva.core.utils import hash_utils, mime_utils
  from deriva.transfer.upload.deriva_upload import GenericUploader
- import logging
  from pydantic import validate_call, ConfigDict

  from deriva_ml.deriva_definitions import (
deriva_ml-1.8.1/src/deriva_ml.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: deriva-ml
- Version: 1.7.0
+ Version: 1.8.1
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
  Author-email: ISRD <isrd-dev@isi.edu>
  Requires-Python: >=3.10
deriva_ml-1.8.1/tests/test_basic_tables.py
@@ -36,7 +36,7 @@ class TestVocabulary(TestDerivaML):
  self.assertEqual(term.name, self.ml_instance.lookup_term("CV2", "T1").name)

  # Check for redundant terms.
- with self.assertRaises(DerivaMLException) as context:
+ with self.assertRaises(DerivaMLException):
  self.ml_instance.add_term(
  "CV2", "T1", description="A vocab", exists_ok=False
  )
deriva_ml-1.8.1/tests/test_dataset.py
@@ -121,11 +121,24 @@ class TestDataset(TestDerivaML):
  print(f"datasets {datasets}")
  import pprint

+ print("double_nested_dataset")
  pprint.pprint(
- self.ml_instance.list_dataset_members(dataset_rid=double_nested_dataset)[
- "Dataset"
+ self.ml_instance.list_dataset_members(dataset_rid=double_nested_dataset)
+ )
+
+ print("nested_dataset")
+ pprint.pprint(
+ [
+ self.ml_instance.list_dataset_members(dataset_rid=ds)
+ for ds in nested_datasets
  ]
  )
+
+ print("dataset")
+ pprint.pprint(
+ [self.ml_instance.list_dataset_members(dataset_rid=ds) for ds in datasets]
+ )
+
  print(
  "double nested children",
  self.ml_instance.list_dataset_children(dataset_rid=double_nested_dataset),
@@ -194,9 +207,7 @@ class TestDataset(TestDerivaML):
  "Manual Workflow",
  description="Initial setup of Model File",
  )
- type_rid = self.ml_instance.add_term(
- "Dataset_Type", "TestSet", description="A test"
- )
+ self.ml_instance.add_term("Dataset_Type", "TestSet", description="A test")

  api_workflow = self.ml_instance.add_workflow(
  Workflow(
deriva_ml-1.8.1/tests/test_execution.py
@@ -1,12 +1,9 @@
- from idlelib.run import manage_socket
-
  from derivaml_test import TestDerivaML
  from deriva_ml import (
  MLVocab as vc,
  Workflow,
  ExecutionConfiguration,
  DatasetSpec,
- DerivaML,
  )

@@ -42,7 +39,7 @@ class TestExecution(TestDerivaML):
  description="Sample Execution", workflow=api_workflow
  )
  )
- with manual_execution as e:
+ with manual_execution:
  pass
  manual_execution.upload_execution_outputs()

@@ -141,7 +138,7 @@ class TestExecution(TestDerivaML):
  manual_execution.execution_asset_path("API_Model") / "modelfile.txt"
  )
  with open(model_file, "w") as fp:
- fp.write(f"My model")
+ fp.write("My model")
  # Now upload the file and retrieve the RID of the new asset from the returned results.
  uploaded_assets = manual_execution.upload_execution_outputs()
  self.ml_instance._execution = None
deriva_ml-1.8.1/tests/test_upload.py
@@ -90,10 +90,10 @@ class TestUpload(TestDerivaML):
  manual_execution.execution_asset_path("API_Model") / "modelfile.txt"
  )
  with open(model_file, "w") as fp:
- fp.write(f"My model")
+ fp.write("My model")

  # Now upload the file and retrieve the RID of the new asset from the returned results.
- uploaded_assets = manual_execution.upload_execution_outputs()
+ manual_execution.upload_execution_outputs()
  path = self.ml_instance.catalog.getPathBuilder().schemas["deriva-ml"]
  self.assertEqual(1, len(list(path.Execution_Asset.entities().fetch())))

deriva_ml-1.7.0/src/deriva_ml/VERSION.py
@@ -1 +0,0 @@
- __version__ = "1.7.0"