deriva-ml 1.14.46__py3-none-any.whl → 1.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/__init__.py CHANGED
@@ -1,45 +1,84 @@
- __all__ = [
-     "DerivaML",
-     "DerivaMLException",
-     "DerivaMLInvalidTerm",
-     "DerivaMLTableTypeError",
-     "Execution",
-     "ExecAssetType",
-     "ExecMetadataType",
-     "Workflow",
-     "DatasetBag",
-     "DatasetVersion",
-     "DatasetSpec",
-     "FileSpec",
-     "VersionPart",
-     "RID",
-     "BuiltinTypes",
-     "ColumnDefinition",
-     "MLVocab",
-     "MLAsset",
-     "TableDefinition",
-     "ExecutionConfiguration",
- ]
-
  from importlib.metadata import PackageNotFoundError, version
+ from typing import TYPE_CHECKING

- from deriva_ml.core import (
+ # Safe imports - no circular dependencies
+ from deriva_ml.core.config import DerivaMLConfig
+ from deriva_ml.core.definitions import (
      RID,
      BuiltinTypes,
      ColumnDefinition,
-     DerivaML,
+     DerivaAssetColumns,
+     DerivaSystemColumns,
      ExecAssetType,
      ExecMetadataType,
      FileSpec,
+     FileUploadState,
+     ForeignKeyDefinition,
+     KeyDefinition,
      MLAsset,
      MLVocab,
      TableDefinition,
+     UploadState,
+ )
+ from deriva_ml.core.exceptions import (
+     DerivaMLException,
+     DerivaMLInvalidTerm,
+     DerivaMLTableTypeError,
  )
- from deriva_ml.core.exceptions import DerivaMLException, DerivaMLInvalidTerm, DerivaMLTableTypeError
- from deriva_ml.dataset.aux_classes import DatasetSpec, DatasetVersion, VersionPart
- from deriva_ml.dataset.dataset_bag import DatasetBag
- from deriva_ml.execution.execution import Execution, ExecutionConfiguration
- from deriva_ml.execution.workflow import Workflow
+ from deriva_ml.dataset.aux_classes import DatasetConfig, DatasetConfigList, DatasetSpec, DatasetVersion
+
+ from .execution import Execution, ExecutionConfiguration, Workflow
+
+ # Type-checking only - avoid circular import at runtime
+ if TYPE_CHECKING:
+     from deriva_ml.core.base import DerivaML
+
+
+ # Lazy import function for runtime usage
+ def __getattr__(name):
+     """Lazy import to avoid circular dependencies."""
+     if name == "DerivaML":
+         from deriva_ml.core.base import DerivaML
+
+         return DerivaML
+     elif name == "Execution":
+         from deriva_ml.execution.execution import Execution
+
+         return Execution
+     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+ __all__ = [
+     "DerivaML",  # Lazy-loaded
+     "DerivaMLConfig",
+     "DatasetConfig",
+     "DatasetConfigList",
+     "DatasetSpec",
+     "DatasetVersion",
+     "Execution",
+     "ExecutionConfiguration",
+     "Workflow",
+     # Exceptions
+     "DerivaMLException",
+     "DerivaMLInvalidTerm",
+     "DerivaMLTableTypeError",
+     # Definitions
+     "RID",
+     "BuiltinTypes",
+     "ColumnDefinition",
+     "DerivaSystemColumns",
+     "DerivaAssetColumns",
+     "ExecAssetType",
+     "ExecMetadataType",
+     "FileSpec",
+     "FileUploadState",
+     "ForeignKeyDefinition",
+     "KeyDefinition",
+     "MLAsset",
+     "MLVocab",
+     "TableDefinition",
+     "UploadState",
+ ]

  try:
      __version__ = version("deriva_ml")
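The rewritten __init__.py defers importing DerivaML (and Execution) until first attribute access, using a module-level __getattr__ (PEP 562), so deriva_ml.core.base is no longer imported at package-import time. A minimal sketch of how that resolution behaves from a caller's point of view, assuming the layout shown above:

    # Sketch of the lazy-export behavior introduced above (PEP 562 module __getattr__).
    import deriva_ml

    # This attribute access triggers deriva_ml.__getattr__("DerivaML"),
    # which imports deriva_ml.core.base only at this point.
    DerivaML = deriva_ml.DerivaML

    # Any other unknown name still fails loudly, per the AttributeError fallback.
    try:
        deriva_ml.NoSuchName
    except AttributeError as exc:
        print(exc)

Because the direct import is guarded by TYPE_CHECKING, static type checkers still see DerivaML as an ordinary re-export while the runtime path stays lazy.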
deriva_ml/bump_version.py CHANGED
@@ -105,7 +105,7 @@ def main() -> int:

      # Find latest semver tag with prefix
      tag = latest_semver_tag(prefix)
-
+     print(f"Latest semver tag: {tag}")
      if not tag:
          seed_initial_tag(f"{prefix}{start}")
          print(f"Seeded {prefix}{start}. Done.")
@@ -1,4 +1,5 @@
  from deriva_ml.core.base import DerivaML
+ from deriva_ml.core.config import DerivaMLConfig
  from deriva_ml.core.definitions import (
      RID,
      BuiltinTypes,
@@ -17,12 +18,11 @@ from deriva_ml.core.exceptions import DerivaMLException, DerivaMLInvalidTerm, De

  __all__ = [
      "DerivaML",
-
+     "DerivaMLConfig",
      # Exceptions
      "DerivaMLException",
      "DerivaMLInvalidTerm",
      "DerivaMLTableTypeError",
-
      # Definitions
      "RID",
      "BuiltinTypes",
deriva_ml/core/base.py CHANGED
@@ -15,7 +15,6 @@ from __future__ import annotations # noqa: I001

  # Standard library imports
  from collections import defaultdict
- import getpass
  import logging
  from datetime import datetime
  from itertools import chain
@@ -29,12 +28,7 @@ import requests
  from pydantic import ConfigDict, validate_call

  # Deriva imports
- from deriva.core import (
-     DEFAULT_SESSION_CONFIG,
-     format_exception,
-     get_credential,
-     urlquote,
- )
+ from deriva.core import DEFAULT_SESSION_CONFIG, format_exception, get_credential, urlquote, init_logging

  import deriva.core.datapath as datapath
  from deriva.core.datapath import DataPathException, _SchemaWrapper as SchemaWrapper
@@ -55,6 +49,7 @@ from deriva_ml.core.definitions import (
      TableDefinition,
      VocabularyTerm,
  )
+ from deriva_ml.core.config import DerivaMLConfig
  from deriva_ml.core.exceptions import DerivaMLTableTypeError, DerivaMLException
  from deriva_ml.dataset.aux_classes import DatasetSpec
  from deriva_ml.dataset.dataset import Dataset
@@ -116,8 +111,10 @@ class DerivaML(Dataset):
          project_name: str | None = None,
          cache_dir: str | Path | None = None,
          working_dir: str | Path | None = None,
+         hydra_runtime_output_dir: str | Path | None = None,
          ml_schema: str = ML_SCHEMA,
          logging_level=logging.WARNING,
+         deriva_logging_level=logging.WARNING,
          credential=None,
          use_minid: bool = True,
          check_auth: bool = True,
@@ -166,12 +163,10 @@ class DerivaML(Dataset):
          self.model = DerivaModel(self.catalog.getCatalogModel(), domain_schema=domain_schema)

          # Set up working and cache directories
-         default_workdir = self.__class__.__name__ + "_working"
-         self.working_dir = (
-             Path(working_dir) / getpass.getuser() if working_dir else Path.home() / "deriva-ml"
-         ) / default_workdir
-
+         self.working_dir = DerivaMLConfig.compute_workdir(working_dir)
          self.working_dir.mkdir(parents=True, exist_ok=True)
+         self.hydra_runtime_output_dir = hydra_runtime_output_dir
+
          self.cache_dir = Path(cache_dir) if cache_dir else self.working_dir / "cache"
          self.cache_dir.mkdir(parents=True, exist_ok=True)

@@ -182,6 +177,11 @@ class DerivaML(Dataset):
          self._logger = logging.getLogger("deriva_ml")
          self._logger.setLevel(logging_level)

+         # Configure deriva logging level
+         init_logging(deriva_logging_level)
+         logging.getLogger("bagit").setLevel(deriva_logging_level)
+         logging.getLogger("bdbag").setLevel(deriva_logging_level)
+
          # Store instance configuration
          self.host_name = hostname
          self.catalog_id = catalog_id
@@ -0,0 +1,67 @@
+ import logging
+ from pathlib import Path
+ from typing import Any
+
+ from hydra.conf import HydraConf, RunDir
+ from hydra.core.hydra_config import HydraConfig
+ from hydra_zen import store
+ from omegaconf import OmegaConf
+ from pydantic import BaseModel, model_validator
+
+ from deriva_ml.core.definitions import ML_SCHEMA
+
+
+ class DerivaMLConfig(BaseModel):
+     hostname: str
+     catalog_id: str | int = 1
+     domain_schema: str | None = None
+     project_name: str | None = None
+     cache_dir: str | Path | None = None
+     working_dir: str | Path | None = None
+     hydra_runtime_output_dir: str | Path | None = None
+     ml_schema: str = ML_SCHEMA
+     logging_level: Any = logging.WARNING
+     deriva_logging_level: Any = logging.WARNING
+     credential: Any = None
+     use_minid: bool = True
+     check_auth: bool = True
+
+     @model_validator(mode="after")
+     def init_working_dir(self):
+         """
+         Sets up the working directory for the model.
+
+         This method configures the working directory, ensuring that all required
+         file operations are performed in the appropriate location. If the user does not
+         specify a directory, a default directory based on the user's home directory
+         or username will be used.
+
+         This is a repeat of what is in the DerivaML.__init__ bu we put this here so that the working
+         directory is available to hydra.
+
+         Returns:
+             Self: The object instance with the working directory initialized.
+         """
+
+         self.working_dir = DerivaMLConfig.compute_workdir(self.working_dir)
+         self.hydra_runtime_output_dir = Path(HydraConfig.get().runtime.output_dir)
+         return self
+
+     @staticmethod
+     def compute_workdir(working_dir) -> Path:
+         # Create a default working directory if none is provided
+         working_dir = Path(working_dir) if working_dir else Path.home() / "deriva-ml"
+         return working_dir.absolute()
+
+
+ OmegaConf.register_new_resolver("compute_workdir", DerivaMLConfig.compute_workdir, replace=True)
+ store(
+     HydraConf(
+         run=RunDir("${compute_workdir:${deriva_ml.working_dir}}/hydra/${now:%Y-%m-%d_%H-%M-%S}"),
+         output_subdir="hydra-config",
+     ),
+     group="hydra",
+     name="config",
+ )
+
+ store.add_to_hydra_store()
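The new config module both defines a pydantic model for DerivaML's constructor arguments and registers a Hydra run directory that resolves through compute_workdir. The working-directory default can be restated as a small self-contained sketch (the paths in the comments are illustrative only):

    # Restating the working-directory default from DerivaMLConfig.compute_workdir above.
    from pathlib import Path

    def compute_workdir(working_dir=None) -> Path:
        # No argument: fall back to ~/deriva-ml; otherwise make the given path absolute.
        working_dir = Path(working_dir) if working_dir else Path.home() / "deriva-ml"
        return working_dir.absolute()

    print(compute_workdir())             # e.g. /home/alice/deriva-ml
    print(compute_workdir("runs/exp1"))  # <current working dir>/runs/exp1

Note that init_working_dir calls HydraConfig.get(), so DerivaMLConfig is expected to be validated inside a running Hydra application; instantiating it outside one would raise.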
@@ -1,4 +1,17 @@
- from .aux_classes import DatasetSpec
+ from typing import Protocol, runtime_checkable
+
+ from deriva_ml.core.definitions import RID
+
+ from .aux_classes import DatasetConfig, DatasetConfigList, DatasetSpec, DatasetVersion, VersionPart
  from .dataset import Dataset
+ from .dataset_bag import DatasetBag

- __all__ = ["Dataset", "DatasetSpec"]
+ __all__ = [
+     "Dataset",
+     "DatasetSpec",
+     "DatasetConfig",
+     "DatasetConfigList",
+     "DatasetBag",
+     "DatasetVersion",
+     "VersionPart",
+ ]
@@ -5,6 +5,7 @@ THis module defines the DataSet class with is used to manipulate n
  from enum import Enum
  from typing import Any, Optional, SupportsInt

+ from hydra_zen import hydrated_dataclass
  from pydantic import (
      BaseModel,
      ConfigDict,
@@ -182,8 +183,9 @@ class DatasetSpec(BaseModel):
      """

      rid: RID
-     materialize: bool = True
      version: DatasetVersion | conlist(item_type=int, min_length=3, max_length=3) | tuple[int, int, int] | str
+     materialize: bool = True
+     description: str = ""

      model_config = ConfigDict(arbitrary_types_allowed=True)

@@ -208,3 +210,20 @@
      @field_serializer("version")
      def serialize_version(self, version: DatasetVersion) -> dict[str, Any]:
          return version.to_dict()
+
+
+ @hydrated_dataclass(DatasetSpec)
+ class DatasetConfig:
+     rid: str
+     version: str
+     materialize: bool = True
+     description: str = ""
+
+ class DatasetList(BaseModel):
+     datasets: list[DatasetSpec]
+     description: str = ""
+
+ @hydrated_dataclass(DatasetList)
+ class DatasetConfigList:
+     datasets: list[DatasetConfig]
+     description: str = ""
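DatasetConfig and DatasetConfigList are hydra-zen companions to the pydantic models: each is a targeted dataclass pointing at the corresponding class, so Hydra or hydra-zen configs can be turned into DatasetSpec objects. A hedged sketch of that round trip (the RID value "1-ABCD" is a made-up placeholder):

    # Hedged sketch: building a DatasetSpec from the hydra-zen config defined above.
    from hydra_zen import instantiate

    from deriva_ml.dataset.aux_classes import DatasetConfig

    cfg = DatasetConfig(rid="1-ABCD", version="1.0.0", materialize=False)
    spec = instantiate(cfg)  # expected to yield DatasetSpec(rid="1-ABCD", version="1.0.0", materialize=False)
    print(spec)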
@@ -22,10 +22,11 @@ Typical usage example:

  from __future__ import annotations

- # Standard library imports
  import json
  import logging
  from collections import defaultdict
+
+ # Standard library imports
  from graphlib import TopologicalSorter
  from pathlib import Path
  from tempfile import TemporaryDirectory
@@ -1138,7 +1139,7 @@ class Dataset:
          with TemporaryDirectory() as tmp_dir:
              if self._use_minid:
                  # Get bag from S3
-                 archive_path = fetch_single_file(minid.bag_url)
+                 archive_path = fetch_single_file(minid.bag_url, output_path=tmp_dir)
              else:
                  exporter = DerivaExport(host=self._model.catalog.deriva_server.server, output_dir=tmp_dir)
                  archive_path = exporter.retrieve_file(minid.bag_url)
@@ -266,6 +266,22 @@ class DatasetBag:
          sql_cmd = f'SELECT * FROM "{feature_table}"'
          return cast(datapath._ResultSet, [dict(zip(col_names, r)) for r in db.execute(sql_cmd).fetchall()])

+     def list_dataset_element_types(self) -> list[Table]:
+         """
+         Lists the data types of elements contained within a dataset.
+
+         This method analyzes the dataset and identifies the data types for all
+         elements within it. It is useful for understanding the structure and
+         content of the dataset and allows for better manipulation and usage of its
+         data.
+
+         Returns:
+             list[str]: A list of strings where each string represents a data type
+                 of an element found in the dataset.
+
+         """
+         return self.model.list_dataset_element_types()
+
      def list_dataset_children(self, recurse: bool = False) -> list[DatasetBag]:
          """Get nested datasets.

@@ -333,6 +349,105 @@
          # Term not found
          raise DerivaMLInvalidTerm(vocab_table, term_name)

+     def _denormalize(self, include_tables: list[str] | None) -> str:
+         """
+         Generates an SQL statement for denormalizing the dataset based on the tables to include. Processes cycles in
+         graph relationships, ensures proper join order, and generates selected columns for denormalization.
+
+         Args:
+             include_tables (list[str] | None): List of table names to include in the denormalized dataset. If None,
+                 all tables from the dataset will be included.
+
+         Returns:
+             str: SQL query string that represents the process of denormalization.
+         """
+
+         def column_name(col: Column) -> str:
+             return f'"{self.model.normalize_table_name(col.table.name)}"."{col.name}"'
+
+         # Skip over tables that we don't want to include in the denormalized dataset.
+         # Also, strip off the Dataset/Dataset_X part of the path so we don't include dataset columns in the denormalized
+         # table.
+
+         join_tables, tables, denormalized_columns, dataset_rids, dataset_element_tables = (
+             self.model._prepare_wide_table(self, self.dataset_rid, include_tables)
+         )
+
+         select_args = [
+             # SQLlite will strip out the table name from the column in the select statement, so we need to add
+             # an explicit alias to the column name.
+             f'"{self.model.normalize_table_name(table_name)}"."{column_name}" AS "{table_name}.{column_name}"'
+             for table_name, column_name in denormalized_columns
+         ]
+
+         # First table in the table list is the table specified in the method call.
+         normalized_join_tables = [self.model.normalize_table_name(t) for t in join_tables]
+         sql_statement = f'SELECT {",".join(select_args)} FROM "{normalized_join_tables[0]}"'
+         for t in normalized_join_tables[1:]:
+             on = tables[t]
+             sql_statement += f' LEFT JOIN "{t}" ON '
+             sql_statement += "OR ".join([f"{column_name(o[0])} = {column_name(o[1])}" for o in on])
+
+         # Select only rows from the datasets you wish to include.
+         dataset_rid_list = ",".join([f'"{self.dataset_rid}"'] + [f'"{b.dataset_rid}"' for b in dataset_rids])
+         sql_statement += f'WHERE "{self.model.normalize_table_name("Dataset")}"."RID" IN ({dataset_rid_list})'
+
+         # Only include rows that have actual values in them.
+         real_row = [f'"{self.model.normalize_table_name(t)}".RID IS NOT NULL ' for t in dataset_element_tables]
+         sql_statement += f" AND ({' OR '.join(real_row)})"
+         return sql_statement
+
+     def denormalize_as_dataframe(self, include_tables: list[str] | None = None) -> pd.DataFrame:
+         """
+         Denormalize the dataset and return the result as a dataframe.
+
+         This routine will examine the domain schema for the dataset, determine which tables to include and denormalize
+         the dataset values into a single wide table. The result is returned as a dataframe.
+
+         The optional argument include_tables can be used to specify a subset of tables to include in the denormalized
+         view. The tables in this argument can appear anywhere in the dataset schema. The method will determine which
+         additional tables are required to complete the denormalization process. If include_tables is not specified,
+         all of the tables in the schema will be included.
+
+         The resulting wide table will include a column for every table needed to complete the denormalization process.
+
+         Args:
+             include_tables: List of table names to include in the denormalized dataset. If None, than the entire schema
+                 is used.
+
+         Returns:
+             Dataframe containing the denormalized dataset.
+         """
+         return pd.read_sql(self._denormalize(include_tables=include_tables), self.database)
+
+     def denormalize_as_dict(self, include_tables: list[str] | None = None) -> Generator[dict[str, Any], None, None]:
+         """
+         Denormalize the dataset and return the result as a set of dictionarys.
+
+         This routine will examine the domain schema for the dataset, determine which tables to include and denormalize
+         the dataset values into a single wide table. The result is returned as a generateor that returns a dictionary
+         for each row in the denormlized wide table.
+
+         The optional argument include_tables can be used to specify a subset of tables to include in the denormalized
+         view. The tables in this argument can appear anywhere in the dataset schema. The method will determine which
+         additional tables are required to complete the denormalization process. If include_tables is not specified,
+         all of the tables in the schema will be included.
+
+         The resulting wide table will include a column for every table needed to complete the denormalization process.
+
+         Args:
+             include_tables: List of table names to include in the denormalized dataset. If None, than the entire schema
+                 is used.
+
+         Returns:
+             A generator that returns a dictionary representation of each row in the denormalized dataset.
+         """
+         with self.database as dbase:
+             cursor = dbase.execute(self._denormalize(include_tables=include_tables))
+             columns = [desc[0] for desc in cursor.description]
+             for row in cursor:
+                 yield dict(zip(columns, row))
+

  # Add annotations after definition to deal with forward reference issues in pydantic
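The new denormalize_as_dataframe and denormalize_as_dict methods flatten a downloaded dataset bag into one wide table whose columns are qualified as "<Table>.<Column>", per the aliasing in _denormalize. A hedged usage sketch, assuming `bag` is a DatasetBag obtained from a downloaded dataset and "Image" is a placeholder table name:

    # Hedged usage sketch; `bag` and the "Image" table name are placeholders.
    df = bag.denormalize_as_dataframe(include_tables=["Image"])
    print(df.columns)  # qualified names such as "Image.RID", per the SELECT aliases above

    # Streaming variant: one dict per row, keyed by the same qualified column names.
    for row in bag.denormalize_as_dict(include_tables=["Image"]):
        print(row)
        break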
 
@@ -412,6 +412,7 @@ def asset_file_path(
          "Description",
      }.union(set(DerivaSystemColumns))
      asset_metadata = {c.name for c in asset_table.columns} - asset_columns
+
      if not (asset_metadata >= set(metadata.keys())):
          raise DerivaMLException(f"Metadata {metadata} does not match asset metadata {asset_metadata}")

deriva_ml/demo_catalog.py CHANGED
@@ -367,7 +367,7 @@ def create_demo_catalog(
      create_features=False,
      create_datasets=False,
      on_exit_delete=True,
-     logging_level=logging.INFO,
+     logging_level=logging.WARNING,
  ) -> ErmrestCatalog:
      test_catalog = create_ml_catalog(hostname, project_name=project_name)
      if on_exit_delete:
@@ -0,0 +1,25 @@
+ from typing import TYPE_CHECKING
+
+ # Safe imports - no circular dependencies
+ from deriva_ml.execution.execution_configuration import ExecutionConfiguration
+ from deriva_ml.execution.workflow import Workflow
+
+ if TYPE_CHECKING:
+     from deriva_ml.execution.execution import Execution
+
+
+ # Lazy import for runtime
+ def __getattr__(name):
+     """Lazy import to avoid circular dependencies."""
+     if name == "Execution":
+         from deriva_ml.execution.execution import Execution
+
+         return Execution
+     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+ __all__ = [
+     "Execution",  # Lazy-loaded
+     "ExecutionConfiguration",
+     "Workflow",
+ ]
@@ -41,7 +41,6 @@ from deriva_ml.core.base import DerivaML
  from deriva_ml.core.definitions import (
      DRY_RUN_RID,
      RID,
-     ExecAssetType,
      ExecMetadataType,
      FileSpec,
      FileUploadState,
@@ -198,7 +197,6 @@ class Execution:
          workflow_rid (RID): RID of the associated workflow.
          status (Status): Current execution status.
          asset_paths (list[AssetFilePath]): Paths to execution assets.
-         parameters (dict): Execution parameters.
          start_time (datetime | None): When execution started.
          stop_time (datetime | None): When execution completed.

@@ -206,7 +204,6 @@
          >>> config = ExecutionConfiguration(
          ...     workflow="analysis",
          ...     description="Process samples",
-         ...     parameters={"threshold": 0.5}
          ... )
          >>> with ml.create_execution(config) as execution:
          ...     execution.download_dataset_bag(dataset_spec)
@@ -250,7 +247,6 @@

          self.dataset_rids: List[RID] = []
          self.datasets: list[DatasetBag] = []
-         self.parameters = self.configuration.parameters

          self._working_dir = self._ml_object.working_dir
          self._cache_dir = self._ml_object.cache_dir
@@ -292,9 +288,18 @@
              ]
          )[0]["RID"]

-         if isinstance(self.configuration.workflow, Workflow) and self.configuration.workflow.is_notebook:
-             # Put execution_rid into the cell output so we can find it later.
-             display(Markdown(f"Execution RID: {self._ml_object.cite(self.execution_rid)}"))
+         if rid_path := os.environ.get("DERIVA_ML_SAVE_EXECUTION_RID", None):
+             # Put execution_rid into the provided file path so we can find it later.
+             with Path(rid_path).open("w") as f:
+                 json.dump(
+                     {
+                         "hostname": self._ml_object.host_name,
+                         "catalog_id": self._ml_object.catalog_id,
+                         "workflow_rid": self.workflow_rid,
+                         "execution_rid": self.execution_rid,
+                     },
+                     f,
+                 )

          # Create a directory for execution rid so we can recover the state in case of a crash.
          execution_root(prefix=self._ml_object.working_dir, exec_rid=self.execution_rid)
@@ -302,13 +307,28 @@

      def _save_runtime_environment(self):
          runtime_env_path = self.asset_file_path(
-             "Execution_Metadata",
-             f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
-             ExecMetadataType.runtime_env.value,
+             asset_name="Execution_Metadata",
+             file_name=f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+             asset_types=ExecMetadataType.runtime_env.value,
          )
          with Path(runtime_env_path).open("w") as fp:
              json.dump(get_execution_environment(), fp)

+     def _upload_hydra_config_assets(self):
+         """Upload hydra assets to the catalog."""
+         hydra_runtime_output_dir = self._ml_object.hydra_runtime_output_dir
+         if hydra_runtime_output_dir:
+             timestamp = hydra_runtime_output_dir.parts[-1]
+             for hydra_asset in hydra_runtime_output_dir.rglob("*"):
+                 if hydra_asset.is_dir():
+                     continue
+                 asset = self.asset_file_path(
+                     asset_name=MLAsset.execution_metadata,
+                     file_name=hydra_runtime_output_dir / hydra_asset,
+                     rename_file=f"hydra-{timestamp}-{hydra_asset.name}",
+                     asset_types=ExecMetadataType.execution_config.value,
+                 )
+
      def _initialize_execution(self, reload: RID | None = None) -> None:
          """Initialize the execution by a configuration in the Execution_Metadata table.
          Set up a working directory and download all the assets and data.
@@ -354,9 +374,9 @@
          # Save configuration details for later upload
          if not reload:
              cfile = self.asset_file_path(
-                 MLAsset.execution_metadata,
-                 "configuration.json",
-                 ExecMetadataType.execution_config.value,
+                 asset_name=MLAsset.execution_metadata,
+                 file_name="configuration.json",
+                 asset_types=ExecMetadataType.execution_config.value,
              )
              with Path(cfile).open("w", encoding="utf-8") as config_file:
                  json.dump(self.configuration.model_dump(), config_file)
@@ -364,24 +384,18 @@
              lock_file = Path(self.configuration.workflow.git_root) / "uv.lock"
              if lock_file.exists():
                  _ = self.asset_file_path(
-                     MLAsset.execution_metadata,
-                     lock_file,
-                     ExecMetadataType.execution_config.value,
+                     asset_name=MLAsset.execution_metadata,
+                     file_name=lock_file,
+                     asset_types=ExecMetadataType.execution_config.value,
                  )

-             for parameter_file in self.configuration.parameters:
-                 self.asset_file_path(
-                     MLAsset.execution_asset,
-                     parameter_file,
-                     ExecAssetType.input_file.value,
-                 )
+             self._upload_hydra_config_assets()

          # save runtime env
          self._save_runtime_environment()

          # Now upload the files so we have the info in case the execution fails.
          self.uploaded_assets = self._upload_execution_dirs()
-
          self.start_time = datetime.now()
          self.update_status(Status.pending, "Initialize status finished.")

@@ -856,6 +870,7 @@
          file_name: str | Path,
          asset_types: list[str] | str | None = None,
          copy_file=False,
+         rename_file: str | None = None,
          **kwargs,
      ) -> AssetFilePath:
          """Return a pathlib Path to the directory in which to place files for the specified execution_asset type.
@@ -875,6 +890,8 @@
              asset_name: Type of asset to be uploaded. Must be a term in Asset_Type controlled vocabulary.
              file_name: Name of file to be uploaded.
              asset_types: Type of asset to be uploaded. Defaults to the name of the asset.
+             copy_file: Whether to copy the file rather than creating a symbolic link.
+             rename_file: If provided, the file will be renamed to this name if the file already exists..
              **kwargs: Any additional metadata values that may be part of the asset table.

          Returns:
@@ -893,12 +910,15 @@
          for t in asset_types:
              self._ml_object.lookup_term(MLVocab.asset_type, t)

+         # Determine if we will need to rename an existing file as the asset.
          file_name = Path(file_name)
+         target_name = Path(rename_file) if file_name.exists() and rename_file else file_name
+
          asset_path = asset_file_path(
              prefix=self._working_dir,
              exec_rid=self.execution_rid,
              asset_table=self._model.name_to_table(asset_name),
-             file_name=file_name.name,
+             file_name=target_name.name,
              metadata=kwargs,
          )

@@ -914,12 +934,12 @@

          # Persist the asset types into a file
          with Path(asset_type_path(self._working_dir, self.execution_rid, asset_table)).open("a") as asset_type_file:
-             asset_type_file.write(json.dumps({file_name.name: asset_types}) + "\n")
+             asset_type_file.write(json.dumps({target_name.name: asset_types}) + "\n")

          return AssetFilePath(
              asset_path=asset_path,
              asset_name=asset_name,
-             file_name=file_name.name,
+             file_name=target_name.name,
              asset_metadata=kwargs,
              asset_types=asset_types,
          )
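asset_file_path now accepts rename_file, so an existing file can be registered under a different name in the catalog; _upload_hydra_config_assets uses it to prefix each Hydra output file with the run timestamp. A hedged sketch of a direct call (the paths and the timestamp are placeholders):

    # Hedged sketch; the input path, timestamp, and resulting name are placeholders.
    asset = execution.asset_file_path(
        asset_name=MLAsset.execution_metadata,
        file_name="outputs/2024-01-01_12-00-00/config.yaml",    # existing file on disk
        rename_file="hydra-2024-01-01_12-00-00-config.yaml",    # name recorded for the asset
        asset_types=ExecMetadataType.execution_config.value,
    )
    print(asset.file_name)  # the renamed file name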
@@ -30,7 +30,7 @@ from typing import Any
  from pydantic import BaseModel, ConfigDict, Field, field_validator

  from deriva_ml.core.definitions import RID
- from deriva_ml.dataset.aux_classes import DatasetSpec
+ from deriva_ml.dataset.aux_classes import DatasetList, DatasetSpec
  from deriva_ml.execution.workflow import Workflow


@@ -64,45 +64,21 @@ class ExecutionConfiguration(BaseModel):
          ... )
      """

-     datasets: list[DatasetSpec] = []
+     datasets: list[DatasetSpec] | DatasetList = []
      assets: list[RID] = []
      workflow: RID | Workflow
-     parameters: dict[str, Any] | Path = {}
      description: str = ""
      argv: list[str] = Field(default_factory=lambda: sys.argv)

      model_config = ConfigDict(arbitrary_types_allowed=True)

-     @field_validator("parameters", mode="before")
+     @field_validator("datasets", mode="before")
      @classmethod
-     def validate_parameters(cls, value: Any) -> Any:
-         """Validates and loads execution parameters.
-
-         If value is a file path, loads and parses it as JSON. Otherwise, returns
-         the value as is.
-
-         Args:
-             value: Parameter value to validate, either:
-                 - Dictionary of parameters
-                 - Path to JSON file
-                 - String path to JSON file
-
-         Returns:
-             dict[str, Any]: Validated parameter dictionary.
-
-         Raises:
-             ValueError: If JSON file is invalid or cannot be read.
-             FileNotFoundError: If parameter file doesn't exist.
-
-         Example:
-             >>> config = ExecutionConfiguration(parameters="params.json")
-             >>> print(config.parameters)  # Contents of params.json as dict
-         """
-         if isinstance(value, str) or isinstance(value, Path):
-             with Path(value).open("r") as f:
-                 return json.load(f)
-         else:
-             return value
+     def validate_datasets(cls, value: Any) -> Any:
+         if isinstance(value, DatasetList):
+             config_list: DatasetList = value
+             value = config_list.datasets
+         return value

      @field_validator("workflow", mode="before")
      @classmethod
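ExecutionConfiguration.datasets now accepts either a plain list of DatasetSpec or a DatasetList, and the before-validator unwraps the latter so downstream code always sees a list. A hedged sketch (the workflow RID and dataset RID are placeholders):

    # Hedged sketch; the workflow RID and dataset RID are placeholders.
    from deriva_ml import DatasetSpec, ExecutionConfiguration
    from deriva_ml.dataset.aux_classes import DatasetList

    specs = [DatasetSpec(rid="1-ABCD", version="1.0.0")]

    cfg_a = ExecutionConfiguration(workflow="1-WXYZ", datasets=specs)
    cfg_b = ExecutionConfiguration(workflow="1-WXYZ", datasets=DatasetList(datasets=specs))
    # After validation, both configurations carry the same list of DatasetSpec objects.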
@@ -9,6 +9,7 @@ from __future__ import annotations

  # Standard library imports
  from collections import Counter
+ from graphlib import CycleError, TopologicalSorter
  from typing import Any, Callable, Final, Iterable, NewType, TypeAlias

  from deriva.core.ermrest_catalog import ErmrestCatalog
@@ -21,6 +22,7 @@ from pydantic import ConfigDict, validate_call

  from deriva_ml.core.definitions import (
      ML_SCHEMA,
+     RID,
      DerivaAssetColumns,
      TableDefinition,
  )
@@ -28,6 +30,7 @@ from deriva_ml.core.exceptions import DerivaMLException, DerivaMLTableTypeError

  # Local imports
  from deriva_ml.feature import Feature
+ from deriva_ml.protocols.dataset import DatasetLike

  try:
      from icecream import ic
@@ -287,6 +290,113 @@ class DerivaModel:
          else:
              self.model.apply()

+     def list_dataset_element_types(self) -> list[Table]:
+         """
+         Lists the data types of elements contained within a dataset.
+
+         This method analyzes the dataset and identifies the data types for all
+         elements within it. It is useful for understanding the structure and
+         content of the dataset and allows for better manipulation and usage of its
+         data.
+
+         Returns:
+             list[str]: A list of strings where each string represents a data type
+                 of an element found in the dataset.
+
+         """
+
+         dataset_table = self.name_to_table("Dataset")
+
+         def domain_table(table: Table) -> bool:
+             return table.schema.name == self.domain_schema or table.name == dataset_table.name
+
+         return [t for a in dataset_table.find_associations() if domain_table(t := a.other_fkeys.pop().pk_table)]
+
+     def _prepare_wide_table(self, dataset: DatasetLike, dataset_rid: RID, include_tables: list[str] | None) -> tuple:
+         """
+         Generates details of a wide table from the model
+
+         Args:
+             include_tables (list[str] | None): List of table names to include in the denormalized dataset. If None,
+                 all tables from the dataset will be included.
+
+         Returns:
+             str: SQL query string that represents the process of denormalization.
+         """
+
+         # Skip over tables that we don't want to include in the denormalized dataset.
+         # Also, strip off the Dataset/Dataset_X part of the path so we don't include dataset columns in the denormalized
+         # table.
+         include_tables = set(include_tables) if include_tables else set()
+         for t in include_tables:
+             # Check to make sure the table is in the catalog.
+             _ = self.name_to_table(t)
+
+         table_paths = [
+             path
+             for path in self._schema_to_paths()
+             if (not include_tables) or include_tables.intersection({p.name for p in path})
+         ]
+
+         # Get the names of all of the tables that can be dataset elements.
+         dataset_element_tables = {
+             e.name for e in self.list_dataset_element_types() if e.schema.name == self.domain_schema
+         }
+
+         skip_columns = {"RCT", "RMT", "RCB", "RMB"}
+         tables = {}
+         graph = {}
+         for path in table_paths:
+             for left, right in zip(path[0:], path[1:]):
+                 graph.setdefault(left.name, set()).add(right.name)
+
+         # New lets remove any cycles that we may have in the graph.
+         # We will use a topological sort to find the order in which we need to join the tables.
+         # If we find a cycle, we will remove the table from the graph and splice in an additional ON clause.
+         # We will then repeat the process until there are no cycles.
+         graph_has_cycles = True
+         join_tables = []
+         while graph_has_cycles:
+             try:
+                 ts = TopologicalSorter(graph)
+                 join_tables = list(reversed(list(ts.static_order())))
+                 graph_has_cycles = False
+             except CycleError as e:
+                 cycle_nodes = e.args[1]
+                 if len(cycle_nodes) > 3:
+                     raise DerivaMLException(f"Unexpected cycle found when normalizing dataset {cycle_nodes}")
+                 # Remove cycle from graph and splice in additional ON constraint.
+                 graph[cycle_nodes[1]].remove(cycle_nodes[0])
+
+         # The Dataset_Version table is a special case as it points to dataset and dataset to version.
+         if "Dataset_Version" in join_tables:
+             join_tables.remove("Dataset_Version")
+
+         for path in table_paths:
+             for left, right in zip(path[0:], path[1:]):
+                 if right.name == "Dataset_Version":
+                     # The Dataset_Version table is a special case as it points to dataset and dataset to version.
+                     continue
+                 if join_tables.index(right.name) < join_tables.index(left.name):
+                     continue
+                 table_relationship = self._table_relationship(left, right)
+                 tables.setdefault(self.normalize_table_name(right.name), set()).add(
+                     (table_relationship[0], table_relationship[1])
+                 )
+
+         # Get the list of columns that will appear in the final denormalized dataset.
+         denormalized_columns = [
+             (table_name, c.name)
+             for table_name in join_tables
+             if not self.is_association(table_name)  # Don't include association columns in the denormalized view.'
+             for c in self.name_to_table(table_name).columns
+             if c.name not in skip_columns
+         ]
+
+         # List of dataset ids to include in the denormalized view.
+         dataset_rids = dataset.list_dataset_children(recurse=True)
+         return join_tables, tables, denormalized_columns, dataset_rids, dataset_element_tables
+

      def _table_relationship(
          self,
          table1: TableInput,
  [(fk.referenced_columns[0], fk.foreign_key_columns[0]) for fk in table1.referenced_by if fk.table == table2]
303
413
  )
304
414
  if len(relationships) != 1:
305
- raise DerivaMLException(f"Ambiguous linkage between {table1.name} and {table2.name}")
415
+ raise DerivaMLException(
416
+ f"Ambiguous linkage between {table1.name} and {table2.name}: {[(r[0].name, r[1].name) for r in relationships]}"
417
+ )
306
418
  return relationships[0]
307
419
 
308
420
  def _schema_to_paths(
@@ -226,7 +226,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
226
226
  indexes: A tuple whose first element is the column index of the file name and whose second element
227
227
  is the index of the URL in an asset table. Tuple is None if table is not an asset table.
228
228
  o: list:
229
- indexes: Optional[tuple[int: int]]:
229
+ indexes: Optional[tuple[int, int]]:
230
230
 
231
231
  Returns:
232
232
  Tuple of updated column values.
@@ -262,7 +262,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
262
262
  DatasetBag object for the specified dataset.
263
263
  """
264
264
  if dataset_rid and dataset_rid not in self.bag_rids:
265
- DerivaMLException(f"Dataset RID {dataset_rid} is not in model.")
265
+ raise DerivaMLException(f"Dataset RID {dataset_rid} is not in model.")
266
266
  return DatasetBag(self, dataset_rid or self.dataset_rid)
267
267
 
268
268
  def dataset_version(self, dataset_rid: Optional[RID] = None) -> DatasetVersion:
@@ -0,0 +1,19 @@
1
+ """A module defining the DatasetLike protocol for dataset operations.
2
+
3
+ This module contains the definition of the DatasetLike protocol, which
4
+ provides an interface for datasets to implement specific functionality related
5
+ to listing dataset children. It is particularly useful for ensuring type
6
+ compatibility for objects that mimic datasets in their behavior.
7
+
8
+ Classes:
9
+ DatasetLike: A protocol that specifies methods required for dataset-like
10
+ objects.
11
+ """
12
+ from typing import Protocol, runtime_checkable
13
+
14
+ from deriva_ml.core.definitions import RID
15
+
16
+
17
+ @runtime_checkable
18
+ class DatasetLike(Protocol):
19
+ def list_dataset_children(self, dataset_rid: RID, recurse: bool = False) -> list[RID]: ...
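DatasetLike is a runtime_checkable Protocol, so Dataset, DatasetBag, or any object exposing list_dataset_children satisfies the type that _prepare_wide_table expects; the check is purely structural. A minimal, self-contained sketch of that mechanism (the Stub class is hypothetical):

    # Self-contained sketch of the structural check; Stub is a hypothetical class.
    from typing import Protocol, runtime_checkable

    @runtime_checkable
    class DatasetLike(Protocol):
        def list_dataset_children(self, dataset_rid: str, recurse: bool = False) -> list[str]: ...

    class Stub:
        def list_dataset_children(self, dataset_rid: str, recurse: bool = False) -> list[str]:
            return []

    print(isinstance(Stub(), DatasetLike))  # True: only the method's presence is checked at runtime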
deriva_ml/run_notebook.py CHANGED
@@ -3,14 +3,13 @@
  import json
  import os
  import tempfile
- from datetime import datetime
  from pathlib import Path

  import nbformat
  import papermill as pm
- import regex as re
  import yaml
  from deriva.core import BaseCLI
+ from jupyter_client.kernelspec import KernelSpecManager
  from nbconvert import MarkdownExporter

  from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
@@ -44,13 +43,6 @@ class DerivaMLRunNotebookCLI(BaseCLI):
              help="Display logging output from notebook.",
          )

-         self.parser.add_argument(
-             "--catalog",
-             metavar="<1>",
-             default=1,
-             help="Catalog number. Default 1",
-         )
-
          self.parser.add_argument(
              "--parameter",
              "-p",
@@ -61,7 +53,13 @@
              help="Provide a parameter name and value to inject into the notebook.",
          )

-         self.parser.add_argument("--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None)
+         self.parser.add_argument(
+             "--kernel",
+             "-k",
+             type=str,
+             help="Name of kernel to run..",
+             default=self._find_kernel_for_venv(),
+         )

      @staticmethod
      def _coerce_number(val: str):
@@ -100,26 +98,50 @@ class DerivaMLRunNotebookCLI(BaseCLI):
              print(f"Notebook file must be an ipynb file: {notebook_file.name}.")
              exit(1)

-         os.environ["DERIVA_HOST"] = args.host
-         os.environ["DERIVA_CATALOG"] = args.catalog
-
          # Create a workflow instance for this specific version of the script.
          # Return an existing workflow if one is found.
          notebook_parameters = pm.inspect_notebook(notebook_file)
+
          if args.inspect:
              for param, value in notebook_parameters.items():
                  print(f"{param}:{value['inferred_type_name']} (default {value['default']})")
              return
          else:
-             notebook_parameters = (
-                 {k: v["default"] for k, v in notebook_parameters.items()}
-                 | {"host": args.host, "hostname": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
-                 | parameters
-             )
-             print(f"Running notebook {notebook_file.name} with parameters:")
-             for param, value in notebook_parameters.items():
-                 print(f" {param}:{value}")
-             self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel[0], log=args.log_output)
+             notebook_parameters = {k: v["default"] for k, v in notebook_parameters.items()} | parameters
+             self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
+
+     @staticmethod
+     def _find_kernel_for_venv() -> str | None:
+         """
+         Return the name and spec of an existing Jupyter kernel corresponding
+         to a given Python virtual environment path.
+
+         Parameters
+         ----------
+         venv_path : str
+             Absolute or relative path to the virtual environment.
+
+         Returns
+         -------
+         dict | None
+             The kernel spec (as a dict) if found, or None if not found.
+         """
+         venv = os.environ.get("VIRTUAL_ENV")
+         if not venv:
+             return None
+         venv_path = Path(venv).resolve()
+         ksm = KernelSpecManager()
+         for name, spec in ksm.get_all_specs().items():
+             kernel_json = spec.get("spec", {})
+             argv = kernel_json.get("argv", [])
+             # check for python executable path inside argv
+             for arg in argv:
+                 try:
+                     if Path(arg).resolve() == venv_path.joinpath("bin", "python").resolve():
+                         return name
+                 except Exception:
+                     continue
+         return None

      def run_notebook(self, notebook_file: Path, parameters, kernel=None, log=False):
          url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
@@ -127,8 +149,9 @@ class DerivaMLRunNotebookCLI(BaseCLI):
          os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
          os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()
          with tempfile.TemporaryDirectory() as tmpdirname:
-             print(f"Running notebook {notebook_file.name} with parameters:")
              notebook_output = Path(tmpdirname) / Path(notebook_file).name
+             execution_rid_path = Path(tmpdirname) / "execution_rid.json"
+             os.environ["DERIVA_ML_SAVE_EXECUTION_RID"] = execution_rid_path.as_posix()
              pm.execute_notebook(
                  input_path=notebook_file,
                  output_path=notebook_output,
@@ -137,22 +160,19 @@
                  log_output=log,
              )
              print(f"Notebook output saved to {notebook_output}")
-             catalog_id = execution_rid = None
-             with Path(notebook_output).open("r") as f:
-                 for line in f:
-                     if m := re.search(
-                         r"Execution RID: https://(?P<host>.*)/id/(?P<catalog_id>.*)/(?P<execution_rid>[\w-]+)",
-                         line,
-                     ):
-                         hostname = m["host"]
-                         catalog_id = m["catalog_id"]
-                         execution_rid = m["execution_rid"]
-             if not execution_rid:
+             with execution_rid_path.open("r") as f:
+                 execution_config = json.load(f)
+
+             if not execution_config:
                  print("Execution RID not found.")
                  exit(1)

+             execution_rid = execution_config["execution_rid"]
+             hostname = execution_config["hostname"]
+             catalog_id = execution_config["catalog_id"]
+             workflow_rid = execution_config["workflow_rid"]
              ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id, working_dir=tmpdirname)
-             workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]
+             workflow_rid = ml_instance.retrieve_rid(execution_config["execution_rid"])["Workflow"]

              execution = Execution(
                  configuration=ExecutionConfiguration(workflow=workflow_rid),
@@ -183,21 +203,6 @@ class DerivaMLRunNotebookCLI(BaseCLI):
                  file_name=notebook_output_md,
                  asset_types=ExecAssetType.notebook_output,
              )
-             execution.asset_file_path(
-                 asset_name=MLAsset.execution_asset,
-                 file_name=notebook_output_md,
-                 asset_types=ExecAssetType.notebook_output,
-             )
-             print("parameter....")
-
-             parameter_file = execution.asset_file_path(
-                 asset_name=MLAsset.execution_asset,
-                 file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
-                 asset_types=ExecAssetType.input_file.value,
-             )
-
-             with Path(parameter_file).open("w") as f:
-                 json.dump(parameters, f)
              execution.upload_execution_outputs()

          print(ml_instance.cite(execution_rid))
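Instead of scraping the executed notebook for an "Execution RID:" line, the runner now hands the notebook a file path in DERIVA_ML_SAVE_EXECUTION_RID, and the Execution changes above write the identifiers to that file as JSON. A hedged sketch of the handshake (the path and RID values are placeholders):

    # Hedged sketch of the DERIVA_ML_SAVE_EXECUTION_RID handshake; values are placeholders.
    import json
    import os
    from pathlib import Path

    rid_path = Path("/tmp/execution_rid.json")
    os.environ["DERIVA_ML_SAVE_EXECUTION_RID"] = rid_path.as_posix()  # runner side

    # Execution side writes the identifiers (as in the Execution changes above).
    rid_path.write_text(json.dumps({
        "hostname": "example.org",
        "catalog_id": "1",
        "workflow_rid": "1-WXYZ",
        "execution_rid": "1-EXEC",
    }))

    # Runner side reads them back once papermill has finished.
    execution_config = json.loads(rid_path.read_text())
    print(execution_config["execution_rid"])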
@@ -1,4 +1,5 @@
  import argparse
+ import sys

  from deriva.core.ermrest_model import Model, Table
  from deriva.core.utils.core_utils import tag as deriva_tags
@@ -183,10 +184,10 @@ def catalog_annotation(model: DerivaModel) -> None:

  def asset_annotation(asset_table: Table):
      """Generate annotations for an asset table.
-
+
      Args:
          asset_table: The Table object representing the asset table.
-
+
      Returns:
          A dictionary containing the annotations for the asset table.
      """
@@ -316,7 +317,8 @@ def generate_annotation(model: Model, schema: str) -> dict:
      },
      {
          "source": [
-             {"inbound": [schema, "Execution_Metadata_Execution_fkey"]},
+             {"inbound": [schema, "Execution_Metadata_Execution_Execution_fkey"]},
+             {"outbound": [schema, "Execution_Metadata_Execution_Execution_Metadata_fkey"]},
              "RID",
          ],
          "markdown_name": "Execution Metadata",
@@ -453,9 +455,9 @@

  def main():
      """Main entry point for the annotations CLI.
-
+
      Applies annotations to the ML schema based on command line arguments.
-
+
      Returns:
          None. Executes the CLI.
      """
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: deriva-ml
- Version: 1.14.46
+ Version: 1.16.0
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
  Author-email: ISRD <isrd-dev@isi.edu>
  Requires-Python: >=3.10
@@ -21,6 +21,7 @@ Requires-Dist: nbstripout
  Requires-Dist: papermill
  Requires-Dist: pandas-stubs==2.2.3.250527
  Requires-Dist: pyyaml
+ Requires-Dist: hydra_zen
  Dynamic: license-file

  # DerivaML
@@ -0,0 +1,44 @@
+ deriva_ml/__init__.py,sha256=Yt8q0WbLFt7fbRLZe_f0bJWy1Qo6vidQzlYWQoT8U7o,2097
+ deriva_ml/bump_version.py,sha256=eN2G5G_OeiuFxhOdjjwfxD8Rmv6dFvzIm0y_1x4Mif4,4020
+ deriva_ml/demo_catalog.py,sha256=6hlSVGNQ364chisKvSyMy2BBxzhQq1mLPPlW324eca4,14931
+ deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
+ deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
+ deriva_ml/run_notebook.py,sha256=_pds1q3WcfWqhCBqKeznbwSv5n7OND8FkL6JQ2Jkfmc,8093
+ deriva_ml/core/__init__.py,sha256=Ko8GsWc7K_eDFW0-GaNS6gOWYP8cWHWir-ChSQaHntE,856
+ deriva_ml/core/base.py,sha256=xsz1h5QZVE7PCVZiCt7lRV43Dupq9c7elUsbGk3QHJQ,61919
+ deriva_ml/core/config.py,sha256=dF4rOLFmbk1DEkQimqbiH4pC519nRZWpwKItARNMiZ4,2244
+ deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
+ deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
+ deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
+ deriva_ml/core/ermrest.py,sha256=N0IJ3TE87jElaBChEIo5AFDTr0SIrb6F90yiimRfPr4,10182
+ deriva_ml/core/exceptions.py,sha256=4MZNPOyN-UMaGeY9sqJDVwh_iOmz1ntp4usSyCNqVMg,934
+ deriva_ml/core/filespec.py,sha256=BQAAcRXfXq1lDcsKlokLOOXCBtEZpPgXxrFOIZYAgLg,4229
+ deriva_ml/dataset/__init__.py,sha256=tV3yK9tb8iB9f5P3ml459bP2uPWJhCJcplhmbGVtoMI,411
+ deriva_ml/dataset/aux_classes.py,sha256=K-cVBrZY1j0ZO__FORHRVdVz3O69OgvhO5YkhwJJyxE,7348
+ deriva_ml/dataset/dataset.py,sha256=c6hGsIH9UOn8ayDP7EsYzqgKeZm2Kr7naliPLQxGtSg,64473
+ deriva_ml/dataset/dataset_bag.py,sha256=peFEMU8PfExbzJ0VJGIL3QDIPz0stmUR7daCXptA3f4,20256
+ deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
+ deriva_ml/dataset/upload.py,sha256=Q9bNVv6xTK_IpwFOU_ugq33IWRs0AWyFoF8Rzwi6OVs,16430
+ deriva_ml/execution/__init__.py,sha256=Zs-ZNmwrJJW6suJilzh3vdcPvzI8HIA0Ym0VUwuiQME,668
+ deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
+ deriva_ml/execution/execution.py,sha256=X4HBADT_F5ZuER8qBcnNYqRUuMU3BaEV7rMgXEUrLCg,46096
+ deriva_ml/execution/execution_configuration.py,sha256=oWgBueuFO0-PBm9LM08EQeFeY9IXF8tVbd3LyRsTiNw,5437
+ deriva_ml/execution/workflow.py,sha256=7CwPrgs3FKQHiEVus0PpK9w5hVKLKZnCrlu_nT8GFe8,13604
+ deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ deriva_ml/model/catalog.py,sha256=TY6QdlhZX7OL5bhWcGkAFpZNaZye5l_rkb1Cih-bTjs,19180
+ deriva_ml/model/database.py,sha256=KEPJKIlmIxTiF4Th1NgpuuuMBhbfsgsd_k8UHs-hMg4,14843
+ deriva_ml/model/sql_mapper.py,sha256=_0QsJEVSgSPtxrWKSgjfPZCQ1aMVcjR_Tk2OxLhWEvY,1696
+ deriva_ml/protocols/dataset.py,sha256=1TyaT--89Elcs-nCvVyJxUj4cDaLztZOuSOzzj1cBMk,699
+ deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
+ deriva_ml/schema/annotations.py,sha256=CMcRqYUlyW8iLCYp6sYJsncaRNtp4kFKoxcg-i-t-50,18302
+ deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
+ deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
+ deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
+ deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
+ deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
+ deriva_ml-1.16.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ deriva_ml-1.16.0.dist-info/METADATA,sha256=gN7KnQ1MDdqSSaVJOIKY-lBEwEE8s0bRMoVLrZGYgtA,1214
+ deriva_ml-1.16.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ deriva_ml-1.16.0.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
+ deriva_ml-1.16.0.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
+ deriva_ml-1.16.0.dist-info/RECORD,,
@@ -1,42 +0,0 @@
- deriva_ml/__init__.py,sha256=_aMdxGG4mRTcXodLZLNpXqH8v5uqMbqFUryE9KqNSB8,1158
- deriva_ml/bump_version.py,sha256=KpHmkpEztly2QHYL4dyaIGdEMyP4F0D89rawyh5EDTs,3982
- deriva_ml/demo_catalog.py,sha256=JjPAIac_hKPh5krEhGJydjXquRnivi7kQoR8W4Khp-s,14928
- deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
- deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
- deriva_ml/run_notebook.py,sha256=QRO_CK9Q9qt_n-c0rxGdIRyTHjGOuZxt-wj0WQTnaAM,8171
- deriva_ml/core/__init__.py,sha256=V_i90pc5PB1F4UdOO6DZWzpEFaZDTaPRU-EzKXQ19eI,787
- deriva_ml/core/base.py,sha256=LI_ZLpVJwWx4DW2Wo7luALQauQ3xhBxFYHSKDAfNsag,61649
- deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
- deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
- deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
- deriva_ml/core/ermrest.py,sha256=N0IJ3TE87jElaBChEIo5AFDTr0SIrb6F90yiimRfPr4,10182
- deriva_ml/core/exceptions.py,sha256=4MZNPOyN-UMaGeY9sqJDVwh_iOmz1ntp4usSyCNqVMg,934
- deriva_ml/core/filespec.py,sha256=BQAAcRXfXq1lDcsKlokLOOXCBtEZpPgXxrFOIZYAgLg,4229
- deriva_ml/dataset/__init__.py,sha256=ukl2laJqa9J2AVqb4zlpIYc-3RaAlfRR33NMIQaoNrQ,104
- deriva_ml/dataset/aux_classes.py,sha256=9mZAln7_rrzaRbKhKA6dJOp3xeD6dHOC9NXOtJKROo4,6933
- deriva_ml/dataset/dataset.py,sha256=AU27ZtzDSpCodtbq9T-8AtqiA-x8r78wQvFBOCgaqsQ,64451
- deriva_ml/dataset/dataset_bag.py,sha256=mPIZRX5aTbVRcJbCFtdkmlnexquF8NE-onbVK_8IxVk,14224
- deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
- deriva_ml/dataset/upload.py,sha256=i_7KLfRSd2-THqZ1aG2OFAFGoyb8dJBCZZ5t1ftrtMQ,16429
- deriva_ml/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
- deriva_ml/execution/execution.py,sha256=NJjjrxGsedv0zoe-T-LxfO_5UG83KOHaxU3SY5EJ0QQ,44928
- deriva_ml/execution/execution_configuration.py,sha256=Rw4VWkBCZN9yatvSKdTqEWTfu470lpcVKfHFR0uN0jI,6248
- deriva_ml/execution/workflow.py,sha256=7CwPrgs3FKQHiEVus0PpK9w5hVKLKZnCrlu_nT8GFe8,13604
- deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- deriva_ml/model/catalog.py,sha256=dzTBcRlqgEVkPY32AUax_iu75RgFiT4Pu5au7rmrv8k,14068
- deriva_ml/model/database.py,sha256=SBkYFf0qwbGmvL0Xtn_n5DCz4roGfrhuYrM8G69Cy9Y,14837
- deriva_ml/model/sql_mapper.py,sha256=_0QsJEVSgSPtxrWKSgjfPZCQ1aMVcjR_Tk2OxLhWEvY,1696
- deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
- deriva_ml/schema/annotations.py,sha256=TuQ3vWFnK0160fRmtvsCkHx9qAcRa63MSyERB4x5a98,18197
- deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
- deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
- deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
- deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
- deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
- deriva_ml-1.14.46.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- deriva_ml-1.14.46.dist-info/METADATA,sha256=jhm5D5-SqSJD-JVSMyqLcVPpjG3vY6MLJTIZacyt_Fc,1190
- deriva_ml-1.14.46.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- deriva_ml-1.14.46.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
- deriva_ml-1.14.46.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
- deriva_ml-1.14.46.dist-info/RECORD,,