PyPI - deriva-ml - Versions diffs - 1.14.32__tar.gz → 1.14.34__tar.gz - Mend

deriva-ml 1.14.32__tar.gz → 1.14.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.14.32
+Version: 1.14.34
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
 using a deriva catalog.
-## Installing the GitHub CLI
+Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
-The script release.sh will create a new release tag in GitHub.  This script requires the
-GitHUB CLI be installed.
+To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suite your requirements.
-See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
+## References

deriva_ml-1.14.34/README.md ADDED Viewed

@@ -0,0 +1,11 @@
+# DerivaML
+Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
+using a deriva catalog.
+Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
+To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suite your requirements.
+## References

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/pyproject.toml RENAMED Viewed

@@ -64,6 +64,9 @@ setup_hooks = []
 pre_commit_hooks = []
 post_commit_hooks = []
+[tool.pytest]
+mock_use_standalone_module = true
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = ["test_*.py"]
@@ -97,6 +100,7 @@ dev = [
     "mkdocstrings[python]",
     "mkdocs-material",
     "pytest>=8.4.1",
+    "pytest-mock",
     "pytest-coverage>=0.0",
     "ruff"
 ]

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/core/base.py RENAMED Viewed

@@ -117,7 +117,7 @@ class DerivaML(Dataset):
         cache_dir: str | Path | None = None,
         working_dir: str | Path | None = None,
         ml_schema: str = ML_SCHEMA,
-        logging_level=logging.INFO,
+        logging_level=logging.WARNING,
         credential=None,
         use_minid: bool = True,
     ):

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/feature.py RENAMED Viewed

@@ -52,6 +52,7 @@ class FeatureRecord(BaseModel):
     class Config:
         arbitrary_types_allowed = True
+        extra = "forbid"
     @classmethod
     def feature_columns(cls) -> set[Column]:

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml/run_notebook.py RENAMED Viewed

@@ -95,7 +95,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             exit(1)
         os.environ["DERIVA_HOST"] = args.host
-        os.environ["DERIVA_CATALOG_ID"] = args.catalog
+        os.environ["DERIVA_CATALOG"] = args.catalog
         # Create a workflow instance for this specific version of the script.
         # Return an existing workflow if one is found.
@@ -106,14 +106,14 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             return
         else:
             notebook_parameters = (
-                {"host": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
-                | {k: v["default"] for k, v in notebook_parameters.items()}
+                {k: v["default"] for k, v in notebook_parameters.items()}
+                | {"host": args.host, "hostname": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
                 | parameters
             )
             print(f"Running notebook {notebook_file.name} with parameters:")
             for param, value in notebook_parameters.items():
                 print(f"  {param}:{value}")
-            self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
+            self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel[0], log=args.log_output)
     def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
         url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.14.32
+Version: 1.14.34
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
 using a deriva catalog.
-## Installing the GitHub CLI
+Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
-The script release.sh will create a new release tag in GitHub.  This script requires the
-GitHUB CLI be installed.
+To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suite your requirements.
-See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
+## References

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/conftest.py RENAMED Viewed

@@ -5,7 +5,7 @@ Pytest configuration and shared fixtures.
 import os
 import pytest
-from test_utils import MLCatalog, MLDatasetCatalog
+from test_utils import MLCatalog, MLDatasetCatalog, create_jupyter_kernel, destroy_jupyter_kernel
 from deriva_ml import DerivaML
 from deriva_ml.demo_catalog import (
@@ -61,6 +61,16 @@ def dataset_test(catalog_with_datasets):
     return catalog_with_datasets
+@pytest.fixture(scope="function")
+def notebook_test(deriva_catalog, tmp_path):
+    deriva_catalog.reset_demo_catalog()
+    create_jupyter_kernel("test_kernel", tmp_path)
+    yield DerivaML(deriva_catalog.hostname, deriva_catalog.catalog_id, use_minid=False, working_dir=tmp_path)
+    print("Resetting catalog... ", end="")
+    deriva_catalog.reset_demo_catalog()
+    destroy_jupyter_kernel("test_kernel")
 @pytest.fixture(scope="function")
 def test_ml_demo_catalog(ml_catalog, tmp_path):
     # reset_demo_catalog(ml_catalog.catalog)

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/execution/test_execution.py RENAMED Viewed

@@ -3,6 +3,7 @@ Tests for the execution module.
 """
 import subprocess
+from pathlib import Path
 from tempfile import TemporaryDirectory
 from deriva_ml import (
@@ -23,20 +24,23 @@ class TestWorkflow:
         ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
         ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
         print("Running workflow-test.py ...")
+        workflow_script = Path(__file__).parent / "workflow-test.py"
         workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
         workflows = list(workflow_table.entities().fetch())
         assert 0 == len(workflows)
         result = subprocess.run(
             [
                 "python",
-                "execution/workflow-test.py",
+                workflow_script.as_posix(),
                 ml_instance.catalog.deriva_server.server,
                 ml_instance.catalog_id,
             ],
             capture_output=True,
             text=True,
         )
+        print(result.stdout)
+        print(result.stderr)
         workflows = list(workflow_table.entities().fetch())
         assert 1 == len(workflows)
         workflow_rid = workflows[0]["RID"]
@@ -50,18 +54,22 @@ class TestWorkflow:
         result = subprocess.run(
             [
                 "python",
-                "execution/workflow-test.py",
+                workflow_script.as_posix(),
                 ml_instance.catalog.deriva_server.server,
                 ml_instance.catalog_id,
             ],
             capture_output=True,
             text=True,
         )
+        print(result.stdout)
+        print(result.stderr)
         new_workflow = result.stdout.strip()
         assert new_workflow == workflow_rid
-    def test_workflow_creation_notebook(self, test_ml):
-        ml_instance = test_ml
+    def test_workflow_creation_notebook(self, notebook_test):
+        ml_instance = notebook_test
+        notebook_path = Path(__file__).parent / "workflow-test.ipynb"  # directory where this test lives
         ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
         ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
         workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
@@ -69,14 +77,17 @@ class TestWorkflow:
         assert 0 == len(workflows)
         print("Running notebook...")
         result = subprocess.run(
             [
                 "deriva-ml-run-notebook",
-                "execution/workflow-test.ipynb",
+                notebook_path.as_posix(),
                 "--host",
                 ml_instance.catalog.deriva_server.server,
                 "--catalog",
                 ml_instance.catalog_id,
+                "--kernel",
+                "test_kernel",
                 "--log-output",
             ],
             capture_output=True,

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/execution/workflow-test.ipynb RENAMED Viewed

@@ -18,9 +18,7 @@
    "outputs": [],
    "source": [
     "from deriva_ml import DerivaML, MLVocab as vc\n",
-    "import os\n",
-    "import logging\n",
-    "logger = logging.getLogger()"
+    "import os"
    ]
   },
   {
@@ -42,8 +40,8 @@
    },
    "outputs": [],
    "source": [
-    "hostname = None\n",
-    "catalog_id = None"
+    "host = None\n",
+    "catalog = None"
    ]
   },
   {
@@ -54,24 +52,21 @@
    "outputs": [],
    "source": [
     "# Modify these to your desired server and catalog.\n",
-    "hostname = hostname or os.environ.get(\"DERIVA_HOST\")\n",
-    "catalog_id = catalog_id or os.environ.get(\"DERIVA_CATALOG_ID\")\n",
+    "host = host or os.environ.get(\"DERIVA_HOST\")\n",
+    "catalog = catalog or os.environ.get(\"DERIVA_CATALOG\")\n",
     "\n",
     "# Change this line to call the domain specific class derived from DerivaML\n",
-    "ml_instance = DerivaML(hostname, catalog_id)\n",
-    "logger.info(\"Got ML instance:\")\n",
+    "ml_instance = DerivaML(host, catalog)\n",
     "\n",
     "ml_instance.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
     "ml_instance.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
-    "print(\"Added terms to ML instance\")\n",
+    "\n",
     "api_workflow = ml_instance.create_workflow(\n",
-    "    name=\"Test Workflow One\",\n",
-    "    workflow_type=\"Test Workflow\",\n",
-    "    description=\"A test operation\",\n",
-    ")\n",
-    "logger.info(f\"URL: {api_workflow.url}\")\n",
-    "rid = ml_instance.add_workflow(api_workflow)\n",
-    "logger.info(f\"RID {rid}\")\n"
+    "     name=\"Test Workflow One\",\n",
+    "     workflow_type=\"Test Workflow\",\n",
+    "     description=\"A test operation\",\n",
+    " )\n",
+    "rid = ml_instance.add_workflow(api_workflow)"
    ]
   }
  ],

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/execution/workflow-test.py RENAMED Viewed

@@ -6,7 +6,6 @@ from deriva_ml import MLVocab as vc
 hostname = sys.argv[1]
 catalog_id = sys.argv[2]
 ml_instance = DerivaML(hostname, catalog_id)
 ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/feature/test_features.py RENAMED Viewed

@@ -2,9 +2,8 @@
 Tests for feature functionality.
 """
-from unittest.mock import Mock
 import pytest
+from pydantic import ValidationError
 from deriva_ml import (
     BuiltinTypes,
@@ -21,14 +20,14 @@ from deriva_ml.feature import FeatureRecord
 class TestFeatureRecord:
     """Test cases for the FeatureRecord base class."""
-    def test_feature_record_creation(self):
+    def test_feature_record_creation(self, mocker):
         """Test basic FeatureRecord creation."""
         # Create a mock feature
-        mock_feature = Mock()
-        mock_feature.feature_columns = {Mock(name="value"), Mock(name="confidence")}
-        mock_feature.asset_columns = {Mock(name="image_file")}
-        mock_feature.term_columns = {Mock(name="category")}
-        mock_feature.value_columns = {Mock(name="score")}
+        mock_feature = mocker.Mock()
+        mock_feature.feature_columns = {mocker.Mock(name="value"), mocker.Mock(name="confidence")}
+        mock_feature.asset_columns = {mocker.Mock(name="image_file")}
+        mock_feature.term_columns = {mocker.Mock(name="category")}
+        mock_feature.value_columns = {mocker.Mock(name="score")}
         # Create a test class that inherits from FeatureRecord
         class TestFeature(FeatureRecord):
@@ -58,17 +57,17 @@ class TestFeatureRecord:
         assert record.category == "good"
         assert record.score == 0.8
-    def test_feature_record_column_methods(self):
+    def test_feature_record_column_methods(self, mocker):
         """Test the column access methods of FeatureRecord."""
         # Create mock columns
-        value_col = Mock(name="value")
-        confidence_col = Mock(name="confidence")
-        asset_col = Mock(name="image_file")
-        term_col = Mock(name="category")
-        value_only_col = Mock(name="score")
+        value_col = mocker.Mock(name="value")
+        confidence_col = mocker.Mock(name="confidence")
+        asset_col = mocker.Mock(name="image_file")
+        term_col = mocker.Mock(name="category")
+        value_only_col = mocker.Mock(name="score")
         # Create a mock feature
-        mock_feature = Mock()
+        mock_feature = mocker.Mock()
         mock_feature.feature_columns = {value_col, confidence_col, asset_col, term_col, value_only_col}
         mock_feature.asset_columns = {asset_col}
         mock_feature.term_columns = {term_col}
@@ -141,6 +140,22 @@ class TestFeatures:
         with pytest.raises(DerivaMLException):
             ml_instance.lookup_feature("Subject", "SubjectHealth1")
+    def test_feature_record(self, dataset_test, tmp_path):
+        ml_instance = DerivaML(
+            dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
+        )
+        SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
+        print(SubjectHealthFeature.model_fields.keys())
+        print(SubjectHealthFeature.feature_columns())
+        with pytest.raises(ValidationError):
+            SubjectHealthFeature(Subject="SubjectRID", Health="Good", Scale=23, Foo="Bar")
+        print(SubjectHealthFeature.value_columns())
+        print(SubjectHealthFeature.term_columns())
+        print(SubjectHealthFeature.asset_columns())
+        print(SubjectHealthFeature.feature_columns())
     def test_add_feature(self, dataset_test, tmp_path):
         ml_instance = DerivaML(
             dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
@@ -166,7 +181,8 @@ class TestFeatures:
         with feature_execution.execute() as exe:
             SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
-            exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], Health="Good", Scale=23)])
+            print(SubjectHealthFeature.feature_columns())
+            exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], SubjectHealth="Sick", Scale=23)])
         feature_execution.upload_execution_outputs()
         features = list(ml_instance.list_feature_values("Subject", "Health"))

{deriva_ml-1.14.32 → deriva_ml-1.14.34}/tests/test_utils.py RENAMED Viewed

@@ -1,8 +1,12 @@
+import os
+import shutil
 from tempfile import TemporaryDirectory
 from urllib.parse import quote as urlquote
 from demo_catalog import create_demo_features
 from deriva.core.datapath import DataPathException
+from ipykernel.kernelspec import install
+from jupyter_client.kernelspec import KernelSpecManager
 from deriva_ml import DerivaML
 from deriva_ml.demo_catalog import (
@@ -42,7 +46,14 @@ class MLCatalog:
         pb = self.catalog.getPathBuilder()
         ml_path = pb.schemas["deriva-ml"]
         domain_path = pb.schemas[self.domain_schema]
-        for t in ["Dataset_Execution", "Dataset_Version", "Dataset_Dataset", "Workflow", "Workflow_Execution"]:
+        for t in [
+            "Dataset_Execution",
+            "Dataset_Version",
+            "Dataset_Dataset",
+            "Execution",
+            "Workflow_Execution",
+            "Workflow",
+        ]:
             try:
                 ml_path.tables[t].path.delete()
             except DataPathException:
@@ -110,3 +121,60 @@ class MLDatasetCatalog:
         with TemporaryDirectory() as tmp_dir:
             ml_instance = DerivaML(self.catalog.hostname, self.catalog.catalog_id, working_dir=tmp_dir, use_minid=False)
             self.dataset_description: DatasetDescription = create_demo_datasets(ml_instance)
+def create_jupyter_kernel(name: str, kernel_dir, display_name: str = None, user: bool = True) -> None:
+    """
+    Create and install a Jupyter kernel spec using ipykernel.
+    Parameters
+    ----------
+    name : str
+        The internal name of the kernel (used in `--kernel`).
+    display_name : str, optional
+        The label shown in Jupyter’s kernel chooser (defaults to name).
+    user : bool, default=True
+        If True, install for the current user only.
+        If False, requires admin rights (system-wide).
+    """
+    if display_name is None:
+        display_name = name
+    os.environ["JUPYTER_PATH"] = f"{kernel_dir}/share/jupyter"
+    print(f"Installing Jupyter kernel '{name}' with display name '{display_name}'")
+    install(
+        kernel_name=name,
+        display_name=display_name,
+        prefix=kernel_dir,  # ensures it uses the current environment
+    )
+    print("✅ Kernel installed successfully.")
+def destroy_jupyter_kernel(name: str, user: bool = True) -> None:
+    """
+    Remove a Jupyter kernel spec by name.
+    Parameters
+    ----------
+    name : str
+        The internal kernel name (the same name used in create_jupyter_kernel).
+    user : bool, default=True
+        If True, remove from the user-level kernels directory.
+        If False, attempt system-wide removal (requires permissions).
+    """
+    ksm = KernelSpecManager()
+    kernels = ksm.find_kernel_specs()
+    if name not in kernels:
+        print(f"❌ Kernel '{name}' not found.")
+        return
+    kernel_path = kernels[name]
+    print(f"Removing kernel '{name}' at {kernel_path}")
+    try:
+        shutil.rmtree(kernel_path)
+        print(f"✅ Kernel '{name}' removed successfully.")
+    except Exception as e:
+        print(f"⚠️ Failed to remove kernel '{name}': {e}")

deriva-ml 1.14.32__tar.gz → 1.14.34__tar.gz

deriva-ml 1.14.32__tar.gz → 1.14.34__tar.gz