PyPI - deriva-ml - Versions diffs - 1.8.4__py3-none-any.whl → 1.8.5__py3-none-any.whl - Mend

deriva-ml 1.8.4__py3-none-any.whl → 1.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

deriva_ml/VERSION.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "1.8.4"
1	+ __version__ = "1.8.5"

deriva_ml/__init__.py CHANGED Viewed

@@ -4,6 +4,7 @@ __all__ = [
     "FileUploadState",
     "FileSpec",
     "ExecutionConfiguration",
+    "Execution",
     "Workflow",
     "DatasetBag",
     "DatasetVersion",
@@ -39,4 +40,4 @@ from .execution_configuration import (
     ExecutionConfiguration,
     Workflow,
 )
+from .execution import Execution

deriva_ml/deriva_ml_base.py CHANGED Viewed

@@ -20,6 +20,7 @@ from pathlib import Path
 import requests
 from setuptools_git_versioning import get_latest_file_commit
 import subprocess
+import shutil
 from typing import Optional, Any, Iterable, TYPE_CHECKING
 from deriva.core import (
     ErmrestCatalog,
@@ -30,6 +31,7 @@ from deriva.core import (
 )
 import deriva.core.datapath as datapath
 from deriva.core.datapath import DataPathException
+from deriva.core.deriva_server import DerivaServer
 from deriva.core.ermrest_catalog import ResolveRidResult
 from deriva.core.ermrest_model import Key, Table
 from deriva.core.hatrac_store import HatracStore
@@ -115,13 +117,13 @@ class DerivaML(Dataset):
             model_version: A string that indicates the version model.  Typically passed in via
         """
         self.credential = get_credential(hostname)
-        self.catalog = ErmrestCatalog(
+        server = DerivaServer(
             "https",
             hostname,
-            catalog_id,
-            self.credential,
+            credentials=self.credential,
             session_config=self._get_session_config(),
         )
+        self.catalog = server.connect_ermrest(catalog_id)
         self.model = DerivaModel(
             self.catalog.getCatalogModel(), domain_schema=domain_schema
         )
@@ -157,7 +159,19 @@ class DerivaML(Dataset):
             # Check if running in Jupyter's ZMQ kernel (used by notebooks)
             if ipython is not None and "IPKernelApp" in ipython.config:
                 self._notebook = Path(ipython.user_ns.get("__session__"))
-            # Check if running in Jupyter's ZMQ kernel (used by notebooks)
+                # Check if running in Jupyter's ZMQ kernel (used by notebooks)
+                try:
+                    if subprocess.run(
+                        [shutil.which("nbstripout"), "--is-installed"],
+                        check=False,
+                        capture_output=True,
+                    ).returncode:
+                        self._logger.warn(
+                            "nbstripout is not installed in repository. Please run nbstripout --install"
+                        )
+                except subprocess.CalledProcessError:
+                    self._logger.error("nbstripout is not found.")
         except (ImportError, AttributeError):
             pass
@@ -1001,16 +1015,16 @@ class DerivaML(Dataset):
     ) -> RID:
         """Identify current executing program and return a workflow RID for it
-        Determane the notebook of script that is currently being executed. Assume that  this is
+        Determine the notebook or script that is currently being executed. Assume that  this is
         being executed from a cloned GitHub repository.  Determine the remote repository name for
-        this object.  Then either retrieve an existing workflow for this executable of create
+        this object.  Then either retrieve an existing workflow for this executable or create
         a new one.
         Args:
             name: The name of the workflow.
             workflow_type: The type of the workflow.
             description: The description of the workflow.
-            create: Whether or not to create a new workflow.
+            create: Whether to create a new workflow.
         """
         # Make sure type is correct.
         self.lookup_term(MLVocab.workflow_type, workflow_type)
@@ -1045,14 +1059,14 @@ class DerivaML(Dataset):
         )
         return self.add_workflow(workflow) if create else None
-    def _github_url(self) -> tuple[str, str, bool]:
+    def _github_url(self) -> tuple[Path, str, bool]:
         """Return a GitHUB URL for the latest commit of the script from which this routine is called.
         This routine is used to be called from a script or notebook (e.g. python -m file). It assumes that
         the file is in a gitHUB repository and commited.  It returns a URL to the last commited version of this
         file in GitHUB.
-        Returns: A tuple with the filename, gethub_url and a boolaen to indicated if uncommited changes
+        Returns: A tuple with the filename, gethub_url and a boolean to indicated if uncommited changes
             have been made to the file.
         """
@@ -1098,7 +1112,7 @@ class DerivaML(Dataset):
                 check=True,
             )
             is_dirty = bool(
-                " M " in result.stdout.strip()
+                "M " in result.stdout.strip()
             )  # Returns True if output indicates a modified file
         except subprocess.CalledProcessError:
             is_dirty = False  # If Git command fails, assume no changes

deriva_ml/execution.py CHANGED Viewed

@@ -254,7 +254,7 @@ class Execution:
     def _create_notebook_checkpoint(self):
         """Trigger a checkpoint creation using Jupyter's API."""
         notebook_name = self._ml_object._notebook
-        servers = list_running_servers()
         # Look for the server running this notebook.
         root = Path("").absolute().parent.as_posix()
         servers = list(list_running_servers())

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.8.4
+Version: 1.8.5
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
@@ -13,6 +13,7 @@ Requires-Dist: pydantic>=2.10.6
 Requires-Dist: semver>3.0.0
 Requires-Dist: setuptools-git-versioning<3,>=2.0
 Requires-Dist: nbstripout
+Dynamic: license-file
 Deriva-ML is a python libary to simplify the process of creating and executing reproducible machine learning workflows
 using a deriva catalog.

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,14 @@
-deriva_ml/VERSION.py,sha256=8kdJa8mgK7VES73y02oBbzwoXZCUs42GzbJ4UU-L_3I,22
-deriva_ml/__init__.py,sha256=0PHNB8gRDALLtaffRmU7wCUgWbRHVQZcjuPJxMLNEco,856
+deriva_ml/VERSION.py,sha256=Dtbi4ISKI_kkTsaWuM-q8NfE3DySQu91TTP04Yhd8d8,22
+deriva_ml/__init__.py,sha256=DyHiqklSer7q7oPGAemkzg5Qcq2swMZf9ALwJhGf6Jo,905
 deriva_ml/database_model.py,sha256=uhoyVyd8MQmY8J9ovCH8fjxhZDxxXNkdJyYdeyEGPXA,13898
 deriva_ml/dataset.py,sha256=xC6QPUp4MZcJiEnOEU3NnzoLBL9RcJWtPTyzIQP0Ivw,60666
 deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
 deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
 deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
 deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
-deriva_ml/deriva_ml_base.py,sha256=3iA1OaPU-6Q7ixt87uDmPuHHZ5P-FyHvX0AKfi4tKp0,42224
-deriva_ml/deriva_ml_execute.py,sha256=y_rGjc97eidBuzy-AaQGe93vuTbWbkNkK9rpReqV0IY,4433
+deriva_ml/deriva_ml_base.py,sha256=e2UtT3TlDpFQrG6z0DaB2iV22wmi4TLP7qXF3hvb8to,42868
 deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
-deriva_ml/execution.py,sha256=c7dbk4HvEh7E4BLlBrf_azUxxhRSUmLQa_6G8t8OKVY,29929
+deriva_ml/execution.py,sha256=VlapQGPDQI2MOmYnA5-hpf-XM6Fu4hPLpFjNN5q9Udo,29889
 deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
 deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
 deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
@@ -27,9 +26,9 @@ deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-Xr
 deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
 deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
 deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
-deriva_ml-1.8.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deriva_ml-1.8.4.dist-info/METADATA,sha256=F14U7NvY310NBB4wGp3-OVmAUXvMy_sDNuS1ZmRjwek,631
-deriva_ml-1.8.4.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
-deriva_ml-1.8.4.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
-deriva_ml-1.8.4.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
-deriva_ml-1.8.4.dist-info/RECORD,,
+deriva_ml-1.8.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deriva_ml-1.8.5.dist-info/METADATA,sha256=uuAq67MUZyY2LP8NbZ8RMJ5q-aX3pJV5ioYZqqLbuFA,653
+deriva_ml-1.8.5.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
+deriva_ml-1.8.5.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
+deriva_ml-1.8.5.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
+deriva_ml-1.8.5.dist-info/RECORD,,

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (76.1.0)
+Generator: setuptools (77.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

deriva_ml/deriva_ml_execute.py DELETED Viewed

@@ -1,104 +0,0 @@
-from sympy import cxxcode
-from  deriva_ml import DerivaML, execution_configuration
-def execute(host, catalog, script):
-    workflow_rid = foobar
-    execution_configuration = cxxcode(
-    )
-    ml_instance = DerivaML()
-    ml_instance.create_execution(configuration)
-    script
-from deriva_ml import DerivaML, ExecutionConfiguration, DatasetSpec, RID, DerivaMLException
-import os
-import sys
-import json
-import traceback
-import argparse
-import requests
-from requests.exceptions import HTTPError, ConnectionError
-from deriva.transfer import GenericDownloader
-from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
-    DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
-    DerivaDownloadBaggingError
-from deriva.core import BaseCLI, KeyValuePairArgs, format_credential, format_exception, urlparse
-class DerivaMLExecCLI(BaseCLI):
-    def __init__(self, description, epilog, **kwargs):
-        BaseCLI.__init__(self, description, epilog, **kwargs)
-        self.parser.add_argument("--catalog", default=1, metavar="<1>", help="Catalog number. Default: 1")
-        self.parser.add_argument("--timeout", metavar="<seconds>",
-                                 help="Total number of seconds elapsed before the download is aborted.")
-        self.parser.add_argument("output_dir", metavar="<output dir>", help="Path to an output directory.")
-        self.parser.add_argument("envars", metavar="[key=value key=value ...]",
-                                 nargs=argparse.REMAINDER, action=KeyValuePairArgs, default={},
-                                 help="Variable length of whitespace-delimited key=value pair arguments used for "
-                                      "string interpolation in specific parts of the configuration file. "
-                                      "For example: key1=value1 key2=value2")
-    def main(self):
-        try:
-            args = self.parse_cli()
-        except ValueError as e:
-            sys.stderr.write(str(e))
-            return 2
-        if not args.quiet:
-            sys.stderr.write("\n")
-        try:
-            try:
-                ml_instance = DerivaML(args.hostname, args.catalog)
-                downloaded = self.execute()
-                sys.stdout.write("\n%s\n" % (json.dumps(downloaded)))
-            except ConnectionError as e:
-                raise DerivaDownloadError("Connection error occurred. %s" % format_exception(e))
-            except HTTPError as e:
-                if e.response.status_code == requests.codes.unauthorized:
-                    raise DerivaDownloadAuthenticationError(
-                        "The requested service requires authentication and a valid login session could "
-                        "not be found for the specified host. Server responded: %s" % e)
-                elif e.response.status_code == requests.codes.forbidden:
-                    raise DerivaDownloadAuthorizationError(
-                        "A requested operation was forbidden. Server responded: %s" % e)
-        except (DerivaDownloadError, DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError,
-                DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, DerivaDownloadBaggingError) as e:
-            sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
-            if args.debug:
-                traceback.print_exc()
-            return 1
-        except:
-            sys.stderr.write("An unexpected error occurred.")
-            traceback.print_exc()
-            return 1
-        finally:
-            if not args.quiet:
-                sys.stderr.write("\n\n")
-        return 0
-def do_stuff():
-    pass
-def main(datasets: list[RID], model: list[RID], hostname: str, catalog_id: str):
-    my_url = DerivaML.github_url()
-    ml_instance = DerivaML(hostname, catalog_id)
-    ml_instance.lookup_workflow(my_url)
-    config = ExecutionConfiguration(
-        datasets=[DatasetSpec(rid=dataset,
-                              version=ml_instance.dataset_version(dataset)) for dataset in datasets],
-        assets=model,
-        workflow= ml_instance.lookup_workflow(my_url)
-    )
-    execution = ml_instance.create_execution(config)
-    with execution as e:
-        do_stuff()
-    execution.upload_execution_outputs()
-if __name__ == "__main__":
-    main(datasets, model, hostname, catalog_id)
-if __file__ == matplotlib_inline

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

deriva-ml 1.8.4__py3-none-any.whl → 1.8.5__py3-none-any.whl

deriva-ml 1.8.4__py3-none-any.whl → 1.8.5__py3-none-any.whl