deriva-ml 1.8.4__py3-none-any.whl → 1.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/VERSION.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.8.4"
1
+ __version__ = "1.8.5"
deriva_ml/__init__.py CHANGED
@@ -4,6 +4,7 @@ __all__ = [
4
4
  "FileUploadState",
5
5
  "FileSpec",
6
6
  "ExecutionConfiguration",
7
+ "Execution",
7
8
  "Workflow",
8
9
  "DatasetBag",
9
10
  "DatasetVersion",
@@ -39,4 +40,4 @@ from .execution_configuration import (
39
40
  ExecutionConfiguration,
40
41
  Workflow,
41
42
  )
42
-
43
+ from .execution import Execution
@@ -20,6 +20,7 @@ from pathlib import Path
20
20
  import requests
21
21
  from setuptools_git_versioning import get_latest_file_commit
22
22
  import subprocess
23
+ import shutil
23
24
  from typing import Optional, Any, Iterable, TYPE_CHECKING
24
25
  from deriva.core import (
25
26
  ErmrestCatalog,
@@ -30,6 +31,7 @@ from deriva.core import (
30
31
  )
31
32
  import deriva.core.datapath as datapath
32
33
  from deriva.core.datapath import DataPathException
34
+ from deriva.core.deriva_server import DerivaServer
33
35
  from deriva.core.ermrest_catalog import ResolveRidResult
34
36
  from deriva.core.ermrest_model import Key, Table
35
37
  from deriva.core.hatrac_store import HatracStore
@@ -115,13 +117,13 @@ class DerivaML(Dataset):
115
117
  model_version: A string that indicates the version model. Typically passed in via
116
118
  """
117
119
  self.credential = get_credential(hostname)
118
- self.catalog = ErmrestCatalog(
120
+ server = DerivaServer(
119
121
  "https",
120
122
  hostname,
121
- catalog_id,
122
- self.credential,
123
+ credentials=self.credential,
123
124
  session_config=self._get_session_config(),
124
125
  )
126
+ self.catalog = server.connect_ermrest(catalog_id)
125
127
  self.model = DerivaModel(
126
128
  self.catalog.getCatalogModel(), domain_schema=domain_schema
127
129
  )
@@ -157,7 +159,19 @@ class DerivaML(Dataset):
157
159
  # Check if running in Jupyter's ZMQ kernel (used by notebooks)
158
160
  if ipython is not None and "IPKernelApp" in ipython.config:
159
161
  self._notebook = Path(ipython.user_ns.get("__session__"))
160
- # Check if running in Jupyter's ZMQ kernel (used by notebooks)
162
+ # Check if running in Jupyter's ZMQ kernel (used by notebooks)
163
+ try:
164
+ if subprocess.run(
165
+ [shutil.which("nbstripout"), "--is-installed"],
166
+ check=False,
167
+ capture_output=True,
168
+ ).returncode:
169
+ self._logger.warn(
170
+ "nbstripout is not installed in repository. Please run nbstripout --install"
171
+ )
172
+ except subprocess.CalledProcessError:
173
+ self._logger.error("nbstripout is not found.")
174
+
161
175
  except (ImportError, AttributeError):
162
176
  pass
163
177
 
@@ -1001,16 +1015,16 @@ class DerivaML(Dataset):
1001
1015
  ) -> RID:
1002
1016
  """Identify current executing program and return a workflow RID for it
1003
1017
 
1004
- Determane the notebook of script that is currently being executed. Assume that this is
1018
+ Determine the notebook or script that is currently being executed. Assume that this is
1005
1019
  being executed from a cloned GitHub repository. Determine the remote repository name for
1006
- this object. Then either retrieve an existing workflow for this executable of create
1020
+ this object. Then either retrieve an existing workflow for this executable or create
1007
1021
  a new one.
1008
1022
 
1009
1023
  Args:
1010
1024
  name: The name of the workflow.
1011
1025
  workflow_type: The type of the workflow.
1012
1026
  description: The description of the workflow.
1013
- create: Whether or not to create a new workflow.
1027
+ create: Whether to create a new workflow.
1014
1028
  """
1015
1029
  # Make sure type is correct.
1016
1030
  self.lookup_term(MLVocab.workflow_type, workflow_type)
@@ -1045,14 +1059,14 @@ class DerivaML(Dataset):
1045
1059
  )
1046
1060
  return self.add_workflow(workflow) if create else None
1047
1061
 
1048
- def _github_url(self) -> tuple[str, str, bool]:
1062
+ def _github_url(self) -> tuple[Path, str, bool]:
1049
1063
  """Return a GitHUB URL for the latest commit of the script from which this routine is called.
1050
1064
 
1051
1065
  This routine is used to be called from a script or notebook (e.g. python -m file). It assumes that
1052
1066
  the file is in a gitHUB repository and commited. It returns a URL to the last commited version of this
1053
1067
  file in GitHUB.
1054
1068
 
1055
- Returns: A tuple with the filename, gethub_url and a boolaen to indicated if uncommited changes
1069
+ Returns: A tuple with the filename, gethub_url and a boolean to indicated if uncommited changes
1056
1070
  have been made to the file.
1057
1071
 
1058
1072
  """
@@ -1098,7 +1112,7 @@ class DerivaML(Dataset):
1098
1112
  check=True,
1099
1113
  )
1100
1114
  is_dirty = bool(
1101
- " M " in result.stdout.strip()
1115
+ "M " in result.stdout.strip()
1102
1116
  ) # Returns True if output indicates a modified file
1103
1117
  except subprocess.CalledProcessError:
1104
1118
  is_dirty = False # If Git command fails, assume no changes
deriva_ml/execution.py CHANGED
@@ -254,7 +254,7 @@ class Execution:
254
254
  def _create_notebook_checkpoint(self):
255
255
  """Trigger a checkpoint creation using Jupyter's API."""
256
256
  notebook_name = self._ml_object._notebook
257
- servers = list_running_servers()
257
+
258
258
  # Look for the server running this notebook.
259
259
  root = Path("").absolute().parent.as_posix()
260
260
  servers = list(list_running_servers())
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.8.4
3
+ Version: 1.8.5
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -13,6 +13,7 @@ Requires-Dist: pydantic>=2.10.6
13
13
  Requires-Dist: semver>3.0.0
14
14
  Requires-Dist: setuptools-git-versioning<3,>=2.0
15
15
  Requires-Dist: nbstripout
16
+ Dynamic: license-file
16
17
 
17
18
  Deriva-ML is a python libary to simplify the process of creating and executing reproducible machine learning workflows
18
19
  using a deriva catalog.
@@ -1,15 +1,14 @@
1
- deriva_ml/VERSION.py,sha256=8kdJa8mgK7VES73y02oBbzwoXZCUs42GzbJ4UU-L_3I,22
2
- deriva_ml/__init__.py,sha256=0PHNB8gRDALLtaffRmU7wCUgWbRHVQZcjuPJxMLNEco,856
1
+ deriva_ml/VERSION.py,sha256=Dtbi4ISKI_kkTsaWuM-q8NfE3DySQu91TTP04Yhd8d8,22
2
+ deriva_ml/__init__.py,sha256=DyHiqklSer7q7oPGAemkzg5Qcq2swMZf9ALwJhGf6Jo,905
3
3
  deriva_ml/database_model.py,sha256=uhoyVyd8MQmY8J9ovCH8fjxhZDxxXNkdJyYdeyEGPXA,13898
4
4
  deriva_ml/dataset.py,sha256=xC6QPUp4MZcJiEnOEU3NnzoLBL9RcJWtPTyzIQP0Ivw,60666
5
5
  deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
6
6
  deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
7
7
  deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
8
8
  deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
9
- deriva_ml/deriva_ml_base.py,sha256=3iA1OaPU-6Q7ixt87uDmPuHHZ5P-FyHvX0AKfi4tKp0,42224
10
- deriva_ml/deriva_ml_execute.py,sha256=y_rGjc97eidBuzy-AaQGe93vuTbWbkNkK9rpReqV0IY,4433
9
+ deriva_ml/deriva_ml_base.py,sha256=e2UtT3TlDpFQrG6z0DaB2iV22wmi4TLP7qXF3hvb8to,42868
11
10
  deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
12
- deriva_ml/execution.py,sha256=c7dbk4HvEh7E4BLlBrf_azUxxhRSUmLQa_6G8t8OKVY,29929
11
+ deriva_ml/execution.py,sha256=VlapQGPDQI2MOmYnA5-hpf-XM6Fu4hPLpFjNN5q9Udo,29889
13
12
  deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
14
13
  deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
15
14
  deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
@@ -27,9 +26,9 @@ deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-Xr
27
26
  deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
28
27
  deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
29
28
  deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
30
- deriva_ml-1.8.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- deriva_ml-1.8.4.dist-info/METADATA,sha256=F14U7NvY310NBB4wGp3-OVmAUXvMy_sDNuS1ZmRjwek,631
32
- deriva_ml-1.8.4.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
33
- deriva_ml-1.8.4.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
34
- deriva_ml-1.8.4.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
35
- deriva_ml-1.8.4.dist-info/RECORD,,
29
+ deriva_ml-1.8.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
30
+ deriva_ml-1.8.5.dist-info/METADATA,sha256=uuAq67MUZyY2LP8NbZ8RMJ5q-aX3pJV5ioYZqqLbuFA,653
31
+ deriva_ml-1.8.5.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
32
+ deriva_ml-1.8.5.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
33
+ deriva_ml-1.8.5.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
34
+ deriva_ml-1.8.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.1.0)
2
+ Generator: setuptools (77.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,104 +0,0 @@
1
- from sympy import cxxcode
2
-
3
- from deriva_ml import DerivaML, execution_configuration
4
-
5
- def execute(host, catalog, script):
6
- workflow_rid = foobar
7
- execution_configuration = cxxcode(
8
-
9
- )
10
- ml_instance = DerivaML()
11
- ml_instance.create_execution(configuration)
12
- script
13
-
14
-
15
- from deriva_ml import DerivaML, ExecutionConfiguration, DatasetSpec, RID, DerivaMLException
16
- import os
17
- import sys
18
- import json
19
- import traceback
20
- import argparse
21
- import requests
22
- from requests.exceptions import HTTPError, ConnectionError
23
- from deriva.transfer import GenericDownloader
24
- from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
25
- DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
26
- DerivaDownloadBaggingError
27
- from deriva.core import BaseCLI, KeyValuePairArgs, format_credential, format_exception, urlparse
28
-
29
-
30
- class DerivaMLExecCLI(BaseCLI):
31
- def __init__(self, description, epilog, **kwargs):
32
-
33
- BaseCLI.__init__(self, description, epilog, **kwargs)
34
- self.parser.add_argument("--catalog", default=1, metavar="<1>", help="Catalog number. Default: 1")
35
- self.parser.add_argument("--timeout", metavar="<seconds>",
36
- help="Total number of seconds elapsed before the download is aborted.")
37
- self.parser.add_argument("output_dir", metavar="<output dir>", help="Path to an output directory.")
38
- self.parser.add_argument("envars", metavar="[key=value key=value ...]",
39
- nargs=argparse.REMAINDER, action=KeyValuePairArgs, default={},
40
- help="Variable length of whitespace-delimited key=value pair arguments used for "
41
- "string interpolation in specific parts of the configuration file. "
42
- "For example: key1=value1 key2=value2")
43
-
44
- def main(self):
45
- try:
46
- args = self.parse_cli()
47
- except ValueError as e:
48
- sys.stderr.write(str(e))
49
- return 2
50
- if not args.quiet:
51
- sys.stderr.write("\n")
52
-
53
- try:
54
- try:
55
- ml_instance = DerivaML(args.hostname, args.catalog)
56
- downloaded = self.execute()
57
- sys.stdout.write("\n%s\n" % (json.dumps(downloaded)))
58
- except ConnectionError as e:
59
- raise DerivaDownloadError("Connection error occurred. %s" % format_exception(e))
60
- except HTTPError as e:
61
- if e.response.status_code == requests.codes.unauthorized:
62
- raise DerivaDownloadAuthenticationError(
63
- "The requested service requires authentication and a valid login session could "
64
- "not be found for the specified host. Server responded: %s" % e)
65
- elif e.response.status_code == requests.codes.forbidden:
66
- raise DerivaDownloadAuthorizationError(
67
- "A requested operation was forbidden. Server responded: %s" % e)
68
- except (DerivaDownloadError, DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError,
69
- DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, DerivaDownloadBaggingError) as e:
70
- sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
71
- if args.debug:
72
- traceback.print_exc()
73
- return 1
74
- except:
75
- sys.stderr.write("An unexpected error occurred.")
76
- traceback.print_exc()
77
- return 1
78
- finally:
79
- if not args.quiet:
80
- sys.stderr.write("\n\n")
81
- return 0
82
-
83
-
84
- def do_stuff():
85
- pass
86
-
87
- def main(datasets: list[RID], model: list[RID], hostname: str, catalog_id: str):
88
- my_url = DerivaML.github_url()
89
- ml_instance = DerivaML(hostname, catalog_id)
90
- ml_instance.lookup_workflow(my_url)
91
- config = ExecutionConfiguration(
92
- datasets=[DatasetSpec(rid=dataset,
93
- version=ml_instance.dataset_version(dataset)) for dataset in datasets],
94
- assets=model,
95
- workflow= ml_instance.lookup_workflow(my_url)
96
- )
97
- execution = ml_instance.create_execution(config)
98
- with execution as e:
99
- do_stuff()
100
- execution.upload_execution_outputs()
101
-
102
- if __name__ == "__main__":
103
- main(datasets, model, hostname, catalog_id)
104
- if __file__ == matplotlib_inline