deriva-ml 1.8.4__py3-none-any.whl → 1.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/VERSION.py +1 -1
- deriva_ml/__init__.py +2 -1
- deriva_ml/deriva_ml_base.py +24 -10
- deriva_ml/execution.py +1 -1
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/METADATA +3 -2
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/RECORD +10 -11
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/WHEEL +1 -1
- deriva_ml/deriva_ml_execute.py +0 -104
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info/licenses}/LICENSE +0 -0
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.5.dist-info}/top_level.txt +0 -0
deriva_ml/VERSION.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.8.4"
|
|
1
|
+
__version__ = "1.8.5"
|
deriva_ml/__init__.py
CHANGED
|
@@ -4,6 +4,7 @@ __all__ = [
|
|
|
4
4
|
"FileUploadState",
|
|
5
5
|
"FileSpec",
|
|
6
6
|
"ExecutionConfiguration",
|
|
7
|
+
"Execution",
|
|
7
8
|
"Workflow",
|
|
8
9
|
"DatasetBag",
|
|
9
10
|
"DatasetVersion",
|
|
@@ -39,4 +40,4 @@ from .execution_configuration import (
|
|
|
39
40
|
ExecutionConfiguration,
|
|
40
41
|
Workflow,
|
|
41
42
|
)
|
|
42
|
-
|
|
43
|
+
from .execution import Execution
|
deriva_ml/deriva_ml_base.py
CHANGED
|
@@ -20,6 +20,7 @@ from pathlib import Path
|
|
|
20
20
|
import requests
|
|
21
21
|
from setuptools_git_versioning import get_latest_file_commit
|
|
22
22
|
import subprocess
|
|
23
|
+
import shutil
|
|
23
24
|
from typing import Optional, Any, Iterable, TYPE_CHECKING
|
|
24
25
|
from deriva.core import (
|
|
25
26
|
ErmrestCatalog,
|
|
@@ -30,6 +31,7 @@ from deriva.core import (
|
|
|
30
31
|
)
|
|
31
32
|
import deriva.core.datapath as datapath
|
|
32
33
|
from deriva.core.datapath import DataPathException
|
|
34
|
+
from deriva.core.deriva_server import DerivaServer
|
|
33
35
|
from deriva.core.ermrest_catalog import ResolveRidResult
|
|
34
36
|
from deriva.core.ermrest_model import Key, Table
|
|
35
37
|
from deriva.core.hatrac_store import HatracStore
|
|
@@ -115,13 +117,13 @@ class DerivaML(Dataset):
|
|
|
115
117
|
model_version: A string that indicates the version model. Typically passed in via
|
|
116
118
|
"""
|
|
117
119
|
self.credential = get_credential(hostname)
|
|
118
|
-
|
|
120
|
+
server = DerivaServer(
|
|
119
121
|
"https",
|
|
120
122
|
hostname,
|
|
121
|
-
|
|
122
|
-
self.credential,
|
|
123
|
+
credentials=self.credential,
|
|
123
124
|
session_config=self._get_session_config(),
|
|
124
125
|
)
|
|
126
|
+
self.catalog = server.connect_ermrest(catalog_id)
|
|
125
127
|
self.model = DerivaModel(
|
|
126
128
|
self.catalog.getCatalogModel(), domain_schema=domain_schema
|
|
127
129
|
)
|
|
@@ -157,7 +159,19 @@ class DerivaML(Dataset):
|
|
|
157
159
|
# Check if running in Jupyter's ZMQ kernel (used by notebooks)
|
|
158
160
|
if ipython is not None and "IPKernelApp" in ipython.config:
|
|
159
161
|
self._notebook = Path(ipython.user_ns.get("__session__"))
|
|
160
|
-
|
|
162
|
+
# Check if running in Jupyter's ZMQ kernel (used by notebooks)
|
|
163
|
+
try:
|
|
164
|
+
if subprocess.run(
|
|
165
|
+
[shutil.which("nbstripout"), "--is-installed"],
|
|
166
|
+
check=False,
|
|
167
|
+
capture_output=True,
|
|
168
|
+
).returncode:
|
|
169
|
+
self._logger.warn(
|
|
170
|
+
"nbstripout is not installed in repository. Please run nbstripout --install"
|
|
171
|
+
)
|
|
172
|
+
except subprocess.CalledProcessError:
|
|
173
|
+
self._logger.error("nbstripout is not found.")
|
|
174
|
+
|
|
161
175
|
except (ImportError, AttributeError):
|
|
162
176
|
pass
|
|
163
177
|
|
|
@@ -1001,16 +1015,16 @@ class DerivaML(Dataset):
|
|
|
1001
1015
|
) -> RID:
|
|
1002
1016
|
"""Identify current executing program and return a workflow RID for it
|
|
1003
1017
|
|
|
1004
|
-
|
|
1018
|
+
Determine the notebook or script that is currently being executed. Assume that this is
|
|
1005
1019
|
being executed from a cloned GitHub repository. Determine the remote repository name for
|
|
1006
|
-
this object. Then either retrieve an existing workflow for this executable
|
|
1020
|
+
this object. Then either retrieve an existing workflow for this executable or create
|
|
1007
1021
|
a new one.
|
|
1008
1022
|
|
|
1009
1023
|
Args:
|
|
1010
1024
|
name: The name of the workflow.
|
|
1011
1025
|
workflow_type: The type of the workflow.
|
|
1012
1026
|
description: The description of the workflow.
|
|
1013
|
-
create: Whether
|
|
1027
|
+
create: Whether to create a new workflow.
|
|
1014
1028
|
"""
|
|
1015
1029
|
# Make sure type is correct.
|
|
1016
1030
|
self.lookup_term(MLVocab.workflow_type, workflow_type)
|
|
@@ -1045,14 +1059,14 @@ class DerivaML(Dataset):
|
|
|
1045
1059
|
)
|
|
1046
1060
|
return self.add_workflow(workflow) if create else None
|
|
1047
1061
|
|
|
1048
|
-
def _github_url(self) -> tuple[
|
|
1062
|
+
def _github_url(self) -> tuple[Path, str, bool]:
|
|
1049
1063
|
"""Return a GitHUB URL for the latest commit of the script from which this routine is called.
|
|
1050
1064
|
|
|
1051
1065
|
This routine is used to be called from a script or notebook (e.g. python -m file). It assumes that
|
|
1052
1066
|
the file is in a gitHUB repository and commited. It returns a URL to the last commited version of this
|
|
1053
1067
|
file in GitHUB.
|
|
1054
1068
|
|
|
1055
|
-
Returns: A tuple with the filename, gethub_url and a
|
|
1069
|
+
Returns: A tuple with the filename, gethub_url and a boolean to indicated if uncommited changes
|
|
1056
1070
|
have been made to the file.
|
|
1057
1071
|
|
|
1058
1072
|
"""
|
|
@@ -1098,7 +1112,7 @@ class DerivaML(Dataset):
|
|
|
1098
1112
|
check=True,
|
|
1099
1113
|
)
|
|
1100
1114
|
is_dirty = bool(
|
|
1101
|
-
"
|
|
1115
|
+
"M " in result.stdout.strip()
|
|
1102
1116
|
) # Returns True if output indicates a modified file
|
|
1103
1117
|
except subprocess.CalledProcessError:
|
|
1104
1118
|
is_dirty = False # If Git command fails, assume no changes
|
deriva_ml/execution.py
CHANGED
|
@@ -254,7 +254,7 @@ class Execution:
|
|
|
254
254
|
def _create_notebook_checkpoint(self):
|
|
255
255
|
"""Trigger a checkpoint creation using Jupyter's API."""
|
|
256
256
|
notebook_name = self._ml_object._notebook
|
|
257
|
-
|
|
257
|
+
|
|
258
258
|
# Look for the server running this notebook.
|
|
259
259
|
root = Path("").absolute().parent.as_posix()
|
|
260
260
|
servers = list(list_running_servers())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.8.4
|
|
3
|
+
Version: 1.8.5
|
|
4
4
|
Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -13,6 +13,7 @@ Requires-Dist: pydantic>=2.10.6
|
|
|
13
13
|
Requires-Dist: semver>3.0.0
|
|
14
14
|
Requires-Dist: setuptools-git-versioning<3,>=2.0
|
|
15
15
|
Requires-Dist: nbstripout
|
|
16
|
+
Dynamic: license-file
|
|
16
17
|
|
|
17
18
|
Deriva-ML is a python libary to simplify the process of creating and executing reproducible machine learning workflows
|
|
18
19
|
using a deriva catalog.
|
|
@@ -1,15 +1,14 @@
|
|
|
1
|
-
deriva_ml/VERSION.py,sha256=
|
|
2
|
-
deriva_ml/__init__.py,sha256=
|
|
1
|
+
deriva_ml/VERSION.py,sha256=Dtbi4ISKI_kkTsaWuM-q8NfE3DySQu91TTP04Yhd8d8,22
|
|
2
|
+
deriva_ml/__init__.py,sha256=DyHiqklSer7q7oPGAemkzg5Qcq2swMZf9ALwJhGf6Jo,905
|
|
3
3
|
deriva_ml/database_model.py,sha256=uhoyVyd8MQmY8J9ovCH8fjxhZDxxXNkdJyYdeyEGPXA,13898
|
|
4
4
|
deriva_ml/dataset.py,sha256=xC6QPUp4MZcJiEnOEU3NnzoLBL9RcJWtPTyzIQP0Ivw,60666
|
|
5
5
|
deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
|
|
6
6
|
deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
|
|
7
7
|
deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
|
|
8
8
|
deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
|
|
9
|
-
deriva_ml/deriva_ml_base.py,sha256=
|
|
10
|
-
deriva_ml/deriva_ml_execute.py,sha256=y_rGjc97eidBuzy-AaQGe93vuTbWbkNkK9rpReqV0IY,4433
|
|
9
|
+
deriva_ml/deriva_ml_base.py,sha256=e2UtT3TlDpFQrG6z0DaB2iV22wmi4TLP7qXF3hvb8to,42868
|
|
11
10
|
deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
|
|
12
|
-
deriva_ml/execution.py,sha256=
|
|
11
|
+
deriva_ml/execution.py,sha256=VlapQGPDQI2MOmYnA5-hpf-XM6Fu4hPLpFjNN5q9Udo,29889
|
|
13
12
|
deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
|
|
14
13
|
deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
|
|
15
14
|
deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
|
|
@@ -27,9 +26,9 @@ deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-Xr
|
|
|
27
26
|
deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
|
|
28
27
|
deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
|
|
29
28
|
deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
|
|
30
|
-
deriva_ml-1.8.
|
|
31
|
-
deriva_ml-1.8.
|
|
32
|
-
deriva_ml-1.8.
|
|
33
|
-
deriva_ml-1.8.
|
|
34
|
-
deriva_ml-1.8.
|
|
35
|
-
deriva_ml-1.8.
|
|
29
|
+
deriva_ml-1.8.5.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
30
|
+
deriva_ml-1.8.5.dist-info/METADATA,sha256=uuAq67MUZyY2LP8NbZ8RMJ5q-aX3pJV5ioYZqqLbuFA,653
|
|
31
|
+
deriva_ml-1.8.5.dist-info/WHEEL,sha256=tTnHoFhvKQHCh4jz3yCn0WPTYIy7wXx3CJtJ7SJGV7c,91
|
|
32
|
+
deriva_ml-1.8.5.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
|
|
33
|
+
deriva_ml-1.8.5.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
|
|
34
|
+
deriva_ml-1.8.5.dist-info/RECORD,,
|
deriva_ml/deriva_ml_execute.py
DELETED
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
from sympy import cxxcode
|
|
2
|
-
|
|
3
|
-
from deriva_ml import DerivaML, execution_configuration
|
|
4
|
-
|
|
5
|
-
def execute(host, catalog, script):
|
|
6
|
-
workflow_rid = foobar
|
|
7
|
-
execution_configuration = cxxcode(
|
|
8
|
-
|
|
9
|
-
)
|
|
10
|
-
ml_instance = DerivaML()
|
|
11
|
-
ml_instance.create_execution(configuration)
|
|
12
|
-
script
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
from deriva_ml import DerivaML, ExecutionConfiguration, DatasetSpec, RID, DerivaMLException
|
|
16
|
-
import os
|
|
17
|
-
import sys
|
|
18
|
-
import json
|
|
19
|
-
import traceback
|
|
20
|
-
import argparse
|
|
21
|
-
import requests
|
|
22
|
-
from requests.exceptions import HTTPError, ConnectionError
|
|
23
|
-
from deriva.transfer import GenericDownloader
|
|
24
|
-
from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
|
|
25
|
-
DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
|
|
26
|
-
DerivaDownloadBaggingError
|
|
27
|
-
from deriva.core import BaseCLI, KeyValuePairArgs, format_credential, format_exception, urlparse
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class DerivaMLExecCLI(BaseCLI):
|
|
31
|
-
def __init__(self, description, epilog, **kwargs):
|
|
32
|
-
|
|
33
|
-
BaseCLI.__init__(self, description, epilog, **kwargs)
|
|
34
|
-
self.parser.add_argument("--catalog", default=1, metavar="<1>", help="Catalog number. Default: 1")
|
|
35
|
-
self.parser.add_argument("--timeout", metavar="<seconds>",
|
|
36
|
-
help="Total number of seconds elapsed before the download is aborted.")
|
|
37
|
-
self.parser.add_argument("output_dir", metavar="<output dir>", help="Path to an output directory.")
|
|
38
|
-
self.parser.add_argument("envars", metavar="[key=value key=value ...]",
|
|
39
|
-
nargs=argparse.REMAINDER, action=KeyValuePairArgs, default={},
|
|
40
|
-
help="Variable length of whitespace-delimited key=value pair arguments used for "
|
|
41
|
-
"string interpolation in specific parts of the configuration file. "
|
|
42
|
-
"For example: key1=value1 key2=value2")
|
|
43
|
-
|
|
44
|
-
def main(self):
|
|
45
|
-
try:
|
|
46
|
-
args = self.parse_cli()
|
|
47
|
-
except ValueError as e:
|
|
48
|
-
sys.stderr.write(str(e))
|
|
49
|
-
return 2
|
|
50
|
-
if not args.quiet:
|
|
51
|
-
sys.stderr.write("\n")
|
|
52
|
-
|
|
53
|
-
try:
|
|
54
|
-
try:
|
|
55
|
-
ml_instance = DerivaML(args.hostname, args.catalog)
|
|
56
|
-
downloaded = self.execute()
|
|
57
|
-
sys.stdout.write("\n%s\n" % (json.dumps(downloaded)))
|
|
58
|
-
except ConnectionError as e:
|
|
59
|
-
raise DerivaDownloadError("Connection error occurred. %s" % format_exception(e))
|
|
60
|
-
except HTTPError as e:
|
|
61
|
-
if e.response.status_code == requests.codes.unauthorized:
|
|
62
|
-
raise DerivaDownloadAuthenticationError(
|
|
63
|
-
"The requested service requires authentication and a valid login session could "
|
|
64
|
-
"not be found for the specified host. Server responded: %s" % e)
|
|
65
|
-
elif e.response.status_code == requests.codes.forbidden:
|
|
66
|
-
raise DerivaDownloadAuthorizationError(
|
|
67
|
-
"A requested operation was forbidden. Server responded: %s" % e)
|
|
68
|
-
except (DerivaDownloadError, DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError,
|
|
69
|
-
DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, DerivaDownloadBaggingError) as e:
|
|
70
|
-
sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
|
|
71
|
-
if args.debug:
|
|
72
|
-
traceback.print_exc()
|
|
73
|
-
return 1
|
|
74
|
-
except:
|
|
75
|
-
sys.stderr.write("An unexpected error occurred.")
|
|
76
|
-
traceback.print_exc()
|
|
77
|
-
return 1
|
|
78
|
-
finally:
|
|
79
|
-
if not args.quiet:
|
|
80
|
-
sys.stderr.write("\n\n")
|
|
81
|
-
return 0
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def do_stuff():
|
|
85
|
-
pass
|
|
86
|
-
|
|
87
|
-
def main(datasets: list[RID], model: list[RID], hostname: str, catalog_id: str):
|
|
88
|
-
my_url = DerivaML.github_url()
|
|
89
|
-
ml_instance = DerivaML(hostname, catalog_id)
|
|
90
|
-
ml_instance.lookup_workflow(my_url)
|
|
91
|
-
config = ExecutionConfiguration(
|
|
92
|
-
datasets=[DatasetSpec(rid=dataset,
|
|
93
|
-
version=ml_instance.dataset_version(dataset)) for dataset in datasets],
|
|
94
|
-
assets=model,
|
|
95
|
-
workflow= ml_instance.lookup_workflow(my_url)
|
|
96
|
-
)
|
|
97
|
-
execution = ml_instance.create_execution(config)
|
|
98
|
-
with execution as e:
|
|
99
|
-
do_stuff()
|
|
100
|
-
execution.upload_execution_outputs()
|
|
101
|
-
|
|
102
|
-
if __name__ == "__main__":
|
|
103
|
-
main(datasets, model, hostname, catalog_id)
|
|
104
|
-
if __file__ == matplotlib_inline
|
|
File without changes
|
|
File without changes
|
|
File without changes
|