deriva-ml 1.8.4__py3-none-any.whl → 1.8.10__py3-none-any.whl
This diff shows the content of publicly available package versions as published to their respective public registries. It is provided for informational purposes only and reflects the changes between the two released versions.
- deriva_ml/__init__.py +10 -0
- deriva_ml/deriva_ml_base.py +107 -68
- deriva_ml/execution.py +1 -1
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/METADATA +5 -3
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/RECORD +9 -11
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/WHEEL +1 -1
- deriva_ml/VERSION.py +0 -1
- deriva_ml/deriva_ml_execute.py +0 -104
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info/licenses}/LICENSE +0 -0
- {deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/top_level.txt +0 -0
deriva_ml/__init__.py
CHANGED
@@ -4,6 +4,7 @@ __all__ = [
     "FileUploadState",
     "FileSpec",
     "ExecutionConfiguration",
+    "Execution",
     "Workflow",
     "DatasetBag",
     "DatasetVersion",
@@ -39,4 +40,13 @@ from .execution_configuration import (
     ExecutionConfiguration,
     Workflow,
 )
+from .execution import Execution
+
+from importlib.metadata import version, PackageNotFoundError
+
+try:
+    __version__ = version("deriva_ml")
+except PackageNotFoundError:
+    # package is not installed
+    pass

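
The new __init__.py stops shipping a hard-coded VERSION.py and instead resolves the package version from installed distribution metadata. A minimal sketch of that pattern, assuming the package has been installed; the explicit fallback value is an illustrative addition, the shipped code simply passes:

from importlib.metadata import version, PackageNotFoundError

try:
    # Read the version recorded in the installed dist-info, so it always matches the wheel.
    __version__ = version("deriva_ml")
except PackageNotFoundError:
    # Running from an uninstalled source checkout; no metadata is available.
    __version__ = "0+unknown"  # illustrative fallback, not part of the released code
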
deriva_ml/deriva_ml_base.py
CHANGED
@@ -13,16 +13,15 @@ from __future__ import annotations
 import getpass
 import logging
 from datetime import datetime
-import hashlib
 from itertools import chain
 import inspect
+import setuptools_scm
 from pathlib import Path
 import requests
-from setuptools_git_versioning import get_latest_file_commit
 import subprocess
+import shutil
 from typing import Optional, Any, Iterable, TYPE_CHECKING
 from deriva.core import (
-    ErmrestCatalog,
     get_credential,
     urlquote,
     DEFAULT_SESSION_CONFIG,
@@ -30,6 +29,7 @@ from deriva.core import (
 )
 import deriva.core.datapath as datapath
 from deriva.core.datapath import DataPathException
+from deriva.core.deriva_server import DerivaServer
 from deriva.core.ermrest_catalog import ResolveRidResult
 from deriva.core.ermrest_model import Key, Table
 from deriva.core.hatrac_store import HatracStore
@@ -115,13 +115,13 @@ class DerivaML(Dataset):
             model_version: A string that indicates the version model. Typically passed in via
         """
         self.credential = get_credential(hostname)
-
+        server = DerivaServer(
             "https",
             hostname,
-
-            self.credential,
+            credentials=self.credential,
             session_config=self._get_session_config(),
         )
+        self.catalog = server.connect_ermrest(catalog_id)
         self.model = DerivaModel(
             self.catalog.getCatalogModel(), domain_schema=domain_schema
         )
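
The constructor now obtains its catalog handle through DerivaServer.connect_ermrest instead of constructing an ErmrestCatalog directly. A standalone sketch of that connection path, with illustrative hostname and catalog id:

from deriva.core import get_credential
from deriva.core.deriva_server import DerivaServer

hostname, catalog_id = "demo.derivacloud.org", "1"      # illustrative values
credential = get_credential(hostname)

# Bind to the server once, then connect to a specific ERMrest catalog on it.
server = DerivaServer("https", hostname, credentials=credential)
catalog = server.connect_ermrest(catalog_id)
model = catalog.getCatalogModel()                        # same call the diff makes next
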
@@ -142,6 +142,8 @@ class DerivaML(Dataset):

         # Initialize dataset class.
         super().__init__(self.model, self.cache_dir)
+        self._logger = logging.getLogger("deriva_ml")
+        self._logger.setLevel(logging_level)

         self.host_name = hostname
         self.catalog_id = catalog_id
@@ -149,25 +151,12 @@ class DerivaML(Dataset):
         self.version = model_version
         self.configuration = None
         self._execution: Optional[Execution] = None
-        self.
-
-            from IPython import get_ipython
-
-            ipython = get_ipython()
-            # Check if running in Jupyter's ZMQ kernel (used by notebooks)
-            if ipython is not None and "IPKernelApp" in ipython.config:
-                self._notebook = Path(ipython.user_ns.get("__session__"))
-            # Check if running in Jupyter's ZMQ kernel (used by notebooks)
-        except (ImportError, AttributeError):
-            pass
-
+        self._script_path, self._is_notebook = self._get_python_script()
+        self._notebook = self._get_python_notebook()
         self.domain_schema = self.model.domain_schema
         self.project_name = project_name or self.domain_schema
-
         self.start_time = datetime.now()
         self.status = Status.pending.value
-        self._logger = logging.getLogger("deriva_ml")
-        self._logger.setLevel(logging_level)

         logging.basicConfig(
             level=logging_level,
@@ -190,6 +179,65 @@ class DerivaML(Dataset):
         except (AttributeError, requests.HTTPError):
             pass

+    def _get_python_notebook(self) -> Path | None:
+        """Figure out if you are running in a Jupyter notebook
+
+        Returns:
+            A Path to the notebook file that is currently being executed.
+        """
+        notebook = None
+        try:
+            ipython = get_ipython()
+            # Check if running in Jupyter's ZMQ kernel (used by notebooks)
+            if ipython is not None and "IPKernelApp" in ipython.config:
+                notebook = Path(ipython.user_ns.get("__session__"))
+                # Check if running in Jupyter's ZMQ kernel (used by notebooks)
+                try:
+                    if subprocess.run(
+                        [shutil.which("nbstripout"), "--is-installed"],
+                        check=False,
+                        capture_output=True,
+                    ).returncode:
+                        self._logger.warning(
+                            "nbstripout is not installed in repository. Please run nbstripout --install"
+                        )
+                except subprocess.CalledProcessError:
+                    self._logger.error("nbstripout is not found.")
+        except (ImportError, AttributeError):
+            pass
+        return notebook
+
+    def _get_python_script(self) -> tuple[Path, bool]:
+        """Return the path to the currently executing script"""
+        is_notebook = False
+        if filename := self._get_python_notebook():
+            is_notebook = True
+        else:
+            stack = inspect.stack()
+            if len(stack) > 1:
+                filename = Path(
+                    stack[2].filename
+                )  # Get the caller's filename, which is two up the stack from here.
+            else:
+                raise DerivaMLException(
+                    f"Looking for caller failed"
+                )  # Stack is too shallow
+        return filename, is_notebook
+
+    def _get_git_root(self):
+        try:
+            result = subprocess.run(
+                ["git", "rev-parse", "--show-toplevel"],
+                cwd=self._script_path.parent,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.DEVNULL,
+                text=True,
+                check=True
+            )
+            return result.stdout.strip()
+        except subprocess.CalledProcessError:
+            return None  # Not in a git repository
+
     @staticmethod
     def _get_session_config():
         """ """
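
The new _get_git_root() locates the repository that contains the running script by shelling out to git rev-parse --show-toplevel from the script's directory. A standalone sketch of the same lookup; the function name and the extra FileNotFoundError guard are illustrative additions:

import subprocess
from pathlib import Path

def git_root(start: Path) -> Path | None:
    """Return the top-level directory of the git checkout containing start, or None."""
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            cwd=start,
            capture_output=True,
            text=True,
            check=True,
        )
        return Path(result.stdout.strip())
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None  # not inside a git repository, or git is not on PATH

print(git_root(Path.cwd()))
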
@@ -213,6 +261,9 @@ class DerivaML(Dataset):
         """Get a new instance of a pathBuilder object."""
         return self.catalog.getPathBuilder()

+    def get_version(self) -> str:
+        return setuptools_scm.get_version(root=self._get_git_root())
+
     @property
     def domain_path(self):
         """Get a new instance of a pathBuilder object to the domain schema"""
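
get_version() now derives the running code's version from the enclosing git checkout via setuptools_scm rather than from a static file. A minimal sketch of that call, assuming a tagged clone of the repository; the path is an illustrative placeholder for what _get_git_root() returns:

import setuptools_scm

repo_root = "/path/to/deriva-ml"   # illustrative checkout location

# Yields something like "1.8.10" at a tag, or a dev version such as
# "1.8.11.dev3+g1a2b3c4" when commits exist past the latest tag.
print(setuptools_scm.get_version(root=repo_root))
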
@@ -1001,40 +1052,38 @@
     ) -> RID:
         """Identify current executing program and return a workflow RID for it

-
+        Determine the notebook or script that is currently being executed. Assume that this is
         being executed from a cloned GitHub repository. Determine the remote repository name for
-        this object. Then either retrieve an existing workflow for this executable
+        this object. Then either retrieve an existing workflow for this executable or create
        a new one.

        Args:
            name: The name of the workflow.
            workflow_type: The type of the workflow.
            description: The description of the workflow.
-            create: Whether
+            create: Whether to create a new workflow.
        """
        # Make sure type is correct.
        self.lookup_term(MLVocab.workflow_type, workflow_type)
-
+        github_url, is_dirty = self._github_url()

        if is_dirty:
            self._logger.warning(
-                f"File {
+                f"File {self._script_path} has been modified since last commit. Consider commiting before executing"
            )

-
-
-
-
-
-
-
-
-
-
-
-
-                sha256_hash.update(f.read())
-            checksum = "SHA-256:" + sha256_hash.hexdigest()
+        # If you are in a notebook, strip out the outputs before computing the checksum.
+        cmd = (
+            f"nbstripout {self._script_path} | git hash-object --stdin"
+            if self._is_notebook
+            else f"git hash-object {self._script_path}"
+        )
+        checksum = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            check=True,
+        ).stdout.strip()

        workflow = Workflow(
            name=name,
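
The workflow checksum is no longer a SHA-256 over the raw file; it is the git hash-object of the file, with notebook outputs stripped first so that re-running a notebook does not change its identity. A sketch of that computation; the helper name is illustrative, and shell=True is used here because the notebook case pipes nbstripout into git:

import subprocess

def committed_checksum(path: str, is_notebook: bool) -> str:
    """Hash a file the way git stores blobs, ignoring notebook outputs."""
    cmd = (
        f"nbstripout {path} | git hash-object --stdin"
        if is_notebook
        else f"git hash-object {path}"
    )
    return subprocess.run(
        cmd, shell=True, capture_output=True, text=True, check=True
    ).stdout.strip()

print(committed_checksum("analysis.ipynb", is_notebook=True))   # illustrative file
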
@@ -1045,67 +1094,57 @@
         )
         return self.add_workflow(workflow) if create else None

-    def _github_url(self) -> tuple[str,
+    def _github_url(self) -> tuple[str, bool]:
        """Return a GitHUB URL for the latest commit of the script from which this routine is called.

        This routine is used to be called from a script or notebook (e.g. python -m file). It assumes that
        the file is in a gitHUB repository and commited. It returns a URL to the last commited version of this
        file in GitHUB.

-        Returns: A tuple with the
+        Returns: A tuple with the gethub_url and a boolean to indicated if uncommited changes
        have been made to the file.

        """

-        # Get the name of the script that is calling this function.
-        if self._notebook:
-            # Try to get the __session__ variable from the user namespace.
-            filename = Path("").absolute().parent / self._notebook
-        else:
-            stack = inspect.stack()
-            if len(stack) > 1:
-                filename = Path(
-                    stack[2].filename
-                )  # Get the caller's filename, which is two up the stack from here.
-            else:
-                raise DerivaMLException(
-                    f"Looking for caller failed"
-                )  # Stack is too shallow
-
        # Get repo URL from local github repo.
        try:
            result = subprocess.run(
-                ["git", "remote", "get-url", "origin"], capture_output=True, text=True
+                ["git", "remote", "get-url", "origin"], capture_output=True, text=True,
+                cwd=self._script_path.parent,
            )
            github_url = result.stdout.strip().removesuffix(".git")
        except subprocess.CalledProcessError:
            raise DerivaMLException(f"No GIT remote found")

        # Find the root directory for the repository
-        repo_root =
-        while repo_root != repo_root.root:
-            if (repo_root / ".git").exists():
-                break
-            else:
-                repo_root = repo_root.parent
+        repo_root = self._get_git_root()

        # Now check to see if file has been modified since the last commit.
        try:
            result = subprocess.run(
                ["git", "status", "--porcelain"],
+                cwd=self._script_path.parent,
                capture_output=True,
                text=True,
                check=True,
            )
            is_dirty = bool(
-                "
+                "M " in result.stdout.strip()
            )  # Returns True if output indicates a modified file
        except subprocess.CalledProcessError:
            is_dirty = False  # If Git command fails, assume no changes

-
-
-
+        """Get SHA-1 hash of latest commit of the file in the repository"""
+        result = subprocess.run(
+            ["git", "log", "-n", "1", "--pretty=format:%H" "--", self._script_path],
+            cwd=self._script_path.parent,
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        sha = result.stdout.strip()
+        url = f"{github_url}/blob/{sha}/{self._script_path.relative_to(repo_root)}"
+        return url, is_dirty

    # @validate_call
    def create_execution(self, configuration: ExecutionConfiguration) -> "Execution":
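
The value _github_url() returns is a permalink assembled from three pieces gathered above: the origin remote URL, the SHA of the last commit that touched the file, and the file's path relative to the repository root. A sketch of the assembly with illustrative values:

# Stand-ins for what the git subprocess calls above would return.
github_url = "https://github.com/example-org/deriva-ml"       # git remote get-url origin, ".git" removed
sha = "1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"               # git log -n 1 --pretty=format:%H -- <file>
relative_path = "src/deriva_ml/deriva_ml_base.py"              # script path relative to the repo root

url = f"{github_url}/blob/{sha}/{relative_path}"
print(url)
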
deriva_ml/execution.py
CHANGED
@@ -254,7 +254,7 @@ class Execution:
     def _create_notebook_checkpoint(self):
         """Trigger a checkpoint creation using Jupyter's API."""
         notebook_name = self._ml_object._notebook
-
+
         # Look for the server running this notebook.
         root = Path("").absolute().parent.as_posix()
         servers = list(list_running_servers())

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.8.4
+Version: 1.8.10
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
@@ -11,8 +11,10 @@ Requires-Dist: pandas
 Requires-Dist: regex~=2024.7.24
 Requires-Dist: pydantic>=2.10.6
 Requires-Dist: semver>3.0.0
-Requires-Dist: setuptools
+Requires-Dist: setuptools>=64
+Requires-Dist: setuptools-scm<=6.0
 Requires-Dist: nbstripout
+Dynamic: license-file

 Deriva-ML is a python libary to simplify the process of creating and executing reproducible machine learning workflows
 using a deriva catalog.

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/RECORD
CHANGED

@@ -1,15 +1,13 @@
-deriva_ml/
-deriva_ml/__init__.py,sha256=0PHNB8gRDALLtaffRmU7wCUgWbRHVQZcjuPJxMLNEco,856
+deriva_ml/__init__.py,sha256=r1Z9N5vtZkAET7emqhpAx2bf_xJUp5wHOc4_DIplsG8,1082
 deriva_ml/database_model.py,sha256=uhoyVyd8MQmY8J9ovCH8fjxhZDxxXNkdJyYdeyEGPXA,13898
 deriva_ml/dataset.py,sha256=xC6QPUp4MZcJiEnOEU3NnzoLBL9RcJWtPTyzIQP0Ivw,60666
 deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
 deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
 deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
 deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
-deriva_ml/deriva_ml_base.py,sha256=
-deriva_ml/deriva_ml_execute.py,sha256=y_rGjc97eidBuzy-AaQGe93vuTbWbkNkK9rpReqV0IY,4433
+deriva_ml/deriva_ml_base.py,sha256=aVyGsFERZtpjNxfaVYzvKa7J0Ma-U3DEibfjnbr7lFQ,43817
 deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
-deriva_ml/execution.py,sha256=
+deriva_ml/execution.py,sha256=VlapQGPDQI2MOmYnA5-hpf-XM6Fu4hPLpFjNN5q9Udo,29889
 deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
 deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
 deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
@@ -27,9 +25,9 @@ deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-Xr
 deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
 deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
 deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
-deriva_ml-1.8.
-deriva_ml-1.8.
-deriva_ml-1.8.
-deriva_ml-1.8.
-deriva_ml-1.8.
-deriva_ml-1.8.
+deriva_ml-1.8.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deriva_ml-1.8.10.dist-info/METADATA,sha256=Mhx0joyR1gPEX8G6ZoEpvxNVW4sUG9C_S5TIA6ueZKk,670
+deriva_ml-1.8.10.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+deriva_ml-1.8.10.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
+deriva_ml-1.8.10.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
+deriva_ml-1.8.10.dist-info/RECORD,,
deriva_ml/VERSION.py
DELETED
@@ -1 +0,0 @@
-__version__ = "1.8.4"
deriva_ml/deriva_ml_execute.py
DELETED
@@ -1,104 +0,0 @@
-from sympy import cxxcode
-
-from deriva_ml import DerivaML, execution_configuration
-
-def execute(host, catalog, script):
-    workflow_rid = foobar
-    execution_configuration = cxxcode(
-
-    )
-    ml_instance = DerivaML()
-    ml_instance.create_execution(configuration)
-    script
-
-
-from deriva_ml import DerivaML, ExecutionConfiguration, DatasetSpec, RID, DerivaMLException
-import os
-import sys
-import json
-import traceback
-import argparse
-import requests
-from requests.exceptions import HTTPError, ConnectionError
-from deriva.transfer import GenericDownloader
-from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
-    DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
-    DerivaDownloadBaggingError
-from deriva.core import BaseCLI, KeyValuePairArgs, format_credential, format_exception, urlparse
-
-
-class DerivaMLExecCLI(BaseCLI):
-    def __init__(self, description, epilog, **kwargs):
-
-        BaseCLI.__init__(self, description, epilog, **kwargs)
-        self.parser.add_argument("--catalog", default=1, metavar="<1>", help="Catalog number. Default: 1")
-        self.parser.add_argument("--timeout", metavar="<seconds>",
-                                 help="Total number of seconds elapsed before the download is aborted.")
-        self.parser.add_argument("output_dir", metavar="<output dir>", help="Path to an output directory.")
-        self.parser.add_argument("envars", metavar="[key=value key=value ...]",
-                                 nargs=argparse.REMAINDER, action=KeyValuePairArgs, default={},
-                                 help="Variable length of whitespace-delimited key=value pair arguments used for "
-                                      "string interpolation in specific parts of the configuration file. "
-                                      "For example: key1=value1 key2=value2")
-
-    def main(self):
-        try:
-            args = self.parse_cli()
-        except ValueError as e:
-            sys.stderr.write(str(e))
-            return 2
-        if not args.quiet:
-            sys.stderr.write("\n")
-
-        try:
-            try:
-                ml_instance = DerivaML(args.hostname, args.catalog)
-                downloaded = self.execute()
-                sys.stdout.write("\n%s\n" % (json.dumps(downloaded)))
-            except ConnectionError as e:
-                raise DerivaDownloadError("Connection error occurred. %s" % format_exception(e))
-            except HTTPError as e:
-                if e.response.status_code == requests.codes.unauthorized:
-                    raise DerivaDownloadAuthenticationError(
-                        "The requested service requires authentication and a valid login session could "
-                        "not be found for the specified host. Server responded: %s" % e)
-                elif e.response.status_code == requests.codes.forbidden:
-                    raise DerivaDownloadAuthorizationError(
-                        "A requested operation was forbidden. Server responded: %s" % e)
-        except (DerivaDownloadError, DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError,
-                DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, DerivaDownloadBaggingError) as e:
-            sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
-            if args.debug:
-                traceback.print_exc()
-            return 1
-        except:
-            sys.stderr.write("An unexpected error occurred.")
-            traceback.print_exc()
-            return 1
-        finally:
-            if not args.quiet:
-                sys.stderr.write("\n\n")
-        return 0
-
-
-def do_stuff():
-    pass
-
-def main(datasets: list[RID], model: list[RID], hostname: str, catalog_id: str):
-    my_url = DerivaML.github_url()
-    ml_instance = DerivaML(hostname, catalog_id)
-    ml_instance.lookup_workflow(my_url)
-    config = ExecutionConfiguration(
-        datasets=[DatasetSpec(rid=dataset,
-                              version=ml_instance.dataset_version(dataset)) for dataset in datasets],
-        assets=model,
-        workflow= ml_instance.lookup_workflow(my_url)
-    )
-    execution = ml_instance.create_execution(config)
-    with execution as e:
-        do_stuff()
-        execution.upload_execution_outputs()
-
-if __name__ == "__main__":
-    main(datasets, model, hostname, catalog_id)
-    if __file__ == matplotlib_inline

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/entry_points.txt
File without changes

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info/licenses}/LICENSE
File without changes

{deriva_ml-1.8.4.dist-info → deriva_ml-1.8.10.dist-info}/top_level.txt
File without changes