deriva-ml 1.9.0__tar.gz → 1.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deriva_ml-1.9.0/src/deriva_ml.egg-info → deriva_ml-1.10.0}/PKG-INFO +11 -2
- deriva_ml-1.10.0/README.md +11 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/release.sh +2 -1
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/database_model.py +2 -3
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/dataset.py +4 -6
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/demo_catalog.py +3 -1
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/deriva_ml_base.py +22 -9
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/execution.py +36 -15
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/execution_configuration.py +1 -1
- {deriva_ml-1.9.0 → deriva_ml-1.10.0/src/deriva_ml.egg-info}/PKG-INFO +11 -2
- deriva_ml-1.9.0/README.md +0 -2
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/.github/workflows/publish-docs.yml +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/.gitignore +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/LICENSE +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/.DS_Store +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/Notebooks/DerivaML Features.ipynb +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/assets/ERD.png +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/assets/Launcher.png +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/assets/copy_minid.png +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/assets/deriva-logo.png +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/assets/deriva-ml.pdf +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/assets/sharing-at-home.pdf +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/dataset.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/dataset_aux_classes.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/dataset_bag.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/deriva_ml_base.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/deriva_model.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/execution.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/execution_configuration.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/feature.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/code-docs/upload.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/deriva_ml_structure.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/index.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/release-notes.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/user-guide/datasets.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/user-guide/execution-configuration.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/user-guide/identifiers.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/user-guide/install.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/docs/user-guide/ml_workflow_instruction.md +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/mkdocs.yml +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/pyproject.toml +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/setup.cfg +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/__init__.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/dataset_aux_classes.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/dataset_bag.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/deriva_definitions.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/deriva_model.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/execution_environment.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/feature.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/history.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/schema_setup/__init__.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/schema_setup/annotations.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/schema_setup/create_schema.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/schema_setup/policy.json +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/schema_setup/table_comments_utils.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/test_functions.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml/upload.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml.egg-info/entry_points.txt +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml.egg-info/requires.txt +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/src/deriva_ml.egg-info/top_level.txt +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/__init__.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/derivaml_test.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/runner.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/test_basic_tables.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/test_dataset.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/test_download.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/test_execution.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/test_features.py +0 -0
- {deriva_ml-1.9.0 → deriva_ml-1.10.0}/tests/test_upload.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.9.0
|
|
3
|
+
Version: 1.10.0
|
|
4
4
|
Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -16,5 +16,14 @@ Requires-Dist: setuptools-scm<=6.0
|
|
|
16
16
|
Requires-Dist: nbstripout
|
|
17
17
|
Dynamic: license-file
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
# DerivaML
|
|
20
|
+
Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
|
|
20
21
|
using a deriva catalog.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## Installing the GitHub CLI
|
|
25
|
+
|
|
26
|
+
The script release.sh will create a new release tag in GitHub. This script requires the
|
|
27
|
+
GitHub CLI be installed.
|
|
28
|
+
|
|
29
|
+
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# DerivaML
|
|
2
|
+
Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
|
|
3
|
+
using a deriva catalog.
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
## Installing the GitHub CLI
|
|
7
|
+
|
|
8
|
+
The script release.sh will create a new release tag in GitHub. This script requires the
|
|
9
|
+
GitHub CLI be installed.
|
|
10
|
+
|
|
11
|
+
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
|
@@ -9,6 +9,7 @@ fi
|
|
|
9
9
|
|
|
10
10
|
# Default version bump is patch unless specified (patch, minor, or major)
|
|
11
11
|
VERSION_TYPE=${1:-patch}
|
|
12
|
+
|
|
12
13
|
echo "Bumping version: $VERSION_TYPE"
|
|
13
14
|
|
|
14
15
|
# Bump the version using bump-my-version.
|
|
@@ -32,6 +33,6 @@ python -m build
|
|
|
32
33
|
NEW_TAG=$(git describe --tags --abbrev=0)
|
|
33
34
|
echo "New version tag: $NEW_TAG"
|
|
34
35
|
|
|
35
|
-
twine upload
|
|
36
|
+
twine upload dist/*${NEW_TAG/v/}*
|
|
36
37
|
|
|
37
38
|
echo "Release process complete!"
|
|
@@ -84,7 +84,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
|
|
|
84
84
|
except KeyError:
|
|
85
85
|
raise DerivaMLException(f"Dataset {dataset_rid} not found")
|
|
86
86
|
|
|
87
|
-
def __init__(self, minid: DatasetMinid, bag_path: Path):
|
|
87
|
+
def __init__(self, minid: DatasetMinid, bag_path: Path, dbase_path: Path):
|
|
88
88
|
"""Create a new DatabaseModel.
|
|
89
89
|
|
|
90
90
|
Args:
|
|
@@ -95,8 +95,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
|
|
|
95
95
|
self.bag_path = bag_path
|
|
96
96
|
self.minid = minid
|
|
97
97
|
self.dataset_rid = minid.dataset_rid
|
|
98
|
-
|
|
99
|
-
self.dbase_file = dir_path / f"{minid.version_rid}.db"
|
|
98
|
+
self.dbase_file = dbase_path / f"{minid.version_rid}.db"
|
|
100
99
|
self.dbase = sqlite3.connect(self.dbase_file)
|
|
101
100
|
|
|
102
101
|
super().__init__(
|
|
@@ -67,11 +67,12 @@ class Dataset:
|
|
|
67
67
|
|
|
68
68
|
_Logger = logging.getLogger("deriva_ml")
|
|
69
69
|
|
|
70
|
-
def __init__(self, model: DerivaModel, cache_dir: Path):
|
|
70
|
+
def __init__(self, model: DerivaModel, cache_dir: Path, working_dir: Path):
|
|
71
71
|
self._model = model
|
|
72
72
|
self._ml_schema = ML_SCHEMA
|
|
73
73
|
self.dataset_table = self._model.schemas[self._ml_schema].tables["Dataset"]
|
|
74
74
|
self._cache_dir = cache_dir
|
|
75
|
+
self._working_dir = working_dir
|
|
75
76
|
self._logger = logging.getLogger("deriva_ml")
|
|
76
77
|
|
|
77
78
|
def _is_dataset_rid(self, dataset_rid: RID, deleted: bool = False) -> bool:
|
|
@@ -783,7 +784,6 @@ class Dataset:
|
|
|
783
784
|
snapshot: Optional[Dataset] = None,
|
|
784
785
|
dataset_nesting_depth: Optional[int] = None,
|
|
785
786
|
) -> set[tuple[Table, ...]]:
|
|
786
|
-
|
|
787
787
|
snapshot_catalog = snapshot if snapshot else self
|
|
788
788
|
|
|
789
789
|
dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
|
|
@@ -831,9 +831,7 @@ class Dataset:
|
|
|
831
831
|
nested_paths = set()
|
|
832
832
|
if dataset_rid:
|
|
833
833
|
for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
|
|
834
|
-
nested_paths |= self._collect_paths(
|
|
835
|
-
c, snapshot=snapshot_catalog
|
|
836
|
-
)
|
|
834
|
+
nested_paths |= self._collect_paths(c, snapshot=snapshot_catalog)
|
|
837
835
|
else:
|
|
838
836
|
# Initialize nesting depth if not already provided.
|
|
839
837
|
dataset_nesting_depth = (
|
|
@@ -979,7 +977,7 @@ class Dataset:
|
|
|
979
977
|
if dataset.materialize
|
|
980
978
|
else self._download_dataset_minid(minid)
|
|
981
979
|
)
|
|
982
|
-
return DatabaseModel(minid, bag_path).get_dataset()
|
|
980
|
+
return DatabaseModel(minid, bag_path, self._working_dir).get_dataset()
|
|
983
981
|
|
|
984
982
|
def _version_snapshot(self, dataset: DatasetSpec) -> str:
|
|
985
983
|
"""Return a catalog with snapshot for the specified dataset version"""
|
|
@@ -295,7 +295,9 @@ def create_demo_catalog(
|
|
|
295
295
|
dataset_table = deriva_ml.dataset_table
|
|
296
296
|
dataset_table.annotations.update(
|
|
297
297
|
Dataset(
|
|
298
|
-
deriva_ml.model,
|
|
298
|
+
deriva_ml.model,
|
|
299
|
+
cache_dir=deriva_ml.cache_dir,
|
|
300
|
+
working_dir=deriva_ml.working_dir,
|
|
299
301
|
)._generate_dataset_annotations()
|
|
300
302
|
)
|
|
301
303
|
deriva_ml.model.apply()
|
|
@@ -163,7 +163,7 @@ class DerivaML(Dataset):
|
|
|
163
163
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
164
164
|
|
|
165
165
|
# Initialize dataset class.
|
|
166
|
-
super().__init__(self.model, self.cache_dir)
|
|
166
|
+
super().__init__(self.model, self.cache_dir, self.working_dir)
|
|
167
167
|
self._logger = logging.getLogger("deriva_ml")
|
|
168
168
|
self._logger.setLevel(logging_level)
|
|
169
169
|
|
|
@@ -257,7 +257,7 @@ class DerivaML(Dataset):
|
|
|
257
257
|
def _get_notebook_path(self) -> Path | None:
|
|
258
258
|
"""Return the absolute path of the current notebook."""
|
|
259
259
|
|
|
260
|
-
server, session =
|
|
260
|
+
server, session = DerivaML._get_notebook_session()
|
|
261
261
|
if server and session:
|
|
262
262
|
self._check_nbstrip_status()
|
|
263
263
|
relative_path = session["notebook"]["path"]
|
|
@@ -1136,8 +1136,8 @@ class DerivaML(Dataset):
|
|
|
1136
1136
|
return None
|
|
1137
1137
|
|
|
1138
1138
|
def create_workflow(
|
|
1139
|
-
self, name: str, workflow_type: str, description: str = ""
|
|
1140
|
-
) ->
|
|
1139
|
+
self, name: str, workflow_type: str, description: str = ""
|
|
1140
|
+
) -> Workflow:
|
|
1141
1141
|
"""Identify current executing program and return a workflow RID for it
|
|
1142
1142
|
|
|
1143
1143
|
Determine the notebook or script that is currently being executed. Assume that this is
|
|
@@ -1149,10 +1149,21 @@ class DerivaML(Dataset):
|
|
|
1149
1149
|
name: The name of the workflow.
|
|
1150
1150
|
workflow_type: The type of the workflow.
|
|
1151
1151
|
description: The description of the workflow.
|
|
1152
|
-
create: Whether to create a new workflow.
|
|
1153
1152
|
"""
|
|
1154
1153
|
# Make sure type is correct.
|
|
1155
1154
|
self.lookup_term(MLVocab.workflow_type, workflow_type)
|
|
1155
|
+
|
|
1156
|
+
try:
|
|
1157
|
+
subprocess.run(
|
|
1158
|
+
"git rev-parse --is-inside-work-tree",
|
|
1159
|
+
capture_output=True,
|
|
1160
|
+
text=True,
|
|
1161
|
+
shell=True,
|
|
1162
|
+
check=True,
|
|
1163
|
+
)
|
|
1164
|
+
except subprocess.CalledProcessError:
|
|
1165
|
+
raise DerivaMLException("Not executing in a Git repository.")
|
|
1166
|
+
|
|
1156
1167
|
github_url, is_dirty = self._github_url()
|
|
1157
1168
|
|
|
1158
1169
|
if is_dirty:
|
|
@@ -1174,14 +1185,13 @@ class DerivaML(Dataset):
|
|
|
1174
1185
|
shell=True,
|
|
1175
1186
|
).stdout.strip()
|
|
1176
1187
|
|
|
1177
|
-
|
|
1188
|
+
return Workflow(
|
|
1178
1189
|
name=name,
|
|
1179
1190
|
url=github_url,
|
|
1180
1191
|
checksum=checksum,
|
|
1181
1192
|
description=description,
|
|
1182
1193
|
workflow_type=workflow_type,
|
|
1183
1194
|
)
|
|
1184
|
-
return self.add_workflow(workflow) if create else None
|
|
1185
1195
|
|
|
1186
1196
|
def _github_url(self) -> tuple[str, bool]:
|
|
1187
1197
|
"""Return a GitHUB URL for the latest commit of the script from which this routine is called.
|
|
@@ -1238,7 +1248,9 @@ class DerivaML(Dataset):
|
|
|
1238
1248
|
return url, is_dirty
|
|
1239
1249
|
|
|
1240
1250
|
# @validate_call
|
|
1241
|
-
def create_execution(
|
|
1251
|
+
def create_execution(
|
|
1252
|
+
self, configuration: ExecutionConfiguration, dryrun: bool = False
|
|
1253
|
+
) -> "Execution":
|
|
1242
1254
|
"""Create an execution object
|
|
1243
1255
|
|
|
1244
1256
|
Given an execution configuration, initialize the local compute environment to prepare for executing an
|
|
@@ -1249,6 +1261,7 @@ class DerivaML(Dataset):
|
|
|
1249
1261
|
|
|
1250
1262
|
Args:
|
|
1251
1263
|
configuration: ExecutionConfiguration:
|
|
1264
|
+
dryrun: Do not create an execution record or upload results.
|
|
1252
1265
|
|
|
1253
1266
|
Returns:
|
|
1254
1267
|
An execution object.
|
|
@@ -1260,7 +1273,7 @@ class DerivaML(Dataset):
|
|
|
1260
1273
|
"Only one execution can be created for a Deriva ML instance."
|
|
1261
1274
|
)
|
|
1262
1275
|
else:
|
|
1263
|
-
self._execution = Execution(configuration, self)
|
|
1276
|
+
self._execution = Execution(configuration, self, dryrun=dryrun)
|
|
1264
1277
|
return self._execution
|
|
1265
1278
|
|
|
1266
1279
|
# @validate_call
|
|
@@ -30,7 +30,7 @@ from .deriva_definitions import (
|
|
|
30
30
|
from .deriva_ml_base import DerivaML, FeatureRecord
|
|
31
31
|
from .dataset_aux_classes import DatasetSpec, DatasetVersion, VersionPart
|
|
32
32
|
from .dataset_bag import DatasetBag
|
|
33
|
-
from .execution_configuration import ExecutionConfiguration
|
|
33
|
+
from .execution_configuration import ExecutionConfiguration, Workflow
|
|
34
34
|
from .execution_environment import get_execution_environment
|
|
35
35
|
from .upload import (
|
|
36
36
|
execution_metadata_dir,
|
|
@@ -96,6 +96,7 @@ class Execution:
|
|
|
96
96
|
configuration: ExecutionConfiguration,
|
|
97
97
|
ml_object: "DerivaML",
|
|
98
98
|
reload: Optional[RID] = None,
|
|
99
|
+
dry_run: bool = False,
|
|
99
100
|
):
|
|
100
101
|
"""
|
|
101
102
|
|
|
@@ -107,6 +108,7 @@ class Execution:
|
|
|
107
108
|
self.asset_paths: list[Path] = []
|
|
108
109
|
self.configuration = configuration
|
|
109
110
|
self._ml_object = ml_object
|
|
111
|
+
self._logger = ml_object._logger
|
|
110
112
|
self.start_time = None
|
|
111
113
|
self.stop_time = None
|
|
112
114
|
self.status = Status.created
|
|
@@ -117,13 +119,23 @@ class Execution:
|
|
|
117
119
|
|
|
118
120
|
self._working_dir = self._ml_object.working_dir
|
|
119
121
|
self._cache_dir = self._ml_object.cache_dir
|
|
122
|
+
self._dry_run = dry_run
|
|
120
123
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
124
|
+
if isinstance(self.configuration.workflow, Workflow):
|
|
125
|
+
self.workflow_rid = (
|
|
126
|
+
self._ml_object.add_workflow(self.configuration.workflow)
|
|
127
|
+
if not self._dry_run
|
|
128
|
+
else "0000"
|
|
126
129
|
)
|
|
130
|
+
else:
|
|
131
|
+
self.workflow_rid = self.configuration.workflow
|
|
132
|
+
if (
|
|
133
|
+
self._ml_object.resolve_rid(configuration.workflow).table.name
|
|
134
|
+
!= "Workflow"
|
|
135
|
+
):
|
|
136
|
+
raise DerivaMLException(
|
|
137
|
+
"Workflow specified in execution configuration is not a Workflow"
|
|
138
|
+
)
|
|
127
139
|
|
|
128
140
|
for d in self.configuration.datasets:
|
|
129
141
|
if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
|
|
@@ -142,6 +154,10 @@ class Execution:
|
|
|
142
154
|
schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
|
|
143
155
|
if reload:
|
|
144
156
|
self.execution_rid = reload
|
|
157
|
+
if self.execution_rid == "0000":
|
|
158
|
+
self._dry_run = True
|
|
159
|
+
elif self._dry_run:
|
|
160
|
+
self.execution_rid = "0000"
|
|
145
161
|
else:
|
|
146
162
|
self.execution_rid = schema_path.Execution.insert(
|
|
147
163
|
[
|
|
@@ -189,7 +205,7 @@ class Execution:
|
|
|
189
205
|
self.dataset_rids.append(dataset.rid)
|
|
190
206
|
# Update execution info
|
|
191
207
|
schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
|
|
192
|
-
if self.dataset_rids and not reload:
|
|
208
|
+
if self.dataset_rids and not (reload or self._dry_run):
|
|
193
209
|
schema_path.Dataset_Execution.insert(
|
|
194
210
|
[
|
|
195
211
|
{"Dataset": d, "Execution": self.execution_rid}
|
|
@@ -203,7 +219,7 @@ class Execution:
|
|
|
203
219
|
self._ml_object.download_asset(asset_rid=a, dest_dir=self._asset_dir())
|
|
204
220
|
for a in self.configuration.assets
|
|
205
221
|
]
|
|
206
|
-
if self.asset_paths and not reload:
|
|
222
|
+
if self.asset_paths and not (reload or self._dry_run):
|
|
207
223
|
self._update_execution_asset_table(self.configuration.assets)
|
|
208
224
|
|
|
209
225
|
# Save configuration details for later upload
|
|
@@ -242,6 +258,11 @@ class Execution:
|
|
|
242
258
|
msg: Additional information about the status
|
|
243
259
|
"""
|
|
244
260
|
self.status = status
|
|
261
|
+
self._logger.info(msg)
|
|
262
|
+
|
|
263
|
+
if self._dry_run:
|
|
264
|
+
return
|
|
265
|
+
|
|
245
266
|
self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema].Execution.update(
|
|
246
267
|
[
|
|
247
268
|
{
|
|
@@ -278,7 +299,7 @@ class Execution:
|
|
|
278
299
|
|
|
279
300
|
self.start_time = datetime.now()
|
|
280
301
|
self.uploaded_assets = None
|
|
281
|
-
self.update_status(Status.initializing, "Start
|
|
302
|
+
self.update_status(Status.initializing, "Start execution ...")
|
|
282
303
|
|
|
283
304
|
def execution_stop(self) -> None:
|
|
284
305
|
"""Finish the execution and update the duration and status of execution."""
|
|
@@ -288,13 +309,11 @@ class Execution:
|
|
|
288
309
|
minutes, seconds = divmod(remainder, 60)
|
|
289
310
|
duration = f"{round(hours, 0)}H {round(minutes, 0)}min {round(seconds, 4)}sec"
|
|
290
311
|
|
|
291
|
-
if self._ml_object._is_notebook:
|
|
292
|
-
self._create_notebook_checkpoint()
|
|
293
|
-
|
|
294
312
|
self.update_status(Status.completed, "Algorithm execution ended.")
|
|
295
|
-
self.
|
|
296
|
-
|
|
297
|
-
|
|
313
|
+
if not self._dry_run:
|
|
314
|
+
self._ml_object.pathBuilder.schemas[
|
|
315
|
+
self._ml_object.ml_schema
|
|
316
|
+
].Execution.update([{"RID": self.execution_rid, "Duration": duration}])
|
|
298
317
|
|
|
299
318
|
def _upload_execution_dirs(self) -> dict[str, FileUploadState]:
|
|
300
319
|
"""Upload execution assets at _working_dir/Execution_asset.
|
|
@@ -402,6 +421,8 @@ class Execution:
|
|
|
402
421
|
Uploaded assets with key as assets' suborder name, values as an
|
|
403
422
|
ordered dictionary with RID and metadata in the Execution_Asset table.
|
|
404
423
|
"""
|
|
424
|
+
if self._dry_run:
|
|
425
|
+
return {}
|
|
405
426
|
try:
|
|
406
427
|
uploaded_assets = self._upload_execution_dirs()
|
|
407
428
|
self.update_status(Status.completed, "Successfully end the execution.")
|
|
@@ -49,7 +49,7 @@ class ExecutionConfiguration(BaseModel):
|
|
|
49
49
|
|
|
50
50
|
datasets: conlist(DatasetSpec) = []
|
|
51
51
|
assets: list[RID | str] = [] # List of RIDs to model files.
|
|
52
|
-
workflow: RID
|
|
52
|
+
workflow: RID | Workflow
|
|
53
53
|
description: str = ""
|
|
54
54
|
|
|
55
55
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.9.0
|
|
3
|
+
Version: 1.10.0
|
|
4
4
|
Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -16,5 +16,14 @@ Requires-Dist: setuptools-scm<=6.0
|
|
|
16
16
|
Requires-Dist: nbstripout
|
|
17
17
|
Dynamic: license-file
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
# DerivaML
|
|
20
|
+
Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
|
|
20
21
|
using a deriva catalog.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## Installing the GitHub CLI
|
|
25
|
+
|
|
26
|
+
The script release.sh will create a new release tag in GitHub. This script requires the
|
|
27
|
+
GitHub CLI be installed.
|
|
28
|
+
|
|
29
|
+
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
deriva_ml-1.9.0/README.md
DELETED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|