deriva-ml 1.6.7__tar.gz → 1.6.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deriva_ml-1.6.7/src/deriva_ml.egg-info → deriva_ml-1.6.8}/PKG-INFO +1 -1
- deriva_ml-1.6.8/src/deriva_ml/VERSION.py +1 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/deriva_definitions.py +4 -1
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/deriva_ml_base.py +99 -8
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution.py +64 -245
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution_configuration.py +1 -1
- {deriva_ml-1.6.7 → deriva_ml-1.6.8/src/deriva_ml.egg-info}/PKG-INFO +1 -1
- deriva_ml-1.6.8/tests/test_execution.py +148 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_upload.py +9 -7
- deriva_ml-1.6.7/src/deriva_ml/VERSION.py +0 -1
- deriva_ml-1.6.7/tests/test_execution.py +0 -118
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/LICENSE +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/README.md +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/pyproject.toml +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/setup.cfg +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/__init__.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/__init__.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/alter_annotation.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/annotation_temp.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/create_schema.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/build/lib/schema_setup/table_comments_utils.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/database_model.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/dataset.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/dataset_aux_classes.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/dataset_bag.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/demo_catalog.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/deriva_model.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution_environment.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/feature.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/history.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/__init__.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/alter_annotation.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/annotations.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/create_schema.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/policy.json +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/schema_setup/table_comments_utils.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/test_functions.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/upload.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/entry_points.txt +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/requires.txt +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml.egg-info/top_level.txt +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_basic_tables.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_dataset.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_download.py +0 -0
- {deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_features.py +0 -0
deriva_ml-1.6.8/src/deriva_ml/VERSION.py
@@ -0,0 +1 @@
+__version__ = "1.6.8"
{deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/deriva_ml_base.py
@@ -11,14 +11,17 @@ relationships that follow a specific data model.
 import getpass
 import logging
 from datetime import datetime
+import hashlib
 from itertools import chain
 from pathlib import Path
+import requests
 from typing import Optional, Any, Iterable, TYPE_CHECKING
 from deriva.core import (
     ErmrestCatalog,
     get_credential,
     urlquote,
     DEFAULT_SESSION_CONFIG,
+    format_exception,
 )
 import deriva.core.datapath as datapath
 from deriva.core.datapath import DataPathException
@@ -27,7 +30,7 @@ from deriva.core.ermrest_model import Key, Table
 from deriva.core.hatrac_store import HatracStore
 from pydantic import validate_call, ConfigDict
 
-from .execution_configuration import ExecutionConfiguration
+from .execution_configuration import ExecutionConfiguration, Workflow
 from .feature import Feature, FeatureRecord
 from .dataset import Dataset
 from .deriva_model import DerivaModel
@@ -47,6 +50,7 @@ from .deriva_definitions import (
     DerivaMLException,
     ML_SCHEMA,
     VocabularyTerm,
+    MLVocab,
 )
 
 if TYPE_CHECKING:
@@ -122,6 +126,7 @@ class DerivaML(Dataset):
         self.ml_schema = ml_schema
         self.version = model_version
         self.configuration = None
+        self._execution: Optional[Execution] = None
 
         self.domain_schema = self.model.domain_schema
         self.project_name = project_name or self.domain_schema
@@ -145,6 +150,10 @@ class DerivaML(Dataset):
                 f"Loading dirty model. Consider commiting and tagging: {self.version}"
             )
 
+    def __del__(self):
+        if self._execution and self._execution.status != Status.completed:
+            self._execution.update_status(Status.aborted, f"Execution Aborted")
+
     @staticmethod
     def _get_session_config():
         """ """
@@ -187,7 +196,7 @@ class DerivaML(Dataset):
         return table_path(
             self.working_dir,
             schema=self.domain_schema,
-            table=self.model.
+            table=self.model.name_to_table(table).name,
         )
 
     def asset_dir(
@@ -688,19 +697,29 @@ class DerivaML(Dataset):
             for v in pb.schemas[table.schema.name].tables[table.name].entities().fetch()
         ]
 
-
+    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
+    def download_asset(self, asset_rid: RID, dest_dir: Path) -> Path:
         """Download an asset from a URL and place it in a local directory.
 
         Args:
-
-
+            asset_rid: URL of the asset.
+            dest_dir: Destination directory for the asset.
 
         Returns:
             A Path object to the downloaded asset.
         """
+        table = self.resolve_rid(asset_rid).table
+        if not self.model.is_asset(table):
+            raise DerivaMLException(f"RID {asset_rid} is not for an asset table.")
+
+        tpath = self.pathBuilder.schemas[table.schema.name].tables[table.name]
+        asset_metadata = list(tpath.filter(tpath.RID == asset_rid).entities())[0]
+        asset_url = asset_metadata["URL"]
+        asset_filename = dest_dir / asset_metadata["Filename"]
+
         hs = HatracStore("https", self.host_name, self.credential)
-        hs.get_obj(path=asset_url, destfilename=
-        return Path(
+        hs.get_obj(path=asset_url, destfilename=asset_filename.as_posix())
+        return Path(asset_filename)
 
     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def upload_assets(
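The reworked download_asset takes a RID rather than a raw URL: it resolves the RID, verifies the target is an asset table, looks up the stored URL and Filename, and fetches the object from Hatrac into the destination directory. A minimal usage sketch, assuming `ml` is an already-connected DerivaML instance; the RID value and directory are hypothetical:

from pathlib import Path

# Hypothetical asset RID and destination; ml is a connected DerivaML instance.
local_file = ml.download_asset(asset_rid="1-ABCD", dest_dir=Path("/tmp/assets"))
print(local_file)  # dest_dir / the Filename recorded in the asset table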
@@ -761,6 +780,72 @@ class DerivaML(Dataset):
             ]
         )
 
+    def list_workflows(self) -> list[Workflow]:
+        workflow_path = self.pathBuilder.schemas[self.ml_schema].Workflow
+        return [
+            Workflow(
+                name=w["Name"],
+                url=w["URL"],
+                workflow_type=w["Workflow_Type"],
+                version=w["Version"],
+                description=w["Description"],
+            )
+            for w in workflow_path.entities().fetch()
+        ]
+
+    def add_workflow(self, workflow: Workflow) -> RID:
+        """Add a workflow to the Workflow table.
+
+        Args:
+            - url(str): URL of the workflow.
+            - workflow_type(str): Type of the workflow.
+            - version(str): Version of the workflow.
+            - description(str): Description of the workflow.
+
+        Returns:
+            - str: Resource Identifier (RID) of the added workflow.
+
+        """
+
+        # Check to make sure that the workflow is not already in the table. If it's not, add it.
+        def get_checksum(url) -> str:
+            """Get the checksum of a file from a URL."""
+            try:
+                response = requests.get(url)
+                response.raise_for_status()
+            except Exception:
+                raise DerivaMLException(f"Invalid URL: {url}")
+            else:
+                sha256_hash = hashlib.sha256()
+                sha256_hash.update(response.content)
+                checksum = "SHA-256: " + sha256_hash.hexdigest()
+            return checksum
+
+        ml_schema_path = self.pathBuilder.schemas[self.ml_schema]
+        try:
+            url_column = ml_schema_path.Workflow.URL
+            workflow_record = list(
+                ml_schema_path.Workflow.filter(url_column == workflow.url).entities()
+            )[0]
+            workflow_rid = workflow_record["RID"]
+        except IndexError:
+            # Record doesn't exist already
+            workflow_record = {
+                "URL": workflow.url,
+                "Name": workflow.name,
+                "Description": workflow.description,
+                "Checksum": get_checksum(workflow.url),
+                "Version": workflow.version,
+                MLVocab.workflow_type: self.lookup_term(
+                    MLVocab.workflow_type, workflow.workflow_type
+                ).name,
+            }
+            workflow_rid = ml_schema_path.Workflow.insert([workflow_record])[0]["RID"]
+        except Exception as e:
+            error = format_exception(e)
+            raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
+        return workflow_rid
+
     # @validate_call
     def create_execution(self, configuration: ExecutionConfiguration) -> "Execution":
         """Create an execution object
@@ -779,7 +864,13 @@ class DerivaML(Dataset):
         """
         from .execution import Execution
 
-
+        if self._execution:
+            DerivaMLException(
+                f"Only one execution can be created for a Deriva ML instance."
+            )
+        else:
+            self._execution = Execution(configuration, self)
+        return self._execution
 
     # @validate_call
     def restore_execution(self, execution_rid: Optional[RID] = None) -> "Execution":
{deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution.py
@@ -1,13 +1,13 @@
+from __future__ import annotations
+
 from collections import defaultdict
 import csv
-import hashlib
 import json
 import logging
 import os
 import shutil
 from datetime import datetime
 from pathlib import Path
-import requests
 from tempfile import NamedTemporaryFile
 from typing import Iterable, Any, Optional
 from deriva.core import format_exception
@@ -96,7 +96,8 @@ class Execution:
         self.configuration = configuration
         self._ml_object = ml_object
         self.start_time = None
-        self.status = Status.
+        self.status = Status.created
+        self.uploaded_assets: list[Path] = []
 
         self.dataset_rids: list[RID] = []
         self.datasets: list[DatasetBag] = []
@@ -104,7 +105,27 @@ class Execution:
         self._working_dir = self._ml_object.working_dir
         self._cache_dir = self._ml_object.cache_dir
 
-        self.workflow_rid = self.
+        self.workflow_rid = self.configuration.workflow
+
+        if self._ml_object.resolve_rid(configuration.workflow).table.name != "Workflow":
+            raise DerivaMLException(
+                f"Workflow specified in execution configuration is not a Workflow"
+            )
+
+        for d in self.configuration.datasets:
+            if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
+                raise DerivaMLException(
+                    f"Dataset specified in execution configuration is not a dataset"
+                )
+
+        for a in self.configuration.assets:
+            if not self._ml_object.model.is_asset(
+                self._ml_object.resolve_rid(a).table.name
+            ):
+                raise DerivaMLException(
+                    f"Asset specified in execution configuration is not a asset table"
+                )
+
         schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
         if reload:
             self.execution_rid = reload
@@ -117,47 +138,10 @@ class Execution:
                     }
                 ]
             )[0]["RID"]
-            self._initialize_execution(reload)
-
-    def _add_workflow(self) -> RID:
-        """Add a workflow to the Workflow table.
-
-        Args:
-            - url(str): URL of the workflow.
-            - workflow_type(str): Type of the workflow.
-            - version(str): Version of the workflow.
-            - description(str): Description of the workflow.
 
-
-
-        """
-        workflow = self.configuration.workflow
-        # Check to make sure that the workflow is not already in the table. If it's not, add it.
-        ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
-        try:
-            url_column = ml_schema_path.Workflow.URL
-            workflow_record = list(
-                ml_schema_path.Workflow.filter(url_column == workflow.url).entities()
-            )[0]
-            workflow_rid = workflow_record["RID"]
-        except IndexError:
-            # Record doesn't exist already
-            workflow_record = {
-                "URL": workflow.url,
-                "Name": workflow.name,
-                "Description": workflow.description,
-                "Checksum": self._get_checksum(workflow.url),
-                "Version": workflow.version,
-                MLVocab.workflow_type: self._ml_object.lookup_term(
-                    MLVocab.workflow_type, workflow.workflow_type
-                ).name,
-            }
-            workflow_rid = ml_schema_path.Workflow.insert([workflow_record])[0]["RID"]
-        except Exception as e:
-            error = format_exception(e)
-            raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
-        return workflow_rid
+        # Create a directory for execution rid so we can recover state in case of a crash.
+        execution_root(prefix=self._ml_object.working_dir, exec_rid=self.execution_rid)
+        self._initialize_execution(reload)
 
     def _save_runtime_environment(self):
 
@@ -186,7 +170,9 @@ class Execution:
         """
         # Materialize bdbag
         for dataset in self.configuration.datasets:
-            self.update_status(
+            self.update_status(
+                Status.initializing, f"Materialize bag {dataset.rid}... "
+            )
             self.datasets.append(self.download_dataset_bag(dataset))
             self.dataset_rids.append(dataset.rid)
         # Update execution info
@@ -199,13 +185,14 @@ class Execution:
             ]
         )
 
-        # Download
+        # Download assets....
        self.update_status(Status.running, "Downloading assets ...")
-        asset_path = self._asset_dir().as_posix()
         self.asset_paths = [
-            self.
-            for
+            self._ml_object.download_asset(asset_rid=a, dest_dir=self._asset_dir())
+            for a in self.configuration.assets
         ]
+        if self.asset_paths and not reload:
+            self._update_execution_asset_table(self.configuration.assets)
 
         # Save configuration details for later upload
         exec_config_path = ExecMetadataVocab.execution_config.value
@@ -217,29 +204,7 @@ class Execution:
         self._save_runtime_environment()
 
         self.start_time = datetime.now()
-        self.update_status(Status.
-
-    @staticmethod
-    def _get_checksum(url) -> str:
-        """Get the checksum of a file from a URL.
-
-        Args:
-            url:
-
-        Returns:
-            str: Checksum of the file.
-
-        """
-        try:
-            response = requests.get(url)
-            response.raise_for_status()
-        except Exception:
-            raise DerivaMLException(f"Invalid URL: {url}")
-        else:
-            sha256_hash = hashlib.sha256()
-            sha256_hash.update(response.content)
-            checksum = "SHA-256: " + sha256_hash.hexdigest()
-            return checksum
+        self.update_status(Status.pending, "Initialize status finished.")
 
     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def download_dataset_bag(self, dataset: DatasetSpec) -> DatasetBag:
@@ -276,8 +241,10 @@ class Execution:
 
     def execution_start(self) -> None:
         """ """
+
         self.start_time = datetime.now()
-        self.
+        self.uploaded_assets = None
+        self.update_status(Status.initializing, f"Start ML algorithm ...")
 
     def execution_stop(self) -> None:
         """Finish the execution and update the duration and status of execution."""
@@ -318,7 +285,15 @@ class Execution:
                 for r in results.values()
                 if r.state == UploadState.success and "Execution_Asset_Type" in r.result
             ]
+            execution_metadata = [
+                r.result["RID"]
+                for r in results.values()
+                if r.state == UploadState.success
+                and "Execution_Metadata_Type" in r.result
+            ]
             self._update_execution_asset_table(execution_assets)
+            self._update_execution_metadata_table(execution_metadata)
+
         except Exception as e:
             error = format_exception(e)
             self.update_status(Status.failed, error)
@@ -413,53 +388,6 @@ class Execution:
         path.mkdir(parents=True, exist_ok=True)
         return path
 
-    def _download_execution_file(self, file_rid: RID, dest_dir: str = "") -> Path:
-        """Download execution assets.
-
-        Args:
-            file_rid(str): Resource Identifier (RID) of the file.
-            dest_dir(str): Destination directory for the downloaded assets.
-
-        Returns:
-            - Path: Path to the downloaded asset.
-
-        Raises:
-            - DerivaMLException: If there is an issue downloading the assets.
-        """
-        table = self._ml_object.resolve_rid(file_rid).table
-        if not self._ml_object.model.is_asset(table):
-            raise DerivaMLException(f"Table {table} is not an asset table.")
-
-        pb = self._ml_object.pathBuilder
-        ml_schema_path = pb.schemas[self._ml_object.ml_schema]
-        tpath = pb.schemas[table.schema.name].tables[table.name]
-        file_metadata = list(tpath.filter(tpath.RID == file_rid).entities())[0]
-        file_url = file_metadata["URL"]
-        file_name = file_metadata["Filename"]
-        try:
-            self.update_status(Status.running, f"Downloading {table.name}...")
-            file_path = self._ml_object.download_asset(
-                file_url, str(dest_dir) + "/" + file_name
-            )
-        except Exception as e:
-            error = format_exception(e)
-            self.update_status(Status.failed, error)
-            raise DerivaMLException(
-                f"Failed to download the file {file_rid}. Error: {error}"
-            )
-
-        ass_table = table.name + "_Execution"
-        ass_table_path = ml_schema_path.tables[ass_table]
-        exec_file_exec_entities = ass_table_path.filter(
-            ass_table_path.columns[table.name] == file_rid
-        ).entities()
-        exec_list = [e["Execution"] for e in exec_file_exec_entities]
-        if self.execution_rid not in exec_list:
-            tpath = pb.schemas[self._ml_object.ml_schema].tables[ass_table]
-            tpath.insert([{table.name: file_rid, "Execution": self.execution_rid}])
-        self.update_status(Status.running, f"Successfully download {table.name}...")
-        return Path(file_path)
-
     def _clean_folder_contents(self, folder_path: Path):
         """
 
@@ -477,47 +405,6 @@ class Execution:
             error = format_exception(e)
             self.update_status(Status.failed, error)
 
-    # def _update_execution_metadata_table(
-    #     self, assets: dict[str, FileUploadState]
-    # ) -> None:
-    #     """Upload execution metadata at _working_dir/Execution_metadata.
-    #
-    #     Args:
-    #         assets: dict[str:
-    #             FileUploadState]:
-    #
-    #     Raises:
-    #         - DerivaMLException: If there is an issue uploading the metadata.
-    #     """
-    #     ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
-    #     a_table = list(
-    #         self._ml_object.model.schemas[self._ml_object.ml_schema]
-    #         .tables["Execution_Metadata"]
-    #         .find_associations()
-    #     )[0].name
-    #
-    #     def asset_rid(asset) -> str:
-    #         """
-    #
-    #         Args:
-    #             asset:
-    #
-    #         Returns:
-    #
-    #         """
-    #         return (
-    #             asset.state == UploadState.success
-    #             and asset.result
-    #             and asset.result["RID"]
-    #         )
-    #
-    #     entities = [
-    #         {"Execution_Metadata": rid, "Execution": self.execution_rid}
-    #         for asset in assets.values()
-    #         if (rid := asset_rid(asset))
-    #     ]
-    #     ml_schema_path.tables[a_table].insert(entities)
-
     def _update_feature_table(
         self,
         target_table: str,
@@ -568,6 +455,15 @@ class Execution:
         entities = [map_path(e) for e in csv.DictReader(feature_values)]
         self._ml_object.domain_path.tables[feature_table].insert(entities)
 
+    def _update_execution_metadata_table(self, assets: list[RID]) -> None:
+        """Upload execution metadata at _working_dir/Execution_metadata."""
+        ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
+        entities = [
+            {"Execution_Metadata": metadata_rid, "Execution": self.execution_rid}
+            for metadata_rid in assets
+        ]
+        ml_schema_path.Execution_Metadata_Execution.insert(entities)
+
     def _update_execution_asset_table(self, assets: list[RID]) -> None:
         """Assets associated with an execution must be linked to an execution entity after they are uploaded into
         the catalog. This routine takes a list of uploaded assets and makes that association.
@@ -576,17 +472,9 @@ class Execution:
             assets: list of RIDS for execution assets.:
         """
         ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
-        asset_exec_entities = ml_schema_path.Execution_Asset_Execution.filter(
-            ml_schema_path.Execution_Asset_Execution.Execution == self.execution_rid
-        ).entities()
-        existing_assets = {e["Execution_Asset"] for e in asset_exec_entities}
-
-        # Now got through the list of recently added assets, and add an entry for this asset if it
-        # doesn't already exist.
         entities = [
             {"Execution_Asset": asset_rid, "Execution": self.execution_rid}
             for asset_rid in assets
-            if asset_rid not in existing_assets
         ]
         ml_schema_path.Execution_Asset_Execution.insert(entities)
 
@@ -741,13 +629,9 @@ class Execution:
             self._working_dir, schema=self._ml_object.domain_schema, table=table
         )
 
-    def execute(self) ->
-        """
-
-        Returns:
-            A DerivaMLExec object
-        """
-        return DerivaMLExec(self)
+    def execute(self) -> Execution:
+        """Initiate an execution with provided configuration. Can be used in a context manager."""
+        return self
 
     @validate_call
     def write_feature_file(self, features: Iterable[FeatureRecord]) -> None:
@@ -801,20 +685,6 @@ class Execution:
         ]
         return "\n".join(items)
 
-
-class DerivaMLExec:
-    """Context manager for managing DerivaML execution.
-
-    Provides status updates. For convenience, asset discovery and creation functions from the
-    Execution object are provided.
-    """
-
-    def __init__(self, execution: Execution):
-        self.execution = execution
-        self.execution_rid = execution.execution_rid
-        self.start_time = datetime.now()
-        self.uploaded_assets = None
-
     def __enter__(self):
         """
         Method invoked when entering the context.
@@ -823,7 +693,7 @@ class DerivaMLExec:
         - self: The instance itself.
 
         """
-        self.
+        self.execution_start()
         return self
 
     def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> bool:
@@ -839,11 +709,11 @@ class DerivaMLExec:
             bool: True if execution completed successfully, False otherwise.
         """
         if not exc_type:
-            self.
-            self.
+            self.update_status(Status.running, "Successfully run Ml.")
+            self.execution_stop()
             return True
         else:
-            self.
+            self.update_status(
                 Status.failed,
                 f"Exception type: {exc_type}, Exception value: {exc_value}",
             )
@@ -851,54 +721,3 @@ class DerivaMLExec:
                 f"Exception type: {exc_type}, Exception value: {exc_value}, Exception traceback: {exc_tb}"
             )
         return False
-
-    def execution_asset_path(self, asset_type: str) -> Path:
-        """Return path to where execution assets of specified type should be placed.
-
-        Args:
-            asset_type: str:
-
-        Returns:
-            Path to the directory in which to place asset files.
-        """
-        return self.execution.execution_asset_path(asset_type)
-
-    def execution_metadata_path(self, metadata_type: str) -> Path:
-        """Return path to where execution metadata of specified type should be placed.
-
-        Args:
-            metadata_type: Term from metadata type vocabulary.
-
-        Returns:
-            Path to the directory in which to place metadata files.
-        """
-        return self.execution.execution_metadata_path(metadata_type)
-
-    def feature_paths(
-        self, table: Table | str, feature_name: str
-    ) -> tuple[Path, dict[str, Path]]:
-        """Return the file path of where to place feature values, and assets for the named feature and table.
-
-        A side effect of calling this routine is that the directories in which to place the feature values and assets
-        will be created
-
-        Args:
-            table: The table with which the feature is associated.
-            feature_name: Name of the feature
-
-        Returns:
-            A tuple whose first element is the path for the feature values and whose second element is a dictionary
-            of associated asset table names and corresponding paths.
-        """
-        return self.execution.feature_paths(table, feature_name)
-
-    def table_path(self, table: Table | str) -> Path:
-        """Path in the local file system for tables to be uploaded as part of the execution.
-
-        Args:
-            table: Table|str:
-
-        Returns:
-
-        """
-        return self.execution.table_path(table)
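With the DerivaMLExec wrapper class removed, Execution is now its own context manager: __enter__ calls execution_start(), and __exit__ either marks success and calls execution_stop() or records the exception and fails the execution. A usage sketch following the new tests, assuming `execution` came from ml.create_execution(...) as above:

with execution as e:
    # Run the ML step here; e.asset_paths and e.datasets were populated
    # from the configuration during initialization.
    pass
# Upload outputs and link assets and metadata records to this execution.
execution.upload_execution_outputs()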
{deriva_ml-1.6.7 → deriva_ml-1.6.8}/src/deriva_ml/execution_configuration.py
@@ -49,7 +49,7 @@ class ExecutionConfiguration(BaseModel):
 
     datasets: conlist(DatasetSpec) = []
     assets: list[RID | str] = []  # List of RIDs to model files.
-    workflow:
+    workflow: RID
     description: str = ""
 
     model_config = ConfigDict(arbitrary_types_allowed=True)
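This one-line change is the pivot of the refactor: ExecutionConfiguration.workflow now holds the RID returned by DerivaML.add_workflow rather than a Workflow object, and the Execution constructor validates that the RID resolves to a Workflow record. A sketch of constructing a configuration under that assumption (the asset RID value is hypothetical):

config = ExecutionConfiguration(
    workflow=workflow_rid,  # RID from ml.add_workflow(...)
    assets=["1-ABCD"],      # optional asset RIDs; value illustrative
    description="Sample Execution",
)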
deriva_ml-1.6.8/tests/test_execution.py
@@ -0,0 +1,148 @@
+from idlelib.run import manage_socket
+
+from derivaml_test import TestDerivaML
+from deriva_ml import (
+    MLVocab as vc,
+    Workflow,
+    ExecutionConfiguration,
+    DatasetSpec,
+    DerivaML,
+)
+
+
+class TestExecution(TestDerivaML):
+    def test_execution_no_download(self):
+        self.ml_instance.add_term(
+            vc.workflow_type,
+            "Manual Workflow",
+            description="Initial setup of Model File",
+        )
+        self.ml_instance.add_term(
+            vc.execution_asset_type,
+            "API_Model",
+            description="Model for our API workflow",
+        )
+        self.ml_instance.add_term(
+            vc.workflow_type,
+            "ML Demo",
+            description="A ML Workflow that uses Deriva ML API",
+        )
+
+        api_workflow = self.ml_instance.add_workflow(
+            Workflow(
+                name="Manual Workflow",
+                url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/tests/test_execution.py",
+                workflow_type="Manual Workflow",
+                description="A manual operation",
+            )
+        )
+
+        manual_execution = self.ml_instance.create_execution(
+            ExecutionConfiguration(
+                description="Sample Execution", workflow=api_workflow
+            )
+        )
+        with manual_execution as e:
+            pass
+        manual_execution.upload_execution_outputs()
+
+    def test_execution_download(self):
+        self.populate_catalog()
+        double_nested, nested, datasets = self.create_nested_dataset()
+
+        self.ml_instance.add_term(
+            vc.execution_asset_type,
+            "API_Model",
+            description="Model for our API workflow",
+        )
+        self.ml_instance.add_term(
+            vc.workflow_type,
+            "ML Demo",
+            description="A ML Workflow that uses Deriva ML API",
+        )
+        api_workflow = self.ml_instance.add_workflow(
+            Workflow(
+                name="ML Demo",
+                url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
+                workflow_type="ML Demo",
+                description="A workflow that uses Deriva ML",
+            )
+        )
+        execution_model = self.create_execution_asset(api_workflow)
+
+        config = ExecutionConfiguration(
+            datasets=[
+                DatasetSpec(
+                    rid=nested[0],
+                    version=self.ml_instance.dataset_version(nested[0]),
+                ),
+                DatasetSpec(
+                    rid=nested[1],
+                    version=self.ml_instance.dataset_version(nested[1]),
+                ),
+            ],
+            assets=[execution_model],
+            description="Sample Execution",
+            workflow=api_workflow,
+        )
+        exec = self.ml_instance.create_execution(config)
+        with exec as e:
+            print(e.asset_paths)
+            print(e.datasets)
+            self.assertEqual(1, len(e.asset_paths))
+            self.assertEqual(2, len(e.datasets))
+        exec.upload_execution_outputs()
+        pb = self.ml_instance.pathBuilder.schemas[self.ml_instance.ml_schema]
+        execution_asset_execution = pb.Execution_Asset_Execution
+        execution_metadata_execution = pb.Execution_Metadata_Execution
+        execution_asset = pb.Execution_Asset
+        execution_metadata = pb.Execution_Metadata
+
+        assets_execution = [
+            {
+                "RID": a["RID"],
+                "Execution_Asset": a["Execution_Asset"],
+                "Execution": a["Execution"],
+            }
+            for a in execution_asset_execution.entities().fetch()
+            if a["Execution"] == exec.execution_rid
+        ]
+        metadata_execution = [
+            {
+                "RID": a["RID"],
+                "Execution": a["Execution"],
+                "Execution_Metadata": a["Execution_Metadata"],
+            }
+            for a in execution_metadata_execution.entities().fetch()
+            if a["Execution"] == exec.execution_rid
+        ]
+        execution_assets = [
+            {"RID": a["RID"], "Filename": a["Filename"]}
+            for a in execution_asset.entities().fetch()
+        ]
+        execution_metadata = [
+            {"RID": a["RID"], "Filename": a["Filename"]}
+            for a in execution_metadata.entities().fetch()
+        ]
+        print(assets_execution)
+        print(metadata_execution)
+        print(execution_assets)
+        print(execution_metadata)
+        self.assertEqual(1, len(assets_execution))
+        self.assertEqual(2, len(metadata_execution))
+
+    def create_execution_asset(self, api_workflow):
+        manual_execution = self.ml_instance.create_execution(
+            ExecutionConfiguration(
+                description="Sample Execution", workflow=api_workflow
+            )
+        )
+        model_file = (
+            manual_execution.execution_asset_path("API_Model") / "modelfile.txt"
+        )
+        with open(model_file, "w") as fp:
+            fp.write(f"My model")
+        # Now upload the file and retrieve the RID of the new asset from the returned results.
+        uploaded_assets = manual_execution.upload_execution_outputs()
+        self.ml_instance._execution = None
+        return uploaded_assets["API_Model/modelfile.txt"].result["RID"]
{deriva_ml-1.6.7 → deriva_ml-1.6.8}/tests/test_upload.py
@@ -71,13 +71,14 @@ class TestUpload(TestDerivaML):
             description="Model for our API workflow",
         )
 
-        api_workflow =
-
-
-
-
+        api_workflow = self.ml_instance.add_workflow(
+            Workflow(
+                name="Manual Workflow",
+                url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/tests/test_upload.py",
+                workflow_type="Manual Workflow",
+                description="A manual operation",
+            )
         )
-
         manual_execution = self.ml_instance.create_execution(
             ExecutionConfiguration(
                 description="Sample Execution", workflow=api_workflow
@@ -95,6 +96,7 @@ class TestUpload(TestDerivaML):
         uploaded_assets = manual_execution.upload_execution_outputs()
         path = self.ml_instance.catalog.getPathBuilder().schemas["deriva-ml"]
         self.assertEqual(1, len(list(path.Execution_Asset.entities().fetch())))
+
         execution_metadata = list(path.Execution_Metadata.entities().fetch())
-        print([m
+        print([m for m in execution_metadata])
         self.assertEqual(2, len(execution_metadata))
deriva_ml-1.6.7/src/deriva_ml/VERSION.py
@@ -1 +0,0 @@
-__version__ = "1.6.7"
deriva_ml-1.6.7/tests/test_execution.py
@@ -1,118 +0,0 @@
-from derivaml_test import TestDerivaML
-from deriva_ml import MLVocab as vc, Workflow, ExecutionConfiguration, DatasetSpec
-from deriva_ml.demo_catalog import (
-    reset_demo_catalog,
-    populate_demo_catalog,
-    create_demo_datasets,
-)
-
-
-class TestExecution(TestDerivaML):
-    def test_execution_no_download(self):
-        self.ml_instance.add_term(
-            vc.workflow_type,
-            "Manual Workflow",
-            description="Initial setup of Model File",
-        )
-        self.ml_instance.add_term(
-            vc.execution_asset_type,
-            "API_Model",
-            description="Model for our API workflow",
-        )
-        self.ml_instance.add_term(
-            vc.workflow_type,
-            "ML Demo",
-            description="A ML Workflow that uses Deriva ML API",
-        )
-
-        api_workflow = Workflow(
-            name="Manual Workflow",
-            url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/tests/test_execution.py",
-            workflow_type="Manual Workflow",
-            description="A manual operation",
-        )
-
-        manual_execution = self.ml_instance.create_execution(
-            ExecutionConfiguration(
-                description="Sample Execution", workflow=api_workflow
-            )
-        )
-        manual_execution.upload_execution_outputs()
-
-    def test_execution_download(self):
-        populate_demo_catalog(self.ml_instance, self.domain_schema)
-        create_demo_datasets(self.ml_instance)
-        exec_config = execution_test(self.ml_instance)
-        exec = self.ml_instance.create_execution(exec_config)
-
-
-def execution_test(ml_instance):
-    training_dataset_rid = [
-        ds["RID"]
-        for ds in ml_instance.find_datasets()
-        if "Training" in ds["Dataset_Type"]
-    ][0]
-    testing_dataset_rid = [
-        ds["RID"]
-        for ds in ml_instance.find_datasets()
-        if "Testing" in ds["Dataset_Type"]
-    ][0]
-
-    nested_dataset_rid = [
-        ds["RID"]
-        for ds in ml_instance.find_datasets()
-        if "Partitioned" in ds["Dataset_Type"]
-    ][0]
-
-    ml_instance.add_term(
-        vc.workflow_type, "Manual Workflow", description="Initial setup of Model File"
-    )
-    ml_instance.add_term(
-        vc.execution_asset_type, "API_Model", description="Model for our API workflow"
-    )
-    ml_instance.add_term(
-        vc.workflow_type, "ML Demo", description="A ML Workflow that uses Deriva ML API"
-    )
-    api_workflow = Workflow(
-        name="Manual Workflow",
-        url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/Notebooks/DerivaML%20Execution.ipynb",
-        workflow_type="Manual Workflow",
-        description="A manual operation",
-    )
-
-    manual_execution = ml_instance.create_execution(
-        ExecutionConfiguration(description="Sample Execution", workflow=api_workflow)
-    )
-
-    # Now lets create model configuration for our program.
-    model_file = manual_execution.execution_asset_path("API_Model") / "modelfile.txt"
-    with open(model_file, "w") as fp:
-        fp.write(f"My model")
-
-    # Now upload the file and retrieve the RID of the new asset from the returned results.
-    uploaded_assets = manual_execution.upload_execution_outputs()
-
-    training_model_rid = uploaded_assets["API_Model/modelfile.txt"].result["RID"]
-    api_workflow = Workflow(
-        name="ML Demo",
-        url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
-        workflow_type="ML Demo",
-        description="A workflow that uses Deriva ML",
-    )
-
-    config = ExecutionConfiguration(
-        datasets=[
-            DatasetSpec(
-                rid=nested_dataset_rid,
-                version=ml_instance.dataset_version(nested_dataset_rid),
-            ),
-            DatasetSpec(
-                rid=testing_dataset_rid,
-                version=ml_instance.dataset_version(testing_dataset_rid),
-            ),
-        ],
-        assets=[training_model_rid],
-        description="Sample Execution",
-        workflow=api_workflow,
-    )
-    return config