deriva-ml 1.17.10__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +126 -110
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +543 -242
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +223 -34
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -4
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +1 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.10.dist-info/RECORD +0 -45
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/demo_catalog.py
CHANGED
|
@@ -1,10 +1,17 @@
|
|
|
1
|
+
# type: ignore[arg-type, call-arg]
|
|
2
|
+
"""Demo catalog utilities for DerivaML testing and examples.
|
|
3
|
+
|
|
4
|
+
This module creates demo catalogs with sample data for testing. It uses
|
|
5
|
+
dynamically created Pydantic models for features, which cannot be statically
|
|
6
|
+
typed - hence the type ignore above.
|
|
7
|
+
"""
|
|
1
8
|
from __future__ import annotations
|
|
2
9
|
|
|
3
10
|
import atexit
|
|
4
11
|
import itertools
|
|
5
12
|
import logging
|
|
6
|
-
import os
|
|
7
13
|
import string
|
|
14
|
+
import subprocess
|
|
8
15
|
from collections.abc import Iterator, Sequence
|
|
9
16
|
from datetime import datetime
|
|
10
17
|
from numbers import Integral
|
|
@@ -13,24 +20,29 @@ from random import choice, randint, random
|
|
|
13
20
|
from tempfile import TemporaryDirectory
|
|
14
21
|
|
|
15
22
|
from deriva.core import BaseCLI, ErmrestCatalog
|
|
16
|
-
from deriva.core.ermrest_model import
|
|
23
|
+
from deriva.core.ermrest_model import Schema, Table
|
|
24
|
+
from deriva.core.typed import BuiltinType, ColumnDef, SchemaDef, TableDef
|
|
17
25
|
from pydantic import BaseModel, ConfigDict
|
|
18
26
|
from requests.exceptions import HTTPError
|
|
19
27
|
|
|
20
|
-
from deriva_ml import DerivaML, MLVocab
|
|
28
|
+
from deriva_ml import DerivaML, DerivaMLException, MLVocab
|
|
21
29
|
from deriva_ml.core.definitions import RID, BuiltinTypes, ColumnDefinition
|
|
30
|
+
from deriva_ml.dataset import Dataset
|
|
22
31
|
from deriva_ml.dataset.aux_classes import DatasetVersion
|
|
23
|
-
from deriva_ml.execution.execution import Execution,
|
|
24
|
-
from deriva_ml.execution.execution_configuration import ExecutionConfiguration
|
|
32
|
+
from deriva_ml.execution.execution import Execution, ExecutionConfiguration
|
|
25
33
|
from deriva_ml.schema import (
|
|
26
34
|
create_ml_catalog,
|
|
27
35
|
)
|
|
28
|
-
from deriva_ml.schema.annotations import catalog_annotation
|
|
29
36
|
|
|
30
37
|
try:
|
|
38
|
+
from pprint import pformat
|
|
39
|
+
|
|
31
40
|
from icecream import ic
|
|
32
41
|
|
|
33
|
-
ic.configureOutput(
|
|
42
|
+
ic.configureOutput(
|
|
43
|
+
includeContext=True,
|
|
44
|
+
argToStringFunction=lambda x: pformat(x.model_dump() if hasattr(x, "model_dump") else x, width=80, depth=10),
|
|
45
|
+
)
|
|
34
46
|
except ImportError: # Graceful fallback if IceCream isn't installed.
|
|
35
47
|
ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
|
|
36
48
|
|
|
@@ -38,39 +50,24 @@ except ImportError: # Graceful fallback if IceCream isn't installed.
|
|
|
38
50
|
TEST_DATASET_SIZE = 12
|
|
39
51
|
|
|
40
52
|
|
|
41
|
-
def populate_demo_catalog(
|
|
53
|
+
def populate_demo_catalog(execution: Execution) -> None:
|
|
42
54
|
# Insert demo subjects, write one Image asset file per subject, then upload them.
|
|
43
|
-
|
|
55
|
+
ml_instance = execution._ml_object
|
|
56
|
+
domain_schema = ml_instance.domain_path()
|
|
44
57
|
subject = domain_schema.tables["Subject"]
|
|
45
58
|
ss = subject.insert([{"Name": f"Thing{t + 1}"} for t in range(TEST_DATASET_SIZE)])
|
|
59
|
+
for s in ss:
|
|
60
|
+
image_file = execution.asset_file_path(
|
|
61
|
+
"Image",
|
|
62
|
+
f"test_{s['RID']}.txt",
|
|
63
|
+
Subject=s["RID"],
|
|
64
|
+
Acquisition_Time=datetime.now(),
|
|
65
|
+
Acquisition_Date=datetime.now().date(),
|
|
66
|
+
)
|
|
67
|
+
with image_file.open("w") as f:
|
|
68
|
+
f.write(f"Hello there {random()}\n")
|
|
46
69
|
|
|
47
|
-
|
|
48
|
-
MLVocab.workflow_type,
|
|
49
|
-
"Demo Catalog Creation",
|
|
50
|
-
description="A workflow demonstrating how to create a demo catalog.",
|
|
51
|
-
)
|
|
52
|
-
workflow = Workflow(
|
|
53
|
-
name="Demo Catalog",
|
|
54
|
-
workflow_type="Demo Catalog Creation",
|
|
55
|
-
url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/src/deriva_ml/demo_catalog.py",
|
|
56
|
-
version="1.0.0",
|
|
57
|
-
checksum="27",
|
|
58
|
-
git_root=Path(),
|
|
59
|
-
)
|
|
60
|
-
execution = ml_instance.create_execution(ExecutionConfiguration(workflow=workflow))
|
|
61
|
-
|
|
62
|
-
with execution.execute() as e:
|
|
63
|
-
for s in ss:
|
|
64
|
-
image_file = e.asset_file_path(
|
|
65
|
-
"Image",
|
|
66
|
-
f"test_{s['RID']}.txt",
|
|
67
|
-
Subject=s["RID"],
|
|
68
|
-
Acquisition_Time=datetime.now(),
|
|
69
|
-
Acquisition_Date=datetime.now().date(),
|
|
70
|
-
)
|
|
71
|
-
with image_file.open("w") as f:
|
|
72
|
-
f.write(f"Hello there {random()}\n")
|
|
73
|
-
execution.upload_execution_outputs()
|
|
70
|
+
execution.upload_execution_outputs()
|
|
74
71
|
|
|
75
72
|
|
|
76
73
|
class DatasetDescription(BaseModel):
|
|
@@ -81,7 +78,7 @@ class DatasetDescription(BaseModel):
|
|
|
81
78
|
] # Either a list of nested datasets, or the number of elements to add
|
|
82
79
|
member_rids: dict[str, list[RID]] = {} # The rids of the members of the dataset.
|
|
83
80
|
version: DatasetVersion = DatasetVersion(1, 0, 0) # The initial version.
|
|
84
|
-
|
|
81
|
+
dataset: Dataset = None # The Dataset object that was created.
|
|
85
82
|
|
|
86
83
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
87
84
|
|
|
@@ -95,7 +92,8 @@ def create_datasets(
|
|
|
95
92
|
Create a dataset per `spec`, then add child members (either by slicing
|
|
96
93
|
off pre-generated RIDs or by recursing on nested specs).
|
|
97
94
|
"""
|
|
98
|
-
|
|
95
|
+
# Create unpinned dataset.
|
|
96
|
+
dataset = client.create_dataset(
|
|
99
97
|
dataset_types=spec.types,
|
|
100
98
|
description=spec.description,
|
|
101
99
|
version=spec.version,
|
|
@@ -105,9 +103,10 @@ def create_datasets(
|
|
|
105
103
|
description=spec.description,
|
|
106
104
|
members={},
|
|
107
105
|
types=spec.types,
|
|
108
|
-
|
|
106
|
+
dataset=dataset,
|
|
109
107
|
version=spec.version,
|
|
110
108
|
)
|
|
109
|
+
|
|
111
110
|
dataset_rids = {}
|
|
112
111
|
for member_type, value in spec.members.items():
|
|
113
112
|
if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
|
|
@@ -116,7 +115,7 @@ def create_datasets(
|
|
|
116
115
|
for child_spec in nested_specs:
|
|
117
116
|
child_ds = create_datasets(client, child_spec, member_rids)
|
|
118
117
|
result_spec.members.setdefault(member_type, []).append(child_ds)
|
|
119
|
-
rids.append(child_ds.
|
|
118
|
+
rids.append(child_ds.dataset.dataset_rid)
|
|
120
119
|
elif isinstance(value, Integral):
|
|
121
120
|
count = int(value)
|
|
122
121
|
# take exactly `count` RIDs (or an empty list if count <= 0)
|
|
@@ -132,7 +131,7 @@ def create_datasets(
|
|
|
132
131
|
if rids:
|
|
133
132
|
dataset_rids[member_type] = rids
|
|
134
133
|
result_spec.member_rids.setdefault(member_type, []).extend(rids)
|
|
135
|
-
|
|
134
|
+
dataset.add_dataset_members(dataset_rids, description="Added by create_datasets")
|
|
136
135
|
|
|
137
136
|
return result_spec
|
|
138
137
|
|
|
@@ -147,7 +146,7 @@ def dataset_spec() -> DatasetDescription:
|
|
|
147
146
|
training_dataset = DatasetDescription(
|
|
148
147
|
description="A dataset that is nested",
|
|
149
148
|
members={"Dataset": [dataset, dataset], "Image": 2},
|
|
150
|
-
types=["
|
|
149
|
+
types=["Training"],
|
|
151
150
|
)
|
|
152
151
|
|
|
153
152
|
testing_dataset = DatasetDescription(
|
|
@@ -164,39 +163,37 @@ def dataset_spec() -> DatasetDescription:
|
|
|
164
163
|
return double_nested_dataset
|
|
165
164
|
|
|
166
165
|
|
|
167
|
-
def create_demo_datasets(
|
|
166
|
+
def create_demo_datasets(execution: Execution) -> DatasetDescription:
|
|
168
167
|
"""Create datasets from a populated catalog."""
|
|
168
|
+
ml_instance = execution._ml_object
|
|
169
169
|
ml_instance.add_dataset_element_type("Subject")
|
|
170
170
|
ml_instance.add_dataset_element_type("Image")
|
|
171
171
|
|
|
172
|
-
_type_rid = ml_instance.add_term(
|
|
173
|
-
|
|
174
|
-
|
|
172
|
+
_type_rid = ml_instance.add_term(
|
|
173
|
+
"Dataset_Type", "Complete", synonyms=["Whole", "complete", "whole"], description="A test"
|
|
174
|
+
)
|
|
175
|
+
_training_rid = ml_instance.add_term(
|
|
176
|
+
"Dataset_Type", "Training", synonyms=["Train", "train", "training"], description="A training set"
|
|
177
|
+
)
|
|
178
|
+
_testing_rid = ml_instance.add_term(
|
|
179
|
+
"Dataset_Type", "Testing", synonyms=["Test", "test", "testing"], description="A testing set"
|
|
180
|
+
)
|
|
175
181
|
|
|
176
|
-
table_path = ml_instance.
|
|
182
|
+
table_path = ml_instance.domain_path().tables["Subject"]
|
|
177
183
|
subject_rids = [i["RID"] for i in table_path.entities().fetch()]
|
|
178
|
-
table_path = ml_instance.catalog.getPathBuilder().schemas[ml_instance.domain_schema].tables["Image"]
|
|
179
|
-
image_rids = [i["RID"] for i in table_path.entities().fetch()]
|
|
180
184
|
|
|
181
|
-
ml_instance.
|
|
182
|
-
|
|
183
|
-
"Create Dataset Workflow",
|
|
184
|
-
description="A Workflow that creates a new dataset.",
|
|
185
|
-
)
|
|
186
|
-
dataset_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Create Dataset Workflow")
|
|
187
|
-
|
|
188
|
-
dataset_execution = ml_instance.create_execution(
|
|
189
|
-
ExecutionConfiguration(workflow=dataset_workflow, description="Create Dataset")
|
|
190
|
-
)
|
|
185
|
+
table_path = ml_instance.domain_path().tables["Image"]
|
|
186
|
+
image_rids = [i["RID"] for i in table_path.entities().fetch()]
|
|
191
187
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
dataset = create_datasets(exe, spec, {"Subject": iter(subject_rids), "Image": iter(image_rids)})
|
|
188
|
+
spec = dataset_spec()
|
|
189
|
+
dataset = create_datasets(execution, spec, {"Subject": iter(subject_rids), "Image": iter(image_rids)})
|
|
195
190
|
return dataset
|
|
196
191
|
|
|
197
192
|
|
|
198
|
-
def create_demo_features(
|
|
199
|
-
ml_instance
|
|
193
|
+
def create_demo_features(execution: Execution) -> None:
|
|
194
|
+
ml_instance = execution._ml_object
|
|
195
|
+
# Use update_navbar=False for batch creation, then call apply_catalog_annotations() once at the end
|
|
196
|
+
ml_instance.create_vocabulary("SubjectHealth", "A vocab", update_navbar=False)
|
|
200
197
|
ml_instance.add_term(
|
|
201
198
|
"SubjectHealth",
|
|
202
199
|
"Sick",
|
|
@@ -207,10 +204,12 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
207
204
|
"Well",
|
|
208
205
|
description="The subject self reports that they feel well",
|
|
209
206
|
)
|
|
210
|
-
ml_instance.create_vocabulary("ImageQuality", "Controlled vocabulary for image quality")
|
|
207
|
+
ml_instance.create_vocabulary("ImageQuality", "Controlled vocabulary for image quality", update_navbar=False)
|
|
211
208
|
ml_instance.add_term("ImageQuality", "Good", description="The image is good")
|
|
212
209
|
ml_instance.add_term("ImageQuality", "Bad", description="The image is bad")
|
|
213
|
-
box_asset = ml_instance.create_asset(
|
|
210
|
+
box_asset = ml_instance.create_asset(
|
|
211
|
+
"BoundingBox", comment="A file that contains a cropped version of a image", update_navbar=False
|
|
212
|
+
)
|
|
214
213
|
|
|
215
214
|
ml_instance.create_feature(
|
|
216
215
|
"Subject",
|
|
@@ -218,9 +217,13 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
218
217
|
terms=["SubjectHealth"],
|
|
219
218
|
metadata=[ColumnDefinition(name="Scale", type=BuiltinTypes.int2, nullok=True)],
|
|
220
219
|
optional=["Scale"],
|
|
220
|
+
update_navbar=False,
|
|
221
221
|
)
|
|
222
|
-
ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset])
|
|
223
|
-
ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"])
|
|
222
|
+
ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset], update_navbar=False)
|
|
223
|
+
ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"], update_navbar=False)
|
|
224
|
+
|
|
225
|
+
# Update navbar once after all tables are created
|
|
226
|
+
ml_instance.apply_catalog_annotations()
|
|
224
227
|
|
|
225
228
|
ImageQualityFeature = ml_instance.feature_record_class("Image", "Quality")
|
|
226
229
|
ImageBoundingboxFeature = ml_instance.feature_record_class("Image", "BoundingBox")
|
|
@@ -228,24 +231,12 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
228
231
|
|
|
229
232
|
# Collect the RIDs of the existing Subject and Image rows.
|
|
230
233
|
|
|
231
|
-
ml_instance.
|
|
232
|
-
|
|
233
|
-
"Feature Notebook Workflow",
|
|
234
|
-
description="A Workflow that uses Deriva ML API",
|
|
235
|
-
)
|
|
236
|
-
ml_instance.add_term(MLVocab.asset_type, "API_Model", description="Model for our Notebook workflow")
|
|
237
|
-
notebook_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Feature Notebook Workflow")
|
|
238
|
-
|
|
239
|
-
feature_execution = ml_instance.create_execution(
|
|
240
|
-
ExecutionConfiguration(workflow=notebook_workflow, description="Our Sample Workflow instance")
|
|
241
|
-
)
|
|
242
|
-
|
|
243
|
-
subject_rids = [i["RID"] for i in ml_instance.domain_path.tables["Subject"].entities().fetch()]
|
|
244
|
-
image_rids = [i["RID"] for i in ml_instance.domain_path.tables["Image"].entities().fetch()]
|
|
234
|
+
subject_rids = [i["RID"] for i in ml_instance.domain_path().tables["Subject"].entities().fetch()]
|
|
235
|
+
image_rids = [i["RID"] for i in ml_instance.domain_path().tables["Image"].entities().fetch()]
|
|
245
236
|
_subject_feature_list = [
|
|
246
237
|
SubjectWellnessFeature(
|
|
247
238
|
Subject=subject_rid,
|
|
248
|
-
Execution=
|
|
239
|
+
Execution=execution.execution_rid,
|
|
249
240
|
SubjectHealth=choice(["Well", "Sick"]),
|
|
250
241
|
Scale=randint(1, 10),
|
|
251
242
|
)
|
|
@@ -255,7 +246,7 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
255
246
|
# Create a new set of images. For fun, let's wrap this in an execution so we get status updates
|
|
256
247
|
bounding_box_files = []
|
|
257
248
|
for i in range(10):
|
|
258
|
-
bounding_box_file =
|
|
249
|
+
bounding_box_file = execution.asset_file_path("BoundingBox", f"box{i}.txt")
|
|
259
250
|
with bounding_box_file.open("w") as fp:
|
|
260
251
|
fp.write(f"Hi there {i}")
|
|
261
252
|
bounding_box_files.append(bounding_box_file)
|
|
@@ -285,12 +276,9 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
285
276
|
for subject_rid in subject_rids
|
|
286
277
|
]
|
|
287
278
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
execution.add_features(subject_feature_list)
|
|
292
|
-
|
|
293
|
-
feature_execution.upload_execution_outputs()
|
|
279
|
+
execution.add_features(image_bounding_box_feature_list)
|
|
280
|
+
execution.add_features(image_quality_feature_list)
|
|
281
|
+
execution.add_features(subject_feature_list)
|
|
294
282
|
|
|
295
283
|
|
|
296
284
|
def create_demo_files(ml_instance: DerivaML):
|
|
@@ -350,21 +338,25 @@ def create_domain_schema(catalog: ErmrestCatalog, sname: str) -> None:
|
|
|
350
338
|
else:
|
|
351
339
|
raise e
|
|
352
340
|
|
|
353
|
-
domain_schema = model.create_schema(
|
|
341
|
+
domain_schema = model.create_schema(
|
|
342
|
+
SchemaDef(name=sname, annotations={"name_style": {"underline_space": True}})
|
|
343
|
+
)
|
|
354
344
|
subject_table = domain_schema.create_table(
|
|
355
|
-
|
|
345
|
+
TableDef(name="Subject", columns=[ColumnDef("Name", BuiltinType.text)])
|
|
356
346
|
)
|
|
357
347
|
with TemporaryDirectory() as tmpdir:
|
|
358
348
|
ml_instance = DerivaML(hostname=catalog.deriva_server.server, catalog_id=catalog.catalog_id, working_dir=tmpdir)
|
|
349
|
+
# Use update_navbar=False since we call apply_catalog_annotations() explicitly at the end
|
|
359
350
|
ml_instance.create_asset(
|
|
360
351
|
"Image",
|
|
361
352
|
column_defs=[
|
|
362
|
-
|
|
363
|
-
|
|
353
|
+
ColumnDef("Acquisition_Time", BuiltinType.timestamp),
|
|
354
|
+
ColumnDef("Acquisition_Date", BuiltinType.date),
|
|
364
355
|
],
|
|
365
356
|
referenced_tables=[subject_table],
|
|
357
|
+
update_navbar=False,
|
|
366
358
|
)
|
|
367
|
-
|
|
359
|
+
ml_instance.apply_catalog_annotations()
|
|
368
360
|
|
|
369
361
|
|
|
370
362
|
def destroy_demo_catalog(catalog):
|
|
@@ -395,27 +387,51 @@ def create_demo_catalog(
|
|
|
395
387
|
|
|
396
388
|
try:
|
|
397
389
|
with TemporaryDirectory() as tmpdir:
|
|
398
|
-
|
|
390
|
+
try:
|
|
391
|
+
subprocess.run(
|
|
392
|
+
"git clone https://github.com/informatics-isi-edu/deriva-ml.git",
|
|
393
|
+
capture_output=True,
|
|
394
|
+
text=True,
|
|
395
|
+
shell=True,
|
|
396
|
+
check=True,
|
|
397
|
+
cwd=tmpdir,
|
|
398
|
+
)
|
|
399
|
+
except subprocess.CalledProcessError:
|
|
400
|
+
raise DerivaMLException("Cannot clone deriva-ml repo from GitHub.")
|
|
401
|
+
|
|
399
402
|
create_domain_schema(test_catalog, domain_schema)
|
|
400
403
|
|
|
401
|
-
ml_instance = DerivaML(
|
|
402
|
-
hostname,
|
|
403
|
-
catalog_id=test_catalog.catalog_id,
|
|
404
|
-
domain_schema=domain_schema,
|
|
405
|
-
working_dir=tmpdir,
|
|
406
|
-
logging_level=logging_level,
|
|
407
|
-
)
|
|
408
404
|
if populate or create_features or create_datasets:
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
405
|
+
ml_instance = DerivaML(
|
|
406
|
+
hostname,
|
|
407
|
+
catalog_id=test_catalog.catalog_id,
|
|
408
|
+
default_schema=domain_schema,
|
|
409
|
+
working_dir=tmpdir,
|
|
410
|
+
logging_level=logging_level,
|
|
411
|
+
)
|
|
412
|
+
ml_instance.add_term(
|
|
413
|
+
MLVocab.workflow_type,
|
|
414
|
+
"Demo Catalog Creation",
|
|
415
|
+
description="A Workflow that creates a new catalog and populates it with demo data.",
|
|
416
|
+
)
|
|
417
|
+
populate_workflow = ml_instance.create_workflow(
|
|
418
|
+
name="Demo Creation", workflow_type="Demo Catalog Creation"
|
|
419
|
+
)
|
|
420
|
+
execution = ml_instance.create_execution(
|
|
421
|
+
workflow=populate_workflow, configuration=ExecutionConfiguration()
|
|
422
|
+
)
|
|
423
|
+
with execution.execute() as exe:
|
|
424
|
+
populate_demo_catalog(exe)
|
|
425
|
+
if create_features:
|
|
426
|
+
create_demo_features(exe)
|
|
427
|
+
if create_datasets:
|
|
428
|
+
create_demo_datasets(exe)
|
|
429
|
+
execution.upload_execution_outputs()
|
|
414
430
|
|
|
415
|
-
except Exception:
|
|
431
|
+
except Exception as e:
|
|
416
432
|
# on failure, delete catalog and re-raise exception
|
|
417
433
|
test_catalog.delete_ermrest_catalog(really=True)
|
|
418
|
-
raise
|
|
434
|
+
raise e
|
|
419
435
|
return test_catalog
|
|
420
436
|
|
|
421
437
|
|
deriva_ml/execution/__init__.py
CHANGED
|
@@ -1,8 +1,30 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING
|
|
2
2
|
|
|
3
3
|
# Safe imports - no circular dependencies
|
|
4
|
-
from deriva_ml.execution.
|
|
4
|
+
from deriva_ml.execution.base_config import (
|
|
5
|
+
BaseConfig,
|
|
6
|
+
DerivaBaseConfig,
|
|
7
|
+
base_defaults,
|
|
8
|
+
get_notebook_configuration,
|
|
9
|
+
# New simplified API
|
|
10
|
+
notebook_config,
|
|
11
|
+
load_configs,
|
|
12
|
+
run_notebook,
|
|
13
|
+
# Config metadata helpers
|
|
14
|
+
DescribedList,
|
|
15
|
+
with_description,
|
|
16
|
+
)
|
|
17
|
+
from deriva_ml.execution.multirun_config import (
|
|
18
|
+
MultirunSpec,
|
|
19
|
+
multirun_config,
|
|
20
|
+
get_multirun_config,
|
|
21
|
+
list_multirun_configs,
|
|
22
|
+
get_all_multirun_configs,
|
|
23
|
+
)
|
|
24
|
+
from deriva_ml.execution.execution_configuration import AssetRID, ExecutionConfiguration
|
|
5
25
|
from deriva_ml.execution.workflow import Workflow
|
|
26
|
+
from deriva_ml.execution.runner import run_model, create_model_config, reset_multirun_state
|
|
27
|
+
from deriva_ml.execution.model_protocol import DerivaMLModel
|
|
6
28
|
|
|
7
29
|
if TYPE_CHECKING:
|
|
8
30
|
from deriva_ml.execution.execution import Execution
|
|
@@ -22,5 +44,27 @@ __all__ = [
|
|
|
22
44
|
"Execution", # Lazy-loaded
|
|
23
45
|
"ExecutionConfiguration",
|
|
24
46
|
"Workflow",
|
|
25
|
-
"
|
|
47
|
+
"AssetRID",
|
|
48
|
+
"run_model",
|
|
49
|
+
"create_model_config",
|
|
50
|
+
"reset_multirun_state",
|
|
51
|
+
"DerivaMLModel",
|
|
52
|
+
# Base configuration
|
|
53
|
+
"BaseConfig",
|
|
54
|
+
"DerivaBaseConfig",
|
|
55
|
+
"base_defaults",
|
|
56
|
+
"get_notebook_configuration",
|
|
57
|
+
# Simplified API
|
|
58
|
+
"notebook_config",
|
|
59
|
+
"load_configs",
|
|
60
|
+
"run_notebook",
|
|
61
|
+
# Config metadata helpers
|
|
62
|
+
"DescribedList",
|
|
63
|
+
"with_description",
|
|
64
|
+
# Multirun configuration
|
|
65
|
+
"MultirunSpec",
|
|
66
|
+
"multirun_config",
|
|
67
|
+
"get_multirun_config",
|
|
68
|
+
"list_multirun_configs",
|
|
69
|
+
"get_all_multirun_configs",
|
|
26
70
|
]
|