deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +186 -105
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +545 -244
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +224 -35
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.9.dist-info/RECORD +0 -45
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/demo_catalog.py
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
|
+
# type: ignore[arg-type, call-arg]
|
|
2
|
+
"""Demo catalog utilities for DerivaML testing and examples.
|
|
3
|
+
|
|
4
|
+
This module creates demo catalogs with sample data for testing. It uses
|
|
5
|
+
dynamically created Pydantic models for features, which cannot be statically
|
|
6
|
+
typed - hence the type ignore above.
|
|
7
|
+
"""
|
|
1
8
|
from __future__ import annotations
|
|
2
9
|
|
|
3
10
|
import atexit
|
|
4
11
|
import itertools
|
|
5
12
|
import logging
|
|
6
13
|
import string
|
|
14
|
+
import subprocess
|
|
7
15
|
from collections.abc import Iterator, Sequence
|
|
8
16
|
from datetime import datetime
|
|
9
17
|
from numbers import Integral
|
|
@@ -11,25 +19,30 @@ from pathlib import Path
|
|
|
11
19
|
from random import choice, randint, random
|
|
12
20
|
from tempfile import TemporaryDirectory
|
|
13
21
|
|
|
14
|
-
from deriva.core import ErmrestCatalog
|
|
15
|
-
from deriva.core.ermrest_model import
|
|
22
|
+
from deriva.core import BaseCLI, ErmrestCatalog
|
|
23
|
+
from deriva.core.ermrest_model import Schema, Table
|
|
24
|
+
from deriva.core.typed import BuiltinType, ColumnDef, SchemaDef, TableDef
|
|
16
25
|
from pydantic import BaseModel, ConfigDict
|
|
17
26
|
from requests.exceptions import HTTPError
|
|
18
27
|
|
|
19
|
-
from deriva_ml import DerivaML, MLVocab
|
|
28
|
+
from deriva_ml import DerivaML, DerivaMLException, MLVocab
|
|
20
29
|
from deriva_ml.core.definitions import RID, BuiltinTypes, ColumnDefinition
|
|
30
|
+
from deriva_ml.dataset import Dataset
|
|
21
31
|
from deriva_ml.dataset.aux_classes import DatasetVersion
|
|
22
|
-
from deriva_ml.execution.execution import Execution
|
|
23
|
-
from deriva_ml.execution.execution_configuration import ExecutionConfiguration
|
|
32
|
+
from deriva_ml.execution.execution import Execution, ExecutionConfiguration
|
|
24
33
|
from deriva_ml.schema import (
|
|
25
34
|
create_ml_catalog,
|
|
26
35
|
)
|
|
27
|
-
from deriva_ml.schema.annotations import catalog_annotation
|
|
28
36
|
|
|
29
37
|
try:
|
|
38
|
+
from pprint import pformat
|
|
39
|
+
|
|
30
40
|
from icecream import ic
|
|
31
41
|
|
|
32
|
-
ic.configureOutput(
|
|
42
|
+
ic.configureOutput(
|
|
43
|
+
includeContext=True,
|
|
44
|
+
argToStringFunction=lambda x: pformat(x.model_dump() if hasattr(x, "model_dump") else x, width=80, depth=10),
|
|
45
|
+
)
|
|
33
46
|
except ImportError: # Graceful fallback if IceCream isn't installed.
|
|
34
47
|
ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
|
|
35
48
|
|
|
@@ -37,34 +50,24 @@ except ImportError: # Graceful fallback if IceCream isn't installed.
|
|
|
37
50
|
TEST_DATASET_SIZE = 12
|
|
38
51
|
|
|
39
52
|
|
|
40
|
-
def populate_demo_catalog(
|
|
53
|
+
def populate_demo_catalog(execution: Execution) -> None:
|
|
41
54
|
# Insert demo Subject rows, then create and upload one Image asset file per subject.
|
|
42
|
-
|
|
55
|
+
ml_instance = execution._ml_object
|
|
56
|
+
domain_schema = ml_instance.domain_path()
|
|
43
57
|
subject = domain_schema.tables["Subject"]
|
|
44
58
|
ss = subject.insert([{"Name": f"Thing{t + 1}"} for t in range(TEST_DATASET_SIZE)])
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
ExecutionConfiguration(
|
|
53
|
-
workflow=ml_instance.create_workflow(name="Demo Catalog", workflow_type="Demo Catalog Creation")
|
|
59
|
+
for s in ss:
|
|
60
|
+
image_file = execution.asset_file_path(
|
|
61
|
+
"Image",
|
|
62
|
+
f"test_{s['RID']}.txt",
|
|
63
|
+
Subject=s["RID"],
|
|
64
|
+
Acquisition_Time=datetime.now(),
|
|
65
|
+
Acquisition_Date=datetime.now().date(),
|
|
54
66
|
)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
"Image",
|
|
60
|
-
f"test_{s['RID']}.txt",
|
|
61
|
-
Subject=s["RID"],
|
|
62
|
-
Acquisition_Time=datetime.now(),
|
|
63
|
-
Acquisition_Date=datetime.now().date(),
|
|
64
|
-
)
|
|
65
|
-
with image_file.open("w") as f:
|
|
66
|
-
f.write(f"Hello there {random()}\n")
|
|
67
|
-
execution.upload_execution_outputs()
|
|
67
|
+
with image_file.open("w") as f:
|
|
68
|
+
f.write(f"Hello there {random()}\n")
|
|
69
|
+
|
|
70
|
+
execution.upload_execution_outputs()
|
|
68
71
|
|
|
69
72
|
|
|
70
73
|
class DatasetDescription(BaseModel):
|
|
@@ -75,7 +78,7 @@ class DatasetDescription(BaseModel):
|
|
|
75
78
|
] # Either a list of nested datasets, or the number of elements to add
|
|
76
79
|
member_rids: dict[str, list[RID]] = {} # The rids of the members of the dataset.
|
|
77
80
|
version: DatasetVersion = DatasetVersion(1, 0, 0) # The initial version.
|
|
78
|
-
|
|
81
|
+
dataset: Dataset = None # The Dataset object that was created (its RID is dataset.dataset_rid).
|
|
79
82
|
|
|
80
83
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
81
84
|
|
|
@@ -89,7 +92,8 @@ def create_datasets(
|
|
|
89
92
|
Create a dataset per `spec`, then add child members (either by slicing
|
|
90
93
|
off pre-generated RIDs or by recursing on nested specs).
|
|
91
94
|
"""
|
|
92
|
-
|
|
95
|
+
# Create unpinned dataset.
|
|
96
|
+
dataset = client.create_dataset(
|
|
93
97
|
dataset_types=spec.types,
|
|
94
98
|
description=spec.description,
|
|
95
99
|
version=spec.version,
|
|
@@ -99,9 +103,10 @@ def create_datasets(
|
|
|
99
103
|
description=spec.description,
|
|
100
104
|
members={},
|
|
101
105
|
types=spec.types,
|
|
102
|
-
|
|
106
|
+
dataset=dataset,
|
|
103
107
|
version=spec.version,
|
|
104
108
|
)
|
|
109
|
+
|
|
105
110
|
dataset_rids = {}
|
|
106
111
|
for member_type, value in spec.members.items():
|
|
107
112
|
if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
|
|
@@ -110,7 +115,7 @@ def create_datasets(
|
|
|
110
115
|
for child_spec in nested_specs:
|
|
111
116
|
child_ds = create_datasets(client, child_spec, member_rids)
|
|
112
117
|
result_spec.members.setdefault(member_type, []).append(child_ds)
|
|
113
|
-
rids.append(child_ds.
|
|
118
|
+
rids.append(child_ds.dataset.dataset_rid)
|
|
114
119
|
elif isinstance(value, Integral):
|
|
115
120
|
count = int(value)
|
|
116
121
|
# take exactly `count` RIDs (or an empty list if count <= 0)
|
|
@@ -126,7 +131,7 @@ def create_datasets(
|
|
|
126
131
|
if rids:
|
|
127
132
|
dataset_rids[member_type] = rids
|
|
128
133
|
result_spec.member_rids.setdefault(member_type, []).extend(rids)
|
|
129
|
-
|
|
134
|
+
dataset.add_dataset_members(dataset_rids, description="Added by create_datasets")
|
|
130
135
|
|
|
131
136
|
return result_spec
|
|
132
137
|
|
|
@@ -141,7 +146,7 @@ def dataset_spec() -> DatasetDescription:
|
|
|
141
146
|
training_dataset = DatasetDescription(
|
|
142
147
|
description="A dataset that is nested",
|
|
143
148
|
members={"Dataset": [dataset, dataset], "Image": 2},
|
|
144
|
-
types=["
|
|
149
|
+
types=["Training"],
|
|
145
150
|
)
|
|
146
151
|
|
|
147
152
|
testing_dataset = DatasetDescription(
|
|
@@ -158,39 +163,37 @@ def dataset_spec() -> DatasetDescription:
|
|
|
158
163
|
return double_nested_dataset
|
|
159
164
|
|
|
160
165
|
|
|
161
|
-
def create_demo_datasets(
|
|
166
|
+
def create_demo_datasets(execution: Execution) -> DatasetDescription:
|
|
162
167
|
"""Create datasets from a populated catalog."""
|
|
168
|
+
ml_instance = execution._ml_object
|
|
163
169
|
ml_instance.add_dataset_element_type("Subject")
|
|
164
170
|
ml_instance.add_dataset_element_type("Image")
|
|
165
171
|
|
|
166
|
-
_type_rid = ml_instance.add_term(
|
|
167
|
-
|
|
168
|
-
|
|
172
|
+
_type_rid = ml_instance.add_term(
|
|
173
|
+
"Dataset_Type", "Complete", synonyms=["Whole", "complete", "whole"], description="A test"
|
|
174
|
+
)
|
|
175
|
+
_training_rid = ml_instance.add_term(
|
|
176
|
+
"Dataset_Type", "Training", synonyms=["Train", "train", "training"], description="A training set"
|
|
177
|
+
)
|
|
178
|
+
_testing_rid = ml_instance.add_term(
|
|
179
|
+
"Dataset_Type", "Testing", synonyms=["Test", "test", "testing"], description="A testing set"
|
|
180
|
+
)
|
|
169
181
|
|
|
170
|
-
table_path = ml_instance.
|
|
182
|
+
table_path = ml_instance.domain_path().tables["Subject"]
|
|
171
183
|
subject_rids = [i["RID"] for i in table_path.entities().fetch()]
|
|
172
|
-
table_path = ml_instance.catalog.getPathBuilder().schemas[ml_instance.domain_schema].tables["Image"]
|
|
173
|
-
image_rids = [i["RID"] for i in table_path.entities().fetch()]
|
|
174
184
|
|
|
175
|
-
ml_instance.
|
|
176
|
-
|
|
177
|
-
"Create Dataset Workflow",
|
|
178
|
-
description="A Workflow that creates a new dataset.",
|
|
179
|
-
)
|
|
180
|
-
dataset_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Create Dataset Workflow")
|
|
181
|
-
|
|
182
|
-
dataset_execution = ml_instance.create_execution(
|
|
183
|
-
ExecutionConfiguration(workflow=dataset_workflow, description="Create Dataset")
|
|
184
|
-
)
|
|
185
|
+
table_path = ml_instance.domain_path().tables["Image"]
|
|
186
|
+
image_rids = [i["RID"] for i in table_path.entities().fetch()]
|
|
185
187
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
dataset = create_datasets(exe, spec, {"Subject": iter(subject_rids), "Image": iter(image_rids)})
|
|
188
|
+
spec = dataset_spec()
|
|
189
|
+
dataset = create_datasets(execution, spec, {"Subject": iter(subject_rids), "Image": iter(image_rids)})
|
|
189
190
|
return dataset
|
|
190
191
|
|
|
191
192
|
|
|
192
|
-
def create_demo_features(
|
|
193
|
-
ml_instance
|
|
193
|
+
def create_demo_features(execution: Execution) -> None:
|
|
194
|
+
ml_instance = execution._ml_object
|
|
195
|
+
# Use update_navbar=False for batch creation, then call apply_catalog_annotations() once at the end
|
|
196
|
+
ml_instance.create_vocabulary("SubjectHealth", "A vocab", update_navbar=False)
|
|
194
197
|
ml_instance.add_term(
|
|
195
198
|
"SubjectHealth",
|
|
196
199
|
"Sick",
|
|
@@ -201,10 +204,12 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
201
204
|
"Well",
|
|
202
205
|
description="The subject self reports that they feel well",
|
|
203
206
|
)
|
|
204
|
-
ml_instance.create_vocabulary("ImageQuality", "Controlled vocabulary for image quality")
|
|
207
|
+
ml_instance.create_vocabulary("ImageQuality", "Controlled vocabulary for image quality", update_navbar=False)
|
|
205
208
|
ml_instance.add_term("ImageQuality", "Good", description="The image is good")
|
|
206
209
|
ml_instance.add_term("ImageQuality", "Bad", description="The image is bad")
|
|
207
|
-
box_asset = ml_instance.create_asset(
|
|
210
|
+
box_asset = ml_instance.create_asset(
|
|
211
|
+
"BoundingBox", comment="A file that contains a cropped version of a image", update_navbar=False
|
|
212
|
+
)
|
|
208
213
|
|
|
209
214
|
ml_instance.create_feature(
|
|
210
215
|
"Subject",
|
|
@@ -212,9 +217,13 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
212
217
|
terms=["SubjectHealth"],
|
|
213
218
|
metadata=[ColumnDefinition(name="Scale", type=BuiltinTypes.int2, nullok=True)],
|
|
214
219
|
optional=["Scale"],
|
|
220
|
+
update_navbar=False,
|
|
215
221
|
)
|
|
216
|
-
ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset])
|
|
217
|
-
ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"])
|
|
222
|
+
ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset], update_navbar=False)
|
|
223
|
+
ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"], update_navbar=False)
|
|
224
|
+
|
|
225
|
+
# Update navbar once after all tables are created
|
|
226
|
+
ml_instance.apply_catalog_annotations()
|
|
218
227
|
|
|
219
228
|
ImageQualityFeature = ml_instance.feature_record_class("Image", "Quality")
|
|
220
229
|
ImageBoundingboxFeature = ml_instance.feature_record_class("Image", "BoundingBox")
|
|
@@ -222,24 +231,12 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
222
231
|
|
|
223
232
|
# Get the workflow for this notebook
|
|
224
233
|
|
|
225
|
-
ml_instance.
|
|
226
|
-
|
|
227
|
-
"Feature Notebook Workflow",
|
|
228
|
-
description="A Workflow that uses Deriva ML API",
|
|
229
|
-
)
|
|
230
|
-
ml_instance.add_term(MLVocab.asset_type, "API_Model", description="Model for our Notebook workflow")
|
|
231
|
-
notebook_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Feature Notebook Workflow")
|
|
232
|
-
|
|
233
|
-
feature_execution = ml_instance.create_execution(
|
|
234
|
-
ExecutionConfiguration(workflow=notebook_workflow, description="Our Sample Workflow instance")
|
|
235
|
-
)
|
|
236
|
-
|
|
237
|
-
subject_rids = [i["RID"] for i in ml_instance.domain_path.tables["Subject"].entities().fetch()]
|
|
238
|
-
image_rids = [i["RID"] for i in ml_instance.domain_path.tables["Image"].entities().fetch()]
|
|
234
|
+
subject_rids = [i["RID"] for i in ml_instance.domain_path().tables["Subject"].entities().fetch()]
|
|
235
|
+
image_rids = [i["RID"] for i in ml_instance.domain_path().tables["Image"].entities().fetch()]
|
|
239
236
|
_subject_feature_list = [
|
|
240
237
|
SubjectWellnessFeature(
|
|
241
238
|
Subject=subject_rid,
|
|
242
|
-
Execution=
|
|
239
|
+
Execution=execution.execution_rid,
|
|
243
240
|
SubjectHealth=choice(["Well", "Sick"]),
|
|
244
241
|
Scale=randint(1, 10),
|
|
245
242
|
)
|
|
@@ -249,7 +246,7 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
249
246
|
# Create a new set of images. For fun, let's wrap this in an execution so we get status updates
|
|
250
247
|
bounding_box_files = []
|
|
251
248
|
for i in range(10):
|
|
252
|
-
bounding_box_file =
|
|
249
|
+
bounding_box_file = execution.asset_file_path("BoundingBox", f"box{i}.txt")
|
|
253
250
|
with bounding_box_file.open("w") as fp:
|
|
254
251
|
fp.write(f"Hi there {i}")
|
|
255
252
|
bounding_box_files.append(bounding_box_file)
|
|
@@ -279,12 +276,9 @@ def create_demo_features(ml_instance: DerivaML) -> None:
|
|
|
279
276
|
for subject_rid in subject_rids
|
|
280
277
|
]
|
|
281
278
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
execution.add_features(subject_feature_list)
|
|
286
|
-
|
|
287
|
-
feature_execution.upload_execution_outputs()
|
|
279
|
+
execution.add_features(image_bounding_box_feature_list)
|
|
280
|
+
execution.add_features(image_quality_feature_list)
|
|
281
|
+
execution.add_features(subject_feature_list)
|
|
288
282
|
|
|
289
283
|
|
|
290
284
|
def create_demo_files(ml_instance: DerivaML):
|
|
@@ -344,21 +338,25 @@ def create_domain_schema(catalog: ErmrestCatalog, sname: str) -> None:
|
|
|
344
338
|
else:
|
|
345
339
|
raise e
|
|
346
340
|
|
|
347
|
-
domain_schema = model.create_schema(
|
|
341
|
+
domain_schema = model.create_schema(
|
|
342
|
+
SchemaDef(name=sname, annotations={"name_style": {"underline_space": True}})
|
|
343
|
+
)
|
|
348
344
|
subject_table = domain_schema.create_table(
|
|
349
|
-
|
|
345
|
+
TableDef(name="Subject", columns=[ColumnDef("Name", BuiltinType.text)])
|
|
350
346
|
)
|
|
351
347
|
with TemporaryDirectory() as tmpdir:
|
|
352
348
|
ml_instance = DerivaML(hostname=catalog.deriva_server.server, catalog_id=catalog.catalog_id, working_dir=tmpdir)
|
|
349
|
+
# Use update_navbar=False since we call apply_catalog_annotations() explicitly at the end
|
|
353
350
|
ml_instance.create_asset(
|
|
354
351
|
"Image",
|
|
355
352
|
column_defs=[
|
|
356
|
-
|
|
357
|
-
|
|
353
|
+
ColumnDef("Acquisition_Time", BuiltinType.timestamp),
|
|
354
|
+
ColumnDef("Acquisition_Date", BuiltinType.date),
|
|
358
355
|
],
|
|
359
356
|
referenced_tables=[subject_table],
|
|
357
|
+
update_navbar=False,
|
|
360
358
|
)
|
|
361
|
-
|
|
359
|
+
ml_instance.apply_catalog_annotations()
|
|
362
360
|
|
|
363
361
|
|
|
364
362
|
def destroy_demo_catalog(catalog):
|
|
@@ -386,28 +384,54 @@ def create_demo_catalog(
|
|
|
386
384
|
test_catalog = create_ml_catalog(hostname, project_name=project_name)
|
|
387
385
|
if on_exit_delete:
|
|
388
386
|
atexit.register(destroy_demo_catalog, test_catalog)
|
|
387
|
+
|
|
389
388
|
try:
|
|
390
389
|
with TemporaryDirectory() as tmpdir:
|
|
390
|
+
try:
|
|
391
|
+
subprocess.run(
|
|
392
|
+
"git clone https://github.com/informatics-isi-edu/deriva-ml.git",
|
|
393
|
+
capture_output=True,
|
|
394
|
+
text=True,
|
|
395
|
+
shell=True,
|
|
396
|
+
check=True,
|
|
397
|
+
cwd=tmpdir,
|
|
398
|
+
)
|
|
399
|
+
except subprocess.CalledProcessError:
|
|
400
|
+
raise DerivaMLException("Cannot clone deriva-ml repo from GitHub.")
|
|
401
|
+
|
|
391
402
|
create_domain_schema(test_catalog, domain_schema)
|
|
392
|
-
ml_instance = DerivaML(
|
|
393
|
-
hostname,
|
|
394
|
-
catalog_id=test_catalog.catalog_id,
|
|
395
|
-
domain_schema=domain_schema,
|
|
396
|
-
working_dir=tmpdir,
|
|
397
|
-
logging_level=logging_level,
|
|
398
|
-
)
|
|
399
403
|
|
|
400
404
|
if populate or create_features or create_datasets:
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
405
|
+
ml_instance = DerivaML(
|
|
406
|
+
hostname,
|
|
407
|
+
catalog_id=test_catalog.catalog_id,
|
|
408
|
+
default_schema=domain_schema,
|
|
409
|
+
working_dir=tmpdir,
|
|
410
|
+
logging_level=logging_level,
|
|
411
|
+
)
|
|
412
|
+
ml_instance.add_term(
|
|
413
|
+
MLVocab.workflow_type,
|
|
414
|
+
"Demo Catalog Creation",
|
|
415
|
+
description="A Workflow that creates a new catalog and populates it with demo data.",
|
|
416
|
+
)
|
|
417
|
+
populate_workflow = ml_instance.create_workflow(
|
|
418
|
+
name="Demo Creation", workflow_type="Demo Catalog Creation"
|
|
419
|
+
)
|
|
420
|
+
execution = ml_instance.create_execution(
|
|
421
|
+
workflow=populate_workflow, configuration=ExecutionConfiguration()
|
|
422
|
+
)
|
|
423
|
+
with execution.execute() as exe:
|
|
424
|
+
populate_demo_catalog(exe)
|
|
425
|
+
if create_features:
|
|
426
|
+
create_demo_features(exe)
|
|
427
|
+
if create_datasets:
|
|
428
|
+
create_demo_datasets(exe)
|
|
429
|
+
execution.upload_execution_outputs()
|
|
430
|
+
|
|
431
|
+
except Exception as e:
|
|
408
432
|
# on failure, delete catalog and re-raise exception
|
|
409
433
|
test_catalog.delete_ermrest_catalog(really=True)
|
|
410
|
-
raise
|
|
434
|
+
raise e
|
|
411
435
|
return test_catalog
|
|
412
436
|
|
|
413
437
|
|
|
@@ -428,3 +452,60 @@ class DemoML(DerivaML):
|
|
|
428
452
|
working_dir=working_dir,
|
|
429
453
|
use_minid=use_minid,
|
|
430
454
|
)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
class DerivaMLDemoCatalogCLI(BaseCLI):
|
|
458
|
+
"""Main class to parse command line arguments and create the demo catalog."""
|
|
459
|
+
|
|
460
|
+
def __init__(self, description, epilog, **kwargs):
|
|
461
|
+
BaseCLI.__init__(self, description, epilog, **kwargs)
|
|
462
|
+
# Optional domain schema name for the demo catalog. Defaults to "demo-schema" if not provided.
|
|
463
|
+
self.parser.add_argument(
|
|
464
|
+
"--domain_schema",
|
|
465
|
+
type=str,
|
|
466
|
+
default="demo-schema",
|
|
467
|
+
help="Name of the domain schema to create/use for the demo catalog (default: demo-schema).",
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
@staticmethod
|
|
471
|
+
def _coerce_number(val: str):
|
|
472
|
+
"""
|
|
473
|
+
Try to convert a string to int, then float; otherwise return str.
|
|
474
|
+
"""
|
|
475
|
+
try:
|
|
476
|
+
return int(val)
|
|
477
|
+
except ValueError:
|
|
478
|
+
try:
|
|
479
|
+
return float(val)
|
|
480
|
+
except ValueError:
|
|
481
|
+
return val
|
|
482
|
+
|
|
483
|
+
def main(self) -> ErmrestCatalog:
|
|
484
|
+
"""Parse arguments and create the demo catalog on the specified host."""
|
|
485
|
+
args = self.parse_cli()
|
|
486
|
+
if not args.host:
|
|
487
|
+
raise ValueError("Host must be specified.")
|
|
488
|
+
demo_catalog = create_demo_catalog(args.host, args.domain_schema)
|
|
489
|
+
return demo_catalog
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def main() -> None:
|
|
493
|
+
"""Main entry point for the demo catalog CLI.
|
|
494
|
+
|
|
495
|
+
Creates and runs the DerivaMLDemoCatalogCLI instance.
|
|
496
|
+
|
|
497
|
+
Returns:
|
|
498
|
+
None. Executes the CLI and prints the server URI of the created catalog.
|
|
499
|
+
"""
|
|
500
|
+
cli = DerivaMLDemoCatalogCLI(description="Create a Deriva ML Sample Catalog", epilog="")
|
|
501
|
+
catalog = cli.main()
|
|
502
|
+
print("Created catalog: {}".format(catalog._server_uri))
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
if __name__ == "__main__":
|
|
506
|
+
try:
|
|
507
|
+
main()
|
|
508
|
+
except Exception as e:
|
|
509
|
+
print("Error creating catalog:")
|
|
510
|
+
print(e)
|
|
511
|
+
exit(1)
|
deriva_ml/execution/__init__.py
CHANGED
|
@@ -1,8 +1,30 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING
|
|
2
2
|
|
|
3
3
|
# Safe imports - no circular dependencies
|
|
4
|
-
from deriva_ml.execution.
|
|
4
|
+
from deriva_ml.execution.base_config import (
|
|
5
|
+
BaseConfig,
|
|
6
|
+
DerivaBaseConfig,
|
|
7
|
+
base_defaults,
|
|
8
|
+
get_notebook_configuration,
|
|
9
|
+
# New simplified API
|
|
10
|
+
notebook_config,
|
|
11
|
+
load_configs,
|
|
12
|
+
run_notebook,
|
|
13
|
+
# Config metadata helpers
|
|
14
|
+
DescribedList,
|
|
15
|
+
with_description,
|
|
16
|
+
)
|
|
17
|
+
from deriva_ml.execution.multirun_config import (
|
|
18
|
+
MultirunSpec,
|
|
19
|
+
multirun_config,
|
|
20
|
+
get_multirun_config,
|
|
21
|
+
list_multirun_configs,
|
|
22
|
+
get_all_multirun_configs,
|
|
23
|
+
)
|
|
24
|
+
from deriva_ml.execution.execution_configuration import AssetRID, ExecutionConfiguration
|
|
5
25
|
from deriva_ml.execution.workflow import Workflow
|
|
26
|
+
from deriva_ml.execution.runner import run_model, create_model_config, reset_multirun_state
|
|
27
|
+
from deriva_ml.execution.model_protocol import DerivaMLModel
|
|
6
28
|
|
|
7
29
|
if TYPE_CHECKING:
|
|
8
30
|
from deriva_ml.execution.execution import Execution
|
|
@@ -22,5 +44,27 @@ __all__ = [
|
|
|
22
44
|
"Execution", # Lazy-loaded
|
|
23
45
|
"ExecutionConfiguration",
|
|
24
46
|
"Workflow",
|
|
25
|
-
"
|
|
47
|
+
"AssetRID",
|
|
48
|
+
"run_model",
|
|
49
|
+
"create_model_config",
|
|
50
|
+
"reset_multirun_state",
|
|
51
|
+
"DerivaMLModel",
|
|
52
|
+
# Base configuration
|
|
53
|
+
"BaseConfig",
|
|
54
|
+
"DerivaBaseConfig",
|
|
55
|
+
"base_defaults",
|
|
56
|
+
"get_notebook_configuration",
|
|
57
|
+
# Simplified API
|
|
58
|
+
"notebook_config",
|
|
59
|
+
"load_configs",
|
|
60
|
+
"run_notebook",
|
|
61
|
+
# Config metadata helpers
|
|
62
|
+
"DescribedList",
|
|
63
|
+
"with_description",
|
|
64
|
+
# Multirun configuration
|
|
65
|
+
"MultirunSpec",
|
|
66
|
+
"multirun_config",
|
|
67
|
+
"get_multirun_config",
|
|
68
|
+
"list_multirun_configs",
|
|
69
|
+
"get_all_multirun_configs",
|
|
26
70
|
]
|