deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. deriva_ml/__init__.py +43 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +21 -0
  7. deriva_ml/catalog/clone.py +1199 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +817 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +186 -105
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +545 -244
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +224 -35
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
  67. deriva_ml-1.17.11.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.9.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/demo_catalog.py CHANGED
@@ -1,9 +1,17 @@
1
+ # type: ignore[arg-type, call-arg]
2
+ """Demo catalog utilities for DerivaML testing and examples.
3
+
4
+ This module creates demo catalogs with sample data for testing. It uses
5
+ dynamically created Pydantic models for features, which cannot be statically
6
+ typed - hence the type ignore above.
7
+ """
1
8
  from __future__ import annotations
2
9
 
3
10
  import atexit
4
11
  import itertools
5
12
  import logging
6
13
  import string
14
+ import subprocess
7
15
  from collections.abc import Iterator, Sequence
8
16
  from datetime import datetime
9
17
  from numbers import Integral
@@ -11,25 +19,30 @@ from pathlib import Path
11
19
  from random import choice, randint, random
12
20
  from tempfile import TemporaryDirectory
13
21
 
14
- from deriva.core import ErmrestCatalog
15
- from deriva.core.ermrest_model import Column, Schema, Table, builtin_types
22
+ from deriva.core import BaseCLI, ErmrestCatalog
23
+ from deriva.core.ermrest_model import Schema, Table
24
+ from deriva.core.typed import BuiltinType, ColumnDef, SchemaDef, TableDef
16
25
  from pydantic import BaseModel, ConfigDict
17
26
  from requests.exceptions import HTTPError
18
27
 
19
- from deriva_ml import DerivaML, MLVocab
28
+ from deriva_ml import DerivaML, DerivaMLException, MLVocab
20
29
  from deriva_ml.core.definitions import RID, BuiltinTypes, ColumnDefinition
30
+ from deriva_ml.dataset import Dataset
21
31
  from deriva_ml.dataset.aux_classes import DatasetVersion
22
- from deriva_ml.execution.execution import Execution
23
- from deriva_ml.execution.execution_configuration import ExecutionConfiguration
32
+ from deriva_ml.execution.execution import Execution, ExecutionConfiguration
24
33
  from deriva_ml.schema import (
25
34
  create_ml_catalog,
26
35
  )
27
- from deriva_ml.schema.annotations import catalog_annotation
28
36
 
29
37
  try:
38
+ from pprint import pformat
39
+
30
40
  from icecream import ic
31
41
 
32
- ic.configureOutput(includeContext=True)
42
+ ic.configureOutput(
43
+ includeContext=True,
44
+ argToStringFunction=lambda x: pformat(x.model_dump() if hasattr(x, "model_dump") else x, width=80, depth=10),
45
+ )
33
46
  except ImportError: # Graceful fallback if IceCream isn't installed.
34
47
  ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
35
48
 
@@ -37,34 +50,24 @@ except ImportError: # Graceful fallback if IceCream isn't installed.
37
50
  TEST_DATASET_SIZE = 12
38
51
 
39
52
 
40
- def populate_demo_catalog(ml_instance: DerivaML) -> None:
53
+ def populate_demo_catalog(execution: Execution) -> None:
41
54
  # Delete any vocabularies and features.
42
- domain_schema = ml_instance.pathBuilder.schemas[ml_instance.domain_schema]
55
+ ml_instance = execution._ml_object
56
+ domain_schema = ml_instance.domain_path()
43
57
  subject = domain_schema.tables["Subject"]
44
58
  ss = subject.insert([{"Name": f"Thing{t + 1}"} for t in range(TEST_DATASET_SIZE)])
45
-
46
- ml_instance.add_term(
47
- MLVocab.workflow_type,
48
- "Demo Catalog Creation",
49
- description="A workflow demonstrating how to create a demo catalog.",
50
- )
51
- execution = ml_instance.create_execution(
52
- ExecutionConfiguration(
53
- workflow=ml_instance.create_workflow(name="Demo Catalog", workflow_type="Demo Catalog Creation")
59
+ for s in ss:
60
+ image_file = execution.asset_file_path(
61
+ "Image",
62
+ f"test_{s['RID']}.txt",
63
+ Subject=s["RID"],
64
+ Acquisition_Time=datetime.now(),
65
+ Acquisition_Date=datetime.now().date(),
54
66
  )
55
- )
56
- with execution.execute() as e:
57
- for s in ss:
58
- image_file = e.asset_file_path(
59
- "Image",
60
- f"test_{s['RID']}.txt",
61
- Subject=s["RID"],
62
- Acquisition_Time=datetime.now(),
63
- Acquisition_Date=datetime.now().date(),
64
- )
65
- with image_file.open("w") as f:
66
- f.write(f"Hello there {random()}\n")
67
- execution.upload_execution_outputs()
67
+ with image_file.open("w") as f:
68
+ f.write(f"Hello there {random()}\n")
69
+
70
+ execution.upload_execution_outputs()
68
71
 
69
72
 
70
73
  class DatasetDescription(BaseModel):
@@ -75,7 +78,7 @@ class DatasetDescription(BaseModel):
75
78
  ] # Either a list of nested dataset, or then number of elements to add
76
79
  member_rids: dict[str, list[RID]] = {} # The rids of the members of the dataset.
77
80
  version: DatasetVersion = DatasetVersion(1, 0, 0) # The initial version.
78
- rid: RID = None # RID of dataset that was created.
81
+ dataset: Dataset = None  # The Dataset object that was created.
79
82
 
80
83
  model_config = ConfigDict(arbitrary_types_allowed=True)
81
84
 
@@ -89,7 +92,8 @@ def create_datasets(
89
92
  Create a dataset per `spec`, then add child members (either by slicing
90
93
  off pre-generated RIDs or by recursing on nested specs).
91
94
  """
92
- dataset_rid = client.create_dataset(
95
+ # Create unpinned dataset.
96
+ dataset = client.create_dataset(
93
97
  dataset_types=spec.types,
94
98
  description=spec.description,
95
99
  version=spec.version,
@@ -99,9 +103,10 @@ def create_datasets(
99
103
  description=spec.description,
100
104
  members={},
101
105
  types=spec.types,
102
- rid=dataset_rid,
106
+ dataset=dataset,
103
107
  version=spec.version,
104
108
  )
109
+
105
110
  dataset_rids = {}
106
111
  for member_type, value in spec.members.items():
107
112
  if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
@@ -110,7 +115,7 @@ def create_datasets(
110
115
  for child_spec in nested_specs:
111
116
  child_ds = create_datasets(client, child_spec, member_rids)
112
117
  result_spec.members.setdefault(member_type, []).append(child_ds)
113
- rids.append(child_ds.rid)
118
+ rids.append(child_ds.dataset.dataset_rid)
114
119
  elif isinstance(value, Integral):
115
120
  count = int(value)
116
121
  # take exactly `count` RIDs (or an empty list if count <= 0)
@@ -126,7 +131,7 @@ def create_datasets(
126
131
  if rids:
127
132
  dataset_rids[member_type] = rids
128
133
  result_spec.member_rids.setdefault(member_type, []).extend(rids)
129
- client.add_dataset_members(dataset_rid, dataset_rids, description="Added by create_datasets")
134
+ dataset.add_dataset_members(dataset_rids, description="Added by create_datasets")
130
135
 
131
136
  return result_spec
132
137
 
@@ -141,7 +146,7 @@ def dataset_spec() -> DatasetDescription:
141
146
  training_dataset = DatasetDescription(
142
147
  description="A dataset that is nested",
143
148
  members={"Dataset": [dataset, dataset], "Image": 2},
144
- types=["Testing"],
149
+ types=["Training"],
145
150
  )
146
151
 
147
152
  testing_dataset = DatasetDescription(
@@ -158,39 +163,37 @@ def dataset_spec() -> DatasetDescription:
158
163
  return double_nested_dataset
159
164
 
160
165
 
161
- def create_demo_datasets(ml_instance: DerivaML) -> DatasetDescription:
166
+ def create_demo_datasets(execution: Execution) -> DatasetDescription:
162
167
  """Create datasets from a populated catalog."""
168
+ ml_instance = execution._ml_object
163
169
  ml_instance.add_dataset_element_type("Subject")
164
170
  ml_instance.add_dataset_element_type("Image")
165
171
 
166
- _type_rid = ml_instance.add_term("Dataset_Type", "Complete", synonyms=["Whole"], description="A test")
167
- _training_rid = ml_instance.add_term("Dataset_Type", "Training", synonyms=["Train"], description="A training set")
168
- _testing_rid = ml_instance.add_term("Dataset_Type", "Testing", description="A testing set")
172
+ _type_rid = ml_instance.add_term(
173
+ "Dataset_Type", "Complete", synonyms=["Whole", "complete", "whole"], description="A test"
174
+ )
175
+ _training_rid = ml_instance.add_term(
176
+ "Dataset_Type", "Training", synonyms=["Train", "train", "training"], description="A training set"
177
+ )
178
+ _testing_rid = ml_instance.add_term(
179
+ "Dataset_Type", "Testing", synonyms=["Test", "test", "testing"], description="A testing set"
180
+ )
169
181
 
170
- table_path = ml_instance.catalog.getPathBuilder().schemas[ml_instance.domain_schema].tables["Subject"]
182
+ table_path = ml_instance.domain_path().tables["Subject"]
171
183
  subject_rids = [i["RID"] for i in table_path.entities().fetch()]
172
- table_path = ml_instance.catalog.getPathBuilder().schemas[ml_instance.domain_schema].tables["Image"]
173
- image_rids = [i["RID"] for i in table_path.entities().fetch()]
174
184
 
175
- ml_instance.add_term(
176
- MLVocab.workflow_type,
177
- "Create Dataset Workflow",
178
- description="A Workflow that creates a new dataset.",
179
- )
180
- dataset_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Create Dataset Workflow")
181
-
182
- dataset_execution = ml_instance.create_execution(
183
- ExecutionConfiguration(workflow=dataset_workflow, description="Create Dataset")
184
- )
185
+ table_path = ml_instance.domain_path().tables["Image"]
186
+ image_rids = [i["RID"] for i in table_path.entities().fetch()]
185
187
 
186
- with dataset_execution.execute() as exe:
187
- spec = dataset_spec()
188
- dataset = create_datasets(exe, spec, {"Subject": iter(subject_rids), "Image": iter(image_rids)})
188
+ spec = dataset_spec()
189
+ dataset = create_datasets(execution, spec, {"Subject": iter(subject_rids), "Image": iter(image_rids)})
189
190
  return dataset
190
191
 
191
192
 
192
- def create_demo_features(ml_instance: DerivaML) -> None:
193
- ml_instance.create_vocabulary("SubjectHealth", "A vocab")
193
+ def create_demo_features(execution: Execution) -> None:
194
+ ml_instance = execution._ml_object
195
+ # Use update_navbar=False for batch creation, then call apply_catalog_annotations() once at the end
196
+ ml_instance.create_vocabulary("SubjectHealth", "A vocab", update_navbar=False)
194
197
  ml_instance.add_term(
195
198
  "SubjectHealth",
196
199
  "Sick",
@@ -201,10 +204,12 @@ def create_demo_features(ml_instance: DerivaML) -> None:
201
204
  "Well",
202
205
  description="The subject self reports that they feel well",
203
206
  )
204
- ml_instance.create_vocabulary("ImageQuality", "Controlled vocabulary for image quality")
207
+ ml_instance.create_vocabulary("ImageQuality", "Controlled vocabulary for image quality", update_navbar=False)
205
208
  ml_instance.add_term("ImageQuality", "Good", description="The image is good")
206
209
  ml_instance.add_term("ImageQuality", "Bad", description="The image is bad")
207
- box_asset = ml_instance.create_asset("BoundingBox", comment="A file that contains a cropped version of a image")
210
+ box_asset = ml_instance.create_asset(
211
+ "BoundingBox", comment="A file that contains a cropped version of a image", update_navbar=False
212
+ )
208
213
 
209
214
  ml_instance.create_feature(
210
215
  "Subject",
@@ -212,9 +217,13 @@ def create_demo_features(ml_instance: DerivaML) -> None:
212
217
  terms=["SubjectHealth"],
213
218
  metadata=[ColumnDefinition(name="Scale", type=BuiltinTypes.int2, nullok=True)],
214
219
  optional=["Scale"],
220
+ update_navbar=False,
215
221
  )
216
- ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset])
217
- ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"])
222
+ ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset], update_navbar=False)
223
+ ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"], update_navbar=False)
224
+
225
+ # Update navbar once after all tables are created
226
+ ml_instance.apply_catalog_annotations()
218
227
 
219
228
  ImageQualityFeature = ml_instance.feature_record_class("Image", "Quality")
220
229
  ImageBoundingboxFeature = ml_instance.feature_record_class("Image", "BoundingBox")
@@ -222,24 +231,12 @@ def create_demo_features(ml_instance: DerivaML) -> None:
222
231
 
223
232
  # Get the workflow for this notebook
224
233
 
225
- ml_instance.add_term(
226
- MLVocab.workflow_type,
227
- "Feature Notebook Workflow",
228
- description="A Workflow that uses Deriva ML API",
229
- )
230
- ml_instance.add_term(MLVocab.asset_type, "API_Model", description="Model for our Notebook workflow")
231
- notebook_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Feature Notebook Workflow")
232
-
233
- feature_execution = ml_instance.create_execution(
234
- ExecutionConfiguration(workflow=notebook_workflow, description="Our Sample Workflow instance")
235
- )
236
-
237
- subject_rids = [i["RID"] for i in ml_instance.domain_path.tables["Subject"].entities().fetch()]
238
- image_rids = [i["RID"] for i in ml_instance.domain_path.tables["Image"].entities().fetch()]
234
+ subject_rids = [i["RID"] for i in ml_instance.domain_path().tables["Subject"].entities().fetch()]
235
+ image_rids = [i["RID"] for i in ml_instance.domain_path().tables["Image"].entities().fetch()]
239
236
  _subject_feature_list = [
240
237
  SubjectWellnessFeature(
241
238
  Subject=subject_rid,
242
- Execution=feature_execution.execution_rid,
239
+ Execution=execution.execution_rid,
243
240
  SubjectHealth=choice(["Well", "Sick"]),
244
241
  Scale=randint(1, 10),
245
242
  )
@@ -249,7 +246,7 @@ def create_demo_features(ml_instance: DerivaML) -> None:
249
246
  # Create a new set of images. For fun, lets wrap this in an execution so we get status updates
250
247
  bounding_box_files = []
251
248
  for i in range(10):
252
- bounding_box_file = feature_execution.asset_file_path("BoundingBox", f"box{i}.txt")
249
+ bounding_box_file = execution.asset_file_path("BoundingBox", f"box{i}.txt")
253
250
  with bounding_box_file.open("w") as fp:
254
251
  fp.write(f"Hi there {i}")
255
252
  bounding_box_files.append(bounding_box_file)
@@ -279,12 +276,9 @@ def create_demo_features(ml_instance: DerivaML) -> None:
279
276
  for subject_rid in subject_rids
280
277
  ]
281
278
 
282
- with feature_execution.execute() as execution:
283
- execution.add_features(image_bounding_box_feature_list)
284
- execution.add_features(image_quality_feature_list)
285
- execution.add_features(subject_feature_list)
286
-
287
- feature_execution.upload_execution_outputs()
279
+ execution.add_features(image_bounding_box_feature_list)
280
+ execution.add_features(image_quality_feature_list)
281
+ execution.add_features(subject_feature_list)
288
282
 
289
283
 
290
284
  def create_demo_files(ml_instance: DerivaML):
@@ -344,21 +338,25 @@ def create_domain_schema(catalog: ErmrestCatalog, sname: str) -> None:
344
338
  else:
345
339
  raise e
346
340
 
347
- domain_schema = model.create_schema(Schema.define(sname, annotations={"name_style": {"underline_space": True}}))
341
+ domain_schema = model.create_schema(
342
+ SchemaDef(name=sname, annotations={"name_style": {"underline_space": True}})
343
+ )
348
344
  subject_table = domain_schema.create_table(
349
- Table.define("Subject", column_defs=[Column.define("Name", builtin_types.text)])
345
+ TableDef(name="Subject", columns=[ColumnDef("Name", BuiltinType.text)])
350
346
  )
351
347
  with TemporaryDirectory() as tmpdir:
352
348
  ml_instance = DerivaML(hostname=catalog.deriva_server.server, catalog_id=catalog.catalog_id, working_dir=tmpdir)
349
+ # Use update_navbar=False since we call apply_catalog_annotations() explicitly at the end
353
350
  ml_instance.create_asset(
354
351
  "Image",
355
352
  column_defs=[
356
- Column.define("Acquisition_Time", builtin_types.timestamp),
357
- Column.define("Acquisition_Date", builtin_types.date),
353
+ ColumnDef("Acquisition_Time", BuiltinType.timestamp),
354
+ ColumnDef("Acquisition_Date", BuiltinType.date),
358
355
  ],
359
356
  referenced_tables=[subject_table],
357
+ update_navbar=False,
360
358
  )
361
- catalog_annotation(ml_instance.model)
359
+ ml_instance.apply_catalog_annotations()
362
360
 
363
361
 
364
362
  def destroy_demo_catalog(catalog):
@@ -386,28 +384,54 @@ def create_demo_catalog(
386
384
  test_catalog = create_ml_catalog(hostname, project_name=project_name)
387
385
  if on_exit_delete:
388
386
  atexit.register(destroy_demo_catalog, test_catalog)
387
+
389
388
  try:
390
389
  with TemporaryDirectory() as tmpdir:
390
+ try:
391
+ subprocess.run(
392
+ "git clone https://github.com/informatics-isi-edu/deriva-ml.git",
393
+ capture_output=True,
394
+ text=True,
395
+ shell=True,
396
+ check=True,
397
+ cwd=tmpdir,
398
+ )
399
+ except subprocess.CalledProcessError:
400
+ raise DerivaMLException("Cannot clone deriva-ml repo from GitHub.")
401
+
391
402
  create_domain_schema(test_catalog, domain_schema)
392
- ml_instance = DerivaML(
393
- hostname,
394
- catalog_id=test_catalog.catalog_id,
395
- domain_schema=domain_schema,
396
- working_dir=tmpdir,
397
- logging_level=logging_level,
398
- )
399
403
 
400
404
  if populate or create_features or create_datasets:
401
- populate_demo_catalog(ml_instance)
402
- if create_features:
403
- create_demo_features(ml_instance)
404
- if create_datasets:
405
- create_demo_datasets(ml_instance)
406
-
407
- except Exception:
405
+ ml_instance = DerivaML(
406
+ hostname,
407
+ catalog_id=test_catalog.catalog_id,
408
+ default_schema=domain_schema,
409
+ working_dir=tmpdir,
410
+ logging_level=logging_level,
411
+ )
412
+ ml_instance.add_term(
413
+ MLVocab.workflow_type,
414
+ "Demo Catalog Creation",
415
+ description="A Workflow that creates a new catalog and populates it with demo data.",
416
+ )
417
+ populate_workflow = ml_instance.create_workflow(
418
+ name="Demo Creation", workflow_type="Demo Catalog Creation"
419
+ )
420
+ execution = ml_instance.create_execution(
421
+ workflow=populate_workflow, configuration=ExecutionConfiguration()
422
+ )
423
+ with execution.execute() as exe:
424
+ populate_demo_catalog(exe)
425
+ if create_features:
426
+ create_demo_features(exe)
427
+ if create_datasets:
428
+ create_demo_datasets(exe)
429
+ execution.upload_execution_outputs()
430
+
431
+ except Exception as e:
408
432
  # on failure, delete catalog and re-raise exception
409
433
  test_catalog.delete_ermrest_catalog(really=True)
410
- raise
434
+ raise e
411
435
  return test_catalog
412
436
 
413
437
 
@@ -428,3 +452,60 @@ class DemoML(DerivaML):
428
452
  working_dir=working_dir,
429
453
  use_minid=use_minid,
430
454
  )
455
+
456
+
457
+ class DerivaMLDemoCatalogCLI(BaseCLI):
458
+ """Main class to parse command line arguments and call the model."""
459
+
460
+ def __init__(self, description, epilog, **kwargs):
461
+ BaseCLI.__init__(self, description, epilog, **kwargs)
462
+ # Optional domain schema name for the demo catalog. Defaults to "demo-schema" if not provided.
463
+ self.parser.add_argument(
464
+ "--domain_schema",
465
+ type=str,
466
+ default="demo-schema",
467
+ help="Name of the domain schema to create/use for the demo catalog (default: demo-schema).",
468
+ )
469
+
470
+ @staticmethod
471
+ def _coerce_number(val: str):
472
+ """
473
+ Try to convert a string to int, then float; otherwise return str.
474
+ """
475
+ try:
476
+ return int(val)
477
+ except ValueError:
478
+ try:
479
+ return float(val)
480
+ except ValueError:
481
+ return val
482
+
483
+ def main(self) -> ErmrestCatalog:
484
+ """Parse arguments and create the demo catalog."""
485
+ args = self.parse_cli()
486
+ if not args.host:
487
+ raise ValueError("Host must be specified.")
488
+ demo_catalog = create_demo_catalog(args.host, args.domain_schema)
489
+ return demo_catalog
490
+
491
+
492
+ def main() -> None:
493
+ """Main entry point for the demo catalog CLI.
494
+
495
+ Creates and runs the DerivaMLDemoCatalogCLI instance.
496
+
497
+ Returns:
498
+ None. Executes the CLI.
499
+ """
500
+ cli = DerivaMLDemoCatalogCLI(description="Create a Deriva ML Sample Catalog", epilog="")
501
+ catalog = cli.main()
502
+ print("Created catalog: {}".format(catalog._server_uri))
503
+
504
+
505
+ if __name__ == "__main__":
506
+ try:
507
+ main()
508
+ except Exception as e:
509
+ print("Error creating catalog:")
510
+ print(e)
511
+ exit(1)
@@ -1,8 +1,30 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
3
  # Safe imports - no circular dependencies
4
- from deriva_ml.execution.execution_configuration import AssetRIDConfig, ExecutionConfiguration
4
+ from deriva_ml.execution.base_config import (
5
+ BaseConfig,
6
+ DerivaBaseConfig,
7
+ base_defaults,
8
+ get_notebook_configuration,
9
+ # New simplified API
10
+ notebook_config,
11
+ load_configs,
12
+ run_notebook,
13
+ # Config metadata helpers
14
+ DescribedList,
15
+ with_description,
16
+ )
17
+ from deriva_ml.execution.multirun_config import (
18
+ MultirunSpec,
19
+ multirun_config,
20
+ get_multirun_config,
21
+ list_multirun_configs,
22
+ get_all_multirun_configs,
23
+ )
24
+ from deriva_ml.execution.execution_configuration import AssetRID, ExecutionConfiguration
5
25
  from deriva_ml.execution.workflow import Workflow
26
+ from deriva_ml.execution.runner import run_model, create_model_config, reset_multirun_state
27
+ from deriva_ml.execution.model_protocol import DerivaMLModel
6
28
 
7
29
  if TYPE_CHECKING:
8
30
  from deriva_ml.execution.execution import Execution
@@ -22,5 +44,27 @@ __all__ = [
22
44
  "Execution", # Lazy-loaded
23
45
  "ExecutionConfiguration",
24
46
  "Workflow",
25
- "AssetRIDConfig",
47
+ "AssetRID",
48
+ "run_model",
49
+ "create_model_config",
50
+ "reset_multirun_state",
51
+ "DerivaMLModel",
52
+ # Base configuration
53
+ "BaseConfig",
54
+ "DerivaBaseConfig",
55
+ "base_defaults",
56
+ "get_notebook_configuration",
57
+ # Simplified API
58
+ "notebook_config",
59
+ "load_configs",
60
+ "run_notebook",
61
+ # Config metadata helpers
62
+ "DescribedList",
63
+ "with_description",
64
+ # Multirun configuration
65
+ "MultirunSpec",
66
+ "multirun_config",
67
+ "get_multirun_config",
68
+ "list_multirun_configs",
69
+ "get_all_multirun_configs",
26
70
  ]