deriva-ml 1.13.3__py3-none-any.whl → 1.14.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. deriva_ml/__init__.py +25 -30
  2. deriva_ml/core/__init__.py +39 -0
  3. deriva_ml/core/base.py +1489 -0
  4. deriva_ml/core/constants.py +36 -0
  5. deriva_ml/core/definitions.py +74 -0
  6. deriva_ml/core/enums.py +222 -0
  7. deriva_ml/core/ermrest.py +288 -0
  8. deriva_ml/core/exceptions.py +28 -0
  9. deriva_ml/core/filespec.py +116 -0
  10. deriva_ml/dataset/__init__.py +4 -0
  11. deriva_ml/{dataset_aux_classes.py → dataset/aux_classes.py} +16 -12
  12. deriva_ml/{dataset.py → dataset/dataset.py} +408 -416
  13. deriva_ml/{dataset_bag.py → dataset/dataset_bag.py} +137 -97
  14. deriva_ml/{history.py → dataset/history.py} +52 -33
  15. deriva_ml/{upload.py → dataset/upload.py} +48 -70
  16. deriva_ml/demo_catalog.py +233 -183
  17. deriva_ml/execution/environment.py +290 -0
  18. deriva_ml/{execution.py → execution/execution.py} +365 -252
  19. deriva_ml/execution/execution_configuration.py +163 -0
  20. deriva_ml/{execution_configuration.py → execution/workflow.py} +206 -218
  21. deriva_ml/feature.py +83 -46
  22. deriva_ml/model/__init__.py +0 -0
  23. deriva_ml/{deriva_model.py → model/catalog.py} +113 -132
  24. deriva_ml/{database_model.py → model/database.py} +52 -74
  25. deriva_ml/model/sql_mapper.py +44 -0
  26. deriva_ml/run_notebook.py +19 -11
  27. deriva_ml/schema/__init__.py +3 -0
  28. deriva_ml/{schema_setup → schema}/annotations.py +31 -22
  29. deriva_ml/schema/check_schema.py +104 -0
  30. deriva_ml/{schema_setup → schema}/create_schema.py +151 -104
  31. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  32. deriva_ml/schema/table_comments_utils.py +57 -0
  33. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/METADATA +5 -4
  34. deriva_ml-1.14.26.dist-info/RECORD +40 -0
  35. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/entry_points.txt +1 -0
  36. deriva_ml/deriva_definitions.py +0 -372
  37. deriva_ml/deriva_ml_base.py +0 -1046
  38. deriva_ml/execution_environment.py +0 -139
  39. deriva_ml/schema_setup/table_comments_utils.py +0 -56
  40. deriva_ml/test-files/execution-parameters.json +0 -1
  41. deriva_ml/test-files/notebook-parameters.json +0 -5
  42. deriva_ml/test_functions.py +0 -141
  43. deriva_ml/test_notebook.ipynb +0 -197
  44. deriva_ml-1.13.3.dist-info/RECORD +0 -31
  45. /deriva_ml/{schema_setup → execution}/__init__.py +0 -0
  46. /deriva_ml/{schema_setup → schema}/policy.json +0 -0
  47. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/WHEEL +0 -0
  48. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/licenses/LICENSE +0 -0
  49. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/top_level.txt +0 -0
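The dominant change in this release is a repackaging: the flat top-level modules of 1.13.3 are split into `core`, `dataset`, `execution`, `model`, and `schema` subpackages, and the legacy modules (`deriva_ml_base.py`, `deriva_definitions.py`, `execution_environment.py`, `schema_setup/`) are removed. As a rough migration sketch, the imports below are exactly the substitutions that `demo_catalog.py` makes in the diff that follows; whether other symbols remain re-exported from the top-level `deriva_ml` package is not established by this diff.

# Import migration for the symbols used by demo_catalog.py (see the diff below).

# 1.13.3 (flat layout):
# from deriva_ml import (DerivaML, ExecutionConfiguration, MLVocab,
#                        BuiltinTypes, ColumnDefinition, DatasetVersion, RID)

# 1.14.26 (subpackage layout):
from deriva_ml import DerivaML, MLVocab
from deriva_ml.core.definitions import RID, BuiltinTypes, ColumnDefinition
from deriva_ml.dataset.aux_classes import DatasetVersion
from deriva_ml.execution.execution_configuration import ExecutionConfiguration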
deriva_ml/demo_catalog.py CHANGED
@@ -1,150 +1,188 @@
+ from __future__ import annotations
+
  import atexit
- from importlib.resources import files
  import itertools
  import logging
- from random import randint, random
- from typing import Optional
+ import string
+ from collections.abc import Iterator, Sequence
+ from numbers import Integral
+ from pathlib import Path
+ from random import choice, randint, random
  from tempfile import TemporaryDirectory

- from deriva.core import DerivaServer, get_credential
  from deriva.core import ErmrestCatalog
- from deriva.core.datapath import DataPathException
- from deriva.core.ermrest_model import builtin_types, Schema, Table, Column
- from requests import HTTPError
- import subprocess
-
- from .schema_setup.annotations import catalog_annotation
- from deriva_ml import (
-     DerivaML,
-     ExecutionConfiguration,
-     MLVocab,
-     BuiltinTypes,
-     ColumnDefinition,
-     DatasetVersion,
-     RID,
+ from deriva.core.ermrest_model import Column, Schema, Table, builtin_types
+ from pydantic import BaseModel, ConfigDict
+ from requests.exceptions import HTTPError
+
+ from deriva_ml import DerivaML, MLVocab
+ from deriva_ml.core.definitions import RID, BuiltinTypes, ColumnDefinition
+ from deriva_ml.dataset.aux_classes import DatasetVersion
+ from deriva_ml.execution.execution import Execution
+ from deriva_ml.execution.execution_configuration import ExecutionConfiguration
+ from deriva_ml.schema import (
+     create_ml_catalog,
  )
+ from deriva_ml.schema.annotations import catalog_annotation

- from deriva_ml.schema_setup.create_schema import (
-     initialize_ml_schema,
-     create_ml_schema,
- )
+ try:
+     from icecream import ic
+
+     ic.configureOutput(includeContext=True)
+ except ImportError:  # Graceful fallback if IceCream isn't installed.
+     ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a)  # noqa
+
+
+ TEST_DATASET_SIZE = 12

- TEST_DATASET_SIZE = 4
-
-
- def reset_demo_catalog(deriva_ml: DerivaML, sname: str):
-     model = deriva_ml.model
-     for trial in range(3):
-         for t in [v for v in model.schemas[sname].tables.values()]:
-             try:
-                 t.drop()
-             except HTTPError:
-                 pass
-         model.schemas[sname].drop()
-     # Empty out remaining tables.
-     pb = deriva_ml.pathBuilder
-     retry = True
-     while retry:
-         for t in pb.schemas["deriva-ml"].tables.values():
-             for e in t.entities().fetch():
-                 try:
-                     t.filter(t.RID == e["RID"]).delete()
-                 except DataPathException:  # FK constraint.
-                     retry = True
-     initialize_ml_schema(model, "deriva-ml")
-     create_domain_schema(deriva_ml, sname)
-
-
- def populate_demo_catalog(deriva_ml: DerivaML, sname: str) -> None:
+
+ def populate_demo_catalog(ml_instance: DerivaML) -> None:
      # Delete any vocabularies and features.
-     domain_schema = deriva_ml.catalog.getPathBuilder().schemas[sname]
+     domain_schema = ml_instance.pathBuilder.schemas[ml_instance.domain_schema]
      subject = domain_schema.tables["Subject"]
      ss = subject.insert([{"Name": f"Thing{t + 1}"} for t in range(TEST_DATASET_SIZE)])
-     deriva_ml.add_term(
+
+     ml_instance.add_term(
          MLVocab.workflow_type,
          "Demo Catalog Creation",
          description="A workflow demonstrating how to create a demo catalog.",
      )
-     execution = deriva_ml.create_execution(
+     execution = ml_instance.create_execution(
          ExecutionConfiguration(
-             workflow=deriva_ml.create_workflow(
-                 name="Demo Catalog", workflow_type="Demo Catalog Creation"
-             )
+             workflow=ml_instance.create_workflow(name="Demo Catalog", workflow_type="Demo Catalog Creation")
          )
      )
      with execution.execute() as e:
          for s in ss:
-             image_file = e.asset_file_path(
-                 "Image", f"test_{s['RID']}.txt", Subject=s["RID"]
-             )
-             with open(image_file, "w") as f:
+             image_file = e.asset_file_path("Image", f"test_{s['RID']}.txt", Subject=s["RID"])
+             with image_file.open("w") as f:
                  f.write(f"Hello there {random()}\n")
      execution.upload_execution_outputs()


- def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RID]]:
-     ml_instance.add_dataset_element_type("Subject")
-     ml_instance.add_dataset_element_type("Image")
+ class DatasetDescription(BaseModel):
+     types: list[str]  # Types of the dataset.
+     description: str  # Description.
+     members: dict[
+         str, int | list[DatasetDescription]
+     ]  # Either a list of nested datasets, or the number of elements to add.
+     member_rids: dict[str, list[RID]] = {}  # The RIDs of the members of the dataset.
+     version: DatasetVersion = DatasetVersion(1, 0, 0)  # The initial version.
+     rid: RID = None  # RID of dataset that was created.
+
+     model_config = ConfigDict(arbitrary_types_allowed=True)
+
+
+ def create_datasets(
+     client: Execution,
+     spec: DatasetDescription,
+     member_rids: dict[str, Iterator[RID]],
+ ) -> DatasetDescription:
+     """
+     Create a dataset per `spec`, then add child members (either by slicing
+     off pre-generated RIDs or by recursing on nested specs).
+     """
+     dataset_rid = client.create_dataset(
+         dataset_types=spec.types,
+         description=spec.description,
+         version=spec.version,
+     )

-     type_rid = ml_instance.add_term("Dataset_Type", "TestSet", description="A test")
-     training_rid = ml_instance.add_term(
-         "Dataset_Type", "Training", description="A training set"
+     result_spec = DatasetDescription(
+         description=spec.description,
+         members={},
+         types=spec.types,
+         rid=dataset_rid,
+         version=spec.version,
      )
-     testing_rid = ml_instance.add_term(
-         "Dataset_Type", "Testing", description="A testing set"
+     dataset_rids = {}
+     for member_type, value in spec.members.items():
+         if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
+             nested_specs: list[DatasetDescription] = list(value)
+             rids: list[RID] = []
+             for child_spec in nested_specs:
+                 child_ds = create_datasets(client, child_spec, member_rids)
+                 result_spec.members.setdefault(member_type, []).append(child_ds)
+                 rids.append(child_ds.rid)
+         elif isinstance(value, Integral):
+             count = int(value)
+             # take exactly `count` RIDs (or an empty list if count <= 0)
+             rids = list(itertools.islice(member_rids[member_type], count))
+             assert len(rids) == count, f"Expected {count} RIDs, got {len(rids)}"
+             result_spec.members[member_type] = count
+         else:
+             raise TypeError(
+                 f"Expected spec.members['{member_type}'] to be either an int or a list, got {type(value).__name__!r}"
+             )
+
+         # attach and record
+         if rids:
+             dataset_rids[member_type] = rids
+             result_spec.member_rids.setdefault(member_type, []).extend(rids)
+     client.add_dataset_members(dataset_rid, dataset_rids, description="Added by create_datasets")
+
+     return result_spec
+
+
+ def dataset_spec() -> DatasetDescription:
+     dataset = DatasetDescription(
+         description="A dataset",
+         members={"Subject": 2},
+         types=[],
+     )
+
+     training_dataset = DatasetDescription(
+         description="A dataset that is nested",
+         members={"Dataset": [dataset, dataset], "Image": 2},
+         types=["Testing"],
+     )
+
+     testing_dataset = DatasetDescription(
+         description="A dataset that is nested",
+         members={"Dataset": [dataset, dataset], "Image": 2},
+         types=["Testing"],
      )

-     table_path = (
-         ml_instance.catalog.getPathBuilder()
-         .schemas[ml_instance.domain_schema]
-         .tables["Subject"]
+     double_nested_dataset = DatasetDescription(
+         description="A dataset that is double nested",
+         members={"Dataset": [training_dataset, testing_dataset]},
+         types=["Complete"],
      )
+     return double_nested_dataset
+
+
+ def create_demo_datasets(ml_instance: DerivaML) -> DatasetDescription:
+     """Create datasets from a populated catalog."""
+     ml_instance.add_dataset_element_type("Subject")
+     ml_instance.add_dataset_element_type("Image")
+
+     _type_rid = ml_instance.add_term("Dataset_Type", "Complete", synonyms=["Whole"], description="A test")
+     _training_rid = ml_instance.add_term("Dataset_Type", "Training", synonyms=["Train"], description="A training set")
+     _testing_rid = ml_instance.add_term("Dataset_Type", "Testing", description="A testing set")
+
+     table_path = ml_instance.catalog.getPathBuilder().schemas[ml_instance.domain_schema].tables["Subject"]
      subject_rids = [i["RID"] for i in table_path.entities().fetch()]
+     table_path = ml_instance.catalog.getPathBuilder().schemas[ml_instance.domain_schema].tables["Image"]
+     image_rids = [i["RID"] for i in table_path.entities().fetch()]

      ml_instance.add_term(
          MLVocab.workflow_type,
          "Create Dataset Workflow",
          description="A Workflow that creates a new dataset.",
      )
-     dataset_workflow = ml_instance.create_workflow(
-         name="API Workflow", workflow_type="Create Dataset Workflow"
-     )
+     dataset_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Create Dataset Workflow")

      dataset_execution = ml_instance.create_execution(
          ExecutionConfiguration(workflow=dataset_workflow, description="Create Dataset")
      )

      with dataset_execution.execute() as exe:
-         dataset_rids = []
-         for r in subject_rids[0:4]:
-             d = exe.create_dataset(
-                 dataset_types=[type_rid.name, "Testing"],
-                 description=f"Dataset {r}",
-                 version=DatasetVersion(1, 0, 0),
-             )
-             ml_instance.add_dataset_members(d, [r])
-             dataset_rids.append(d)
-
-         nested_datasets = []
-         for i in range(0, 4, 2):
-             nested_dataset = exe.create_dataset(
-                 dataset_types=[type_rid.name, "Training"],
-                 description=f"Nested Dataset {i}",
-                 version=DatasetVersion(1, 0, 0),
-             )
-             exe.add_dataset_members(nested_dataset, dataset_rids[i : i + 2])
-             nested_datasets.append(nested_dataset)
-
-         double_nested_dataset = exe.create_dataset(
-             dataset_types=type_rid.name,
-             description="Double nested dataset",
-             version=DatasetVersion(1, 0, 0),
-         )
-         exe.add_dataset_members(double_nested_dataset, nested_datasets)
-     return double_nested_dataset, nested_datasets, dataset_rids
+         spec = dataset_spec()
+         dataset = create_datasets(exe, spec, {"Subject": iter(subject_rids), "Image": iter(image_rids)})
+     return dataset


- def create_demo_features(ml_instance):
+ def create_demo_features(ml_instance: DerivaML) -> None:
      ml_instance.create_vocabulary("SubjectHealth", "A vocab")
      ml_instance.add_term(
          "SubjectHealth",
@@ -156,14 +194,10 @@ def create_demo_features(ml_instance):
          "Well",
          description="The subject self reports that they feel well",
      )
-     ml_instance.create_vocabulary(
-         "ImageQuality", "Controlled vocabulary for image quality"
-     )
+     ml_instance.create_vocabulary("ImageQuality", "Controlled vocabulary for image quality")
      ml_instance.add_term("ImageQuality", "Good", description="The image is good")
      ml_instance.add_term("ImageQuality", "Bad", description="The image is bad")
-     box_asset = ml_instance.create_asset(
-         "BoundingBox", comment="A file that contains a cropped version of a image"
-     )
+     box_asset = ml_instance.create_asset("BoundingBox", comment="A file that contains a cropped version of a image")

      ml_instance.create_feature(
          "Subject",
@@ -186,30 +220,20 @@ def create_demo_features(ml_instance):
          "Feature Notebook Workflow",
          description="A Workflow that uses Deriva ML API",
      )
-     ml_instance.add_term(
-         MLVocab.asset_type, "API_Model", description="Model for our Notebook workflow"
-     )
-     notebook_workflow = ml_instance.create_workflow(
-         name="API Workflow", workflow_type="Feature Notebook Workflow"
-     )
+     ml_instance.add_term(MLVocab.asset_type, "API_Model", description="Model for our Notebook workflow")
+     notebook_workflow = ml_instance.create_workflow(name="API Workflow", workflow_type="Feature Notebook Workflow")

      feature_execution = ml_instance.create_execution(
-         ExecutionConfiguration(
-             workflow=notebook_workflow, description="Our Sample Workflow instance"
-         )
+         ExecutionConfiguration(workflow=notebook_workflow, description="Our Sample Workflow instance")
      )

-     subject_rids = [
-         i["RID"] for i in ml_instance.domain_path.tables["Subject"].entities().fetch()
-     ]
-     image_rids = [
-         i["RID"] for i in ml_instance.domain_path.tables["Image"].entities().fetch()
-     ]
-     subject_feature_list = [
+     subject_rids = [i["RID"] for i in ml_instance.domain_path.tables["Subject"].entities().fetch()]
+     image_rids = [i["RID"] for i in ml_instance.domain_path.tables["Image"].entities().fetch()]
+     _subject_feature_list = [
          SubjectWellnessFeature(
              Subject=subject_rid,
              Execution=feature_execution.execution_rid,
-             SubjectHealth=["Well", "Sick"][randint(0, 1)],
+             SubjectHealth=choice(["Well", "Sick"]),
              Scale=randint(1, 10),
          )
          for subject_rid in subject_rids
@@ -218,10 +242,8 @@ def create_demo_features(ml_instance):
      # Create a new set of images. For fun, lets wrap this in an execution so we get status updates
      bounding_box_files = []
      for i in range(10):
-         bounding_box_file = feature_execution.asset_file_path(
-             "BoundingBox", f"box{i}.txt"
-         )
-         with open(bounding_box_file, "w") as fp:
+         bounding_box_file = feature_execution.asset_file_path("BoundingBox", f"box{i}.txt")
+         with bounding_box_file.open("w") as fp:
              fp.write(f"Hi there {i}")
          bounding_box_files.append(bounding_box_file)

@@ -230,15 +252,13 @@ def create_demo_features(ml_instance):
              Image=image_rid,
              BoundingBox=asset_name,
          )
-         for image_rid, asset_name in zip(
-             image_rids, itertools.cycle(bounding_box_files)
-         )
+         for image_rid, asset_name in zip(image_rids, itertools.cycle(bounding_box_files))
      ]

      image_quality_feature_list = [
          ImageQualityFeature(
              Image=image_rid,
-             ImageQuality=["Good", "Bad"][randint(0, 1)],
+             ImageQuality=choice(["Good", "Bad"]),
          )
          for image_rid in image_rids
      ]
@@ -246,99 +266,129 @@ def create_demo_features(ml_instance):
      subject_feature_list = [
          SubjectWellnessFeature(
              Subject=subject_rid,
-             SubjectHealth=["Well", "Sick"][randint(0, 1)],
+             SubjectHealth=choice(["Well", "Sick"]),
              Scale=randint(1, 10),
          )
          for subject_rid in subject_rids
      ]

      with feature_execution.execute() as execution:
-         feature_execution.add_features(image_bounding_box_feature_list)
-         feature_execution.add_features(image_quality_feature_list)
-         feature_execution.add_features(subject_feature_list)
+         execution.add_features(image_bounding_box_feature_list)
+         execution.add_features(image_quality_feature_list)
+         execution.add_features(subject_feature_list)

      feature_execution.upload_execution_outputs()


- def create_domain_schema(ml_instance: DerivaML, sname: str) -> None:
+ def create_demo_files(ml_instance: DerivaML):
+     """Create demo files for testing purposes.
+
+     Args:
+         ml_instance: The DerivaML instance to create files for.
+
+     Returns:
+         None. Creates files in the working directory.
+     """
+
+     def random_string(length: int) -> str:
+         """Generate a random string of specified length.
+
+         Args:
+             length: The length of the string to generate.
+
+         Returns:
+             A random string of the specified length.
+         """
+         return "".join(random.choice(string.ascii_letters) for _ in range(length))
+
+     test_dir = ml_instance.working_dir / "test_dir"
+     test_dir.mkdir(parents=True, exist_ok=True)
+     d1 = test_dir / "d1"
+     d1.mkdir(parents=True, exist_ok=True)
+     d2 = test_dir / "d2"
+     d2.mkdir(parents=True, exist_ok=True)
+
+     # Create some demo files
+     for d in [test_dir, d1, d2]:
+         for i in range(5):
+             fname = Path(d) / f"file{i}.{random.choice(['txt', 'jpeg'])}"
+             with fname.open("w") as f:
+                 f.write(random_string(10))
+     ml_instance.add_term(MLVocab.workflow_type, "File Test Workflow", description="Test workflow")
+
+
+ def create_domain_schema(catalog: ErmrestCatalog, sname: str) -> None:
      """
      Create a domain schema. Assumes that the ml-schema has already been created.
-     :param model:
      :param sname:
      :return:
      """
+     model = catalog.getCatalogModel()
+     _ = model.schemas["deriva-ml"]

-     _ = ml_instance.model.schemas["deriva-ml"]
-
-     if ml_instance.model.schemas.get(sname):
-         # Clean out any old junk....
-         ml_instance.model.schemas[sname].drop()
-
-     domain_schema = ml_instance.model.create_schema(
-         Schema.define(sname, annotations={"name_style": {"underline_space": True}})
-     )
+     try:
+         model.schemas[sname].drop(cascade=True)
+     except KeyError:
+         pass
+     except HTTPError as e:
+         print(e)
+         if f"Schema {sname} does not exist" in str(e):
+             pass
+         else:
+             raise e
+
+     domain_schema = model.create_schema(Schema.define(sname, annotations={"name_style": {"underline_space": True}}))
      subject_table = domain_schema.create_table(
          Table.define("Subject", column_defs=[Column.define("Name", builtin_types.text)])
      )
-     ml_instance.create_asset("Image", referenced_tables=[subject_table])
-
-     catalog_annotation(ml_instance.model)
+     with TemporaryDirectory() as tmpdir:
+         ml_instance = DerivaML(hostname=catalog.deriva_server.server, catalog_id=catalog.catalog_id, working_dir=tmpdir)
+         ml_instance.create_asset("Image", referenced_tables=[subject_table])
+         catalog_annotation(ml_instance.model)


  def destroy_demo_catalog(catalog):
+     """Destroy the demo catalog and clean up resources.
+
+     Args:
+         catalog: The ErmrestCatalog instance to destroy.
+
+     Returns:
+         None. Destroys the catalog.
+     """
      catalog.delete_ermrest_catalog(really=True)


  def create_demo_catalog(
      hostname,
-     domain_schema="test-schema",
+     domain_schema="demo-schema",
      project_name="ml-test",
      populate=True,
      create_features=False,
      create_datasets=False,
      on_exit_delete=True,
+     logging_level=logging.INFO,
  ) -> ErmrestCatalog:
-     credential = get_credential(hostname)
-
-     server = DerivaServer("https", hostname, credentials=credential)
-     test_catalog = server.create_ermrest_catalog()
-     model = test_catalog.getCatalogModel()
-     model.configure_baseline_catalog()
-     policy_file = files("deriva_ml.schema_setup").joinpath("policy.json")
-     subprocess.run(
-         [
-             "deriva-acl-config",
-             "--host",
-             test_catalog.deriva_server.server,
-             "--config-file",
-             policy_file,
-             test_catalog.catalog_id,
-         ]
-     )
-
+     test_catalog = create_ml_catalog(hostname, project_name=project_name)
      if on_exit_delete:
          atexit.register(destroy_demo_catalog, test_catalog)
-
      try:
          with TemporaryDirectory() as tmpdir:
-             create_ml_schema(test_catalog, project_name=project_name)
-             deriva_ml = DerivaML(
-                 hostname=hostname,
+             create_domain_schema(test_catalog, domain_schema)
+             ml_instance = DerivaML(
+                 hostname,
                  catalog_id=test_catalog.catalog_id,
-                 project_name=project_name,
                  domain_schema=domain_schema,
-                 logging_level=logging.WARN,
                  working_dir=tmpdir,
-                 credential=credential,
+                 logging_level=logging_level,
              )
-             create_domain_schema(deriva_ml, domain_schema)

              if populate or create_features or create_datasets:
-                 populate_demo_catalog(deriva_ml, domain_schema)
+                 populate_demo_catalog(ml_instance)
                  if create_features:
-                     create_demo_features(deriva_ml)
+                     create_demo_features(ml_instance)
                  if create_datasets:
-                     create_demo_datasets(deriva_ml)
+                     create_demo_datasets(ml_instance)

      except Exception:
          # on failure, delete catalog and re-raise exception
@@ -352,8 +402,8 @@ class DemoML(DerivaML):
          self,
          hostname,
          catalog_id,
-         cache_dir: Optional[str] = None,
-         working_dir: Optional[str] = None,
+         cache_dir: str | None = None,
+         working_dir: str | None = None,
          use_minid=True,
      ):
          super().__init__(
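
Taken together, the reworked entry points in `demo_catalog.py` can be exercised roughly as follows. This is a minimal sketch, not part of the package: it assumes a DERIVA server you are already authenticated against, the hostname is a placeholder, and the keyword defaults are those visible in the diff above.

import logging

from deriva_ml.demo_catalog import DemoML, create_demo_catalog, destroy_demo_catalog

# Build a throwaway demo catalog with the domain schema, features, and nested datasets.
test_catalog = create_demo_catalog(
    "demo.example.org",           # placeholder hostname (assumption)
    domain_schema="demo-schema",  # new default; was "test-schema" in 1.13.3
    create_features=True,
    create_datasets=True,
    on_exit_delete=False,         # skip the atexit cleanup registered by default
    logging_level=logging.INFO,   # parameter added in 1.14.26
)

# Connect to the demo catalog through the DemoML convenience subclass.
ml_instance = DemoML("demo.example.org", test_catalog.catalog_id)

# ... use ml_instance ...

destroy_demo_catalog(test_catalog)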