deriva-ml 1.10.1__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/database_model.py +3 -2
- deriva_ml/dataset.py +7 -16
- deriva_ml/dataset_bag.py +10 -3
- deriva_ml/demo_catalog.py +84 -78
- deriva_ml/deriva_definitions.py +2 -2
- deriva_ml/deriva_ml_base.py +105 -132
- deriva_ml/deriva_model.py +31 -0
- deriva_ml/execution.py +422 -315
- deriva_ml/execution_configuration.py +4 -0
- deriva_ml/feature.py +1 -2
- deriva_ml/schema_setup/create_schema.py +223 -183
- deriva_ml/upload.py +99 -236
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/METADATA +3 -1
- deriva_ml-1.12.0.dist-info/RECORD +27 -0
- deriva_ml-1.10.1.dist-info/RECORD +0 -27
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/WHEEL +0 -0
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/top_level.txt +0 -0
deriva_ml/database_model.py
CHANGED
```diff
@@ -1,4 +1,4 @@
-"""Ths module contains the definition of the DatabaseModel class. The role of this class is to provide an
+"""Ths module contains the definition of the DatabaseModel class. The role of this class is to provide an interface between the BDBag representation
 of a dataset and a sqllite database in which the contents of the bag are stored.
 """
 
@@ -51,7 +51,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
     appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
     into DatabaseModels, is kept in the class variable `_rid_map`.
 
-    Because you can load
+    Because you can load different versions of a dataset simultaneously, the dataset RID and version number are tracked, and a new
     sqllite instance is created for every new dataset version present.
 
     Attributes:
@@ -290,6 +290,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
         return DatasetBag(self, dataset_rid or self.dataset_rid)
 
     def dataset_version(self, dataset_rid: Optional[RID] = None) -> DatasetVersion:
+        """Return the version of the specified dataset."""
        if dataset_rid and dataset_rid not in self.bag_rids:
            DerivaMLException(f"Dataset RID {dataset_rid} is not in model.")
        return self.bag_rids[dataset_rid]
```
deriva_ml/dataset.py
CHANGED
```diff
@@ -232,12 +232,10 @@
         """Increment the version of the specified dataset_table.
 
         Args:
-
-
-
-
-            description: Description of the version update of the dataset_table.
-            execution_rid: Which execution is performing increment.
+            dataset_rid: RID of the dataset whose version is to be incremented.
+            component: Which version of the dataset_table to increment. Major, Minor or Patch
+            description: Description of the version update of the dataset_table.
+            execution_rid: Which execution is performing increment.
 
         Returns:
             new semantic version of the dataset_table as a 3-tuple
```
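The rewritten Args section documents the Major/Minor/Patch increment rule on a 3-tuple semantic version. As a self-contained illustration of that convention (the `bump` helper is hypothetical, not deriva-ml code):

```python
def bump(version: tuple[int, int, int], component: str) -> tuple[int, int, int]:
    """Increment one component of a 3-tuple semantic version."""
    major, minor, patch = version
    if component == "Major":
        return (major + 1, 0, 0)      # breaking change: reset minor and patch
    if component == "Minor":
        return (major, minor + 1, 0)  # additions: reset patch
    if component == "Patch":
        return (major, minor, patch + 1)
    raise ValueError(f"unknown component: {component}")

assert bump((1, 0, 0), "Minor") == (1, 1, 0)
assert bump((1, 1, 3), "Major") == (2, 0, 0)
```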
```diff
@@ -275,9 +273,6 @@
             description: Description of the dataset_table.
             execution_rid: Execution under which the dataset_table will be created.
             version: Version of the dataset_table.
-            type: str | list[str]:
-            description: str:
-
 
         Returns:
             New dataset_table RID.
@@ -349,7 +344,6 @@
         Args:
             dataset_rid: RID of the dataset_table to delete.
             recurse: If True, delete the dataset_table along with any nested datasets. (Default value = False)
-            dataset_rid: RID:
         """
         # Get association table entries for this dataset_table
         # Delete association table entries
```
```diff
@@ -397,7 +391,7 @@
             filtered_path = dataset_path
         else:
             filtered_path = dataset_path.filter(
-                (dataset_path.Deleted == False) | (dataset_path.Deleted == None)
+                (dataset_path.Deleted == False) | (dataset_path.Deleted == None)  # noqa: E712
             )
 
         # Get a list of all the dataset_type values associated with this dataset_table.
```
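The added `# noqa: E712` is deliberate: flake8's E712 rule would normally demand `is False` / `is None`, but deriva's datapath DSL overloads `==` to build a server-side filter expression, and `is` cannot be overloaded. A tiny runnable sketch of why such DSLs depend on `==` (the `Column` class here is a stand-in, not the deriva API):

```python
class Column:
    """Stand-in for a datapath column: `==` builds a filter instead of comparing."""
    def __init__(self, name: str):
        self.name = name
    def __eq__(self, other):
        # Returns a filter expression object (here, just a string).
        return f"{self.name} = {other!r}"

deleted = Column("Deleted")
print(deleted == False)  # noqa: E712 -> "Deleted = False", a filter expression
# `deleted is False` would skip __eq__ entirely and always evaluate to False.
```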
```diff
@@ -439,8 +433,7 @@
         routine makes it possible to add objects from the specified table to a dataset_table.
 
         Args:
-            element: Name
-            element: str | Table:
+            element: Name of the table or table object that is to be added to the dataset_table.
 
         Returns:
             The table object that was added to the dataset_table.
@@ -464,7 +457,6 @@
 
         Args:
             dataset_rid: param recurse: If this is a nested dataset_table, list the members of the contained datasets
-            dataset_rid: RID:
             recurse: (Default value = False)
             limit: If provided, the maximum number of members to return for each element type.
 
@@ -677,7 +669,6 @@
 
         Args:
             dataset_rid: return: RID of the parent dataset_table.
-            dataset_rid: RID:
 
         Returns:
             RID of the parent dataset_table.
@@ -805,7 +796,7 @@
         dataset_elements = [
             snapshot_catalog._model.name_to_table(e)
             for e, m in snapshot_catalog.list_dataset_members(
-                dataset_rid=dataset_rid,
+                dataset_rid=dataset_rid,  # limit=1 Limit seems to make things run slow.
             ).items()
             if m
         ]
```
deriva_ml/dataset_bag.py
CHANGED
```diff
@@ -168,7 +168,7 @@ class DatasetBag:
             yield dict(zip(col_names, row))
 
     @validate_call
-    def list_dataset_members(self, recurse: bool = False) -> dict[str, list
+    def list_dataset_members(self, recurse: bool = False) -> dict[str, dict[str, list]]:
         """Return a list of entities associated with a specific _dataset_table.
 
         Args:
@@ -206,12 +206,19 @@
             )
 
         with self.database as db:
+            col_names = [
+                c[1]
+                for c in db.execute(f'PRAGMA table_info("{sql_target}")').fetchall()
+            ]
+            select_cols = ",".join([f'"{sql_target}".{c}' for c in col_names])
             sql_cmd = (
-                f'SELECT
+                f'SELECT {select_cols} FROM "{sql_member}" '
                 f'JOIN "{sql_target}" ON "{sql_member}".{member_link[0]} = "{sql_target}".{member_link[1]} '
                 f'WHERE "{self.dataset_rid}" = "{sql_member}".Dataset;'
             )
-            target_entities =
+            target_entities = [
+                dict(zip(col_names, e)) for e in db.execute(sql_cmd).fetchall()
+            ]
             members[target_table.name].extend(target_entities)
 
             target_entities = []  # path.entities().fetch()
```
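The new code builds an explicit, table-qualified SELECT list from `PRAGMA table_info` instead of selecting columns unqualified, so columns of the joined association table (both tables carry a RID, for instance) cannot collide with the target table's. A self-contained sqlite3 sketch of the same technique, with invented table names:

```python
import sqlite3

# Self-contained sketch of the column-qualification technique used above.
db = sqlite3.connect(":memory:")
db.execute('CREATE TABLE "Image" (RID TEXT, Filename TEXT)')
db.execute('CREATE TABLE "Dataset_Image" (RID TEXT, Dataset TEXT, Image TEXT)')
db.execute('INSERT INTO "Image" VALUES (?, ?)', ("img-1", "a.png"))
db.execute('INSERT INTO "Dataset_Image" VALUES (?, ?, ?)', ("assoc-1", "ds-1", "img-1"))

# PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk);
# index 1 is the column name, exactly as in the diff.
col_names = [c[1] for c in db.execute('PRAGMA table_info("Image")').fetchall()]
select_cols = ",".join(f'"Image".{c}' for c in col_names)

sql = (
    f'SELECT {select_cols} FROM "Dataset_Image" '
    f'JOIN "Image" ON "Dataset_Image".Image = "Image".RID '
    f'WHERE "Dataset_Image".Dataset = ?'
)
rows = [dict(zip(col_names, r)) for r in db.execute(sql, ("ds-1",))]
print(rows)  # [{'RID': 'img-1', 'Filename': 'a.png'}], no ambiguous RID column
```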
deriva_ml/demo_catalog.py
CHANGED
```diff
@@ -2,9 +2,7 @@ import atexit
 from importlib.metadata import version
 from importlib.resources import files
 import logging
-from random import
-import tempfile
-from tempfile import TemporaryDirectory
+from random import randint, random
 from typing import Optional
 import itertools
 
@@ -12,7 +10,6 @@ from deriva.config.acl_config import AclConfig
 from deriva.core import DerivaServer
 from deriva.core import ErmrestCatalog, get_credential
 from deriva.core.datapath import DataPathException
-from deriva.core.ermrest_model import Model
 from deriva.core.ermrest_model import builtin_types, Schema, Table, Column
 from requests import HTTPError
 
```
```diff
@@ -35,48 +32,51 @@ TEST_DATASET_SIZE = 4
 def reset_demo_catalog(deriva_ml: DerivaML, sname: str):
     model = deriva_ml.model
     for trial in range(3):
-        for t in [
-            v
-            for v in model.schemas[sname].tables.values()
-            if v.name not in {"Subject", "Image"}
-        ]:
+        for t in [v for v in model.schemas[sname].tables.values()]:
             try:
                 t.drop()
             except HTTPError:
                 pass
-
+    model.schemas[sname].drop()
     # Empty out remaining tables.
     pb = deriva_ml.pathBuilder
     retry = True
     while retry:
-
-
-
-
-
-
-            except DataPathException:  # FK constraint.
-                retry = True
-
+        for t in pb.schemas["deriva-ml"].tables.values():
+            for e in t.entities().fetch():
+                try:
+                    t.filter(t.RID == e["RID"]).delete()
+                except DataPathException:  # FK constraint.
+                    retry = True
     initialize_ml_schema(model, "deriva-ml")
+    create_domain_schema(deriva_ml, sname)
 
 
 def populate_demo_catalog(deriva_ml: DerivaML, sname: str) -> None:
     # Delete any vocabularies and features.
-    reset_demo_catalog(deriva_ml, sname)
     domain_schema = deriva_ml.catalog.getPathBuilder().schemas[sname]
     subject = domain_schema.tables["Subject"]
     ss = subject.insert([{"Name": f"Thing{t + 1}"} for t in range(TEST_DATASET_SIZE)])
-
-
-
+    deriva_ml.add_term(
+        MLVocab.workflow_type,
+        "Demo Catalog Creation",
+        description="A workflow demonstrating how to create a demo catalog.",
+    )
+    execution = deriva_ml.create_execution(
+        ExecutionConfiguration(
+            workflow=deriva_ml.create_workflow(
+                name="Demo Catalog", workflow_type="Demo Catalog Creation"
+            )
+        )
+    )
+    with execution.execute() as e:
         for s in ss:
-            image_file =
-                f"test_{s['RID']}.txt",
+            image_file = e.asset_file_path(
+                "Image", f"test_{s['RID']}.txt", Subject=s["RID"]
             )
             with open(image_file, "w") as f:
                 f.write(f"Hello there {random()}\n")
-
+    execution.upload_execution_outputs()
 
 
 def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RID]]:
```
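The rewritten `populate_demo_catalog` shows the 1.12 idiom for producing assets: register a workflow type, create an execution, write files to paths handed out by `asset_file_path`, and finish with `upload_execution_outputs()`. Condensed to its essentials (hostname and catalog id are placeholders, the top-level import path is assumed, and this needs a live Deriva catalog and credentials to actually run):

```python
from deriva_ml import DerivaML, ExecutionConfiguration, MLVocab  # assumed import path

ml = DerivaML(hostname="example.derivacloud.org", catalog_id="1")  # placeholders

ml.add_term(
    MLVocab.workflow_type,
    "Demo Catalog Creation",
    description="A workflow demonstrating how to create a demo catalog.",
)
execution = ml.create_execution(
    ExecutionConfiguration(
        workflow=ml.create_workflow(
            name="Demo Catalog", workflow_type="Demo Catalog Creation"
        )
    )
)
with execution.execute() as e:
    # asset_file_path stages a local file destined for the "Image" asset table.
    image_file = e.asset_file_path("Image", "test_0001.txt", Subject="1-abc")
    with open(image_file, "w") as f:
        f.write("Hello there\n")
execution.upload_execution_outputs()  # upload staged assets and record provenance
```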
```diff
@@ -84,6 +84,13 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RID]]:
     ml_instance.add_dataset_element_type("Image")
 
     type_rid = ml_instance.add_term("Dataset_Type", "TestSet", description="A test")
+    training_rid = ml_instance.add_term(
+        "Dataset_Type", "Training", description="A traing set"
+    )
+    testing_rid = ml_instance.add_term(
+        "Dataset_Type", "Testing", description="A testing set"
+    )
+
     table_path = (
         ml_instance.catalog.getPathBuilder()
         .schemas[ml_instance.domain_schema]
@@ -94,7 +101,7 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RID]]:
     dataset_rids = []
     for r in subject_rids[0:4]:
         d = ml_instance.create_dataset(
-            type_rid.name,
+            type=[type_rid.name, "Testing"],
             description=f"Dataset {r}",
             version=DatasetVersion(1, 0, 0),
         )
@@ -104,7 +111,7 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RID]]:
     nested_datasets = []
     for i in range(0, 4, 2):
         nested_dataset = ml_instance.create_dataset(
-            type_rid.name,
+            type=[type_rid.name, "Training"],
             description=f"Nested Dataset {i}",
             version=DatasetVersion(1, 0, 0),
         )
```
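Note the call-signature change: `create_dataset` now takes the dataset types as a keyword list, so one dataset can carry several `Dataset_Type` terms at once. A minimal call-site sketch (the catalog connection and `DatasetVersion` import are assumed to exist, as in the diff):

```python
# One dataset tagged with two Dataset_Type vocabulary terms.
d = ml_instance.create_dataset(
    type=["TestSet", "Testing"],  # term names from the Dataset_Type vocabulary
    description="Dataset demo",
    version=DatasetVersion(1, 0, 0),
)
```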
```diff
@@ -132,13 +139,11 @@ def create_demo_features(ml_instance):
         "Well",
         description="The subject self reports that they feel well",
     )
-
     ml_instance.create_vocabulary(
         "ImageQuality", "Controlled vocabulary for image quality"
     )
     ml_instance.add_term("ImageQuality", "Good", description="The image is good")
     ml_instance.add_term("ImageQuality", "Bad", description="The image is bad")
-
     box_asset = ml_instance.create_asset(
         "BoundingBox", comment="A file that contains a cropped version of a image"
     )
@@ -150,7 +155,6 @@
         metadata=[ColumnDefinition(name="Scale", type=BuiltinTypes.int2, nullok=True)],
         optional=["Scale"],
     )
-
     ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset])
     ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"])
 
```
```diff
@@ -158,78 +162,88 @@
     ImageBoundingboxFeature = ml_instance.feature_record_class("Image", "BoundingBox")
     SubjectWellnessFeature = ml_instance.feature_record_class("Subject", "Health")
 
+    # Get the workflow for this notebook
+
     ml_instance.add_term(
         MLVocab.workflow_type,
-        "
+        "Feature Notebook Workflow",
         description="A Workflow that uses Deriva ML API",
     )
     ml_instance.add_term(
-        MLVocab.
-        "API_Model",
-        description="Model for our API workflow",
+        MLVocab.asset_type, "API_Model", description="Model for our Notebook workflow"
     )
-
-
-        name="API Workflow",
-        workflow_type="API Workflow",
+    notebook_workflow = ml_instance.create_workflow(
+        name="API Workflow", workflow_type="Feature Notebook Workflow"
     )
 
-
+    feature_execution = ml_instance.create_execution(
         ExecutionConfiguration(
-            workflow=
+            workflow=notebook_workflow, description="Our Sample Workflow instance"
         )
     )
 
-    with tempfile.TemporaryDirectory() as temp_dir:
-        assetdir = ml_instance.asset_dir("BoundingBox", prefix=temp_dir)
-        for i in range(10):
-            with open(assetdir.path / f"box{i}.txt", "w") as fp:
-                fp.write(f"Hi there {i}")
-        bounding_box_assets = ml_instance.upload_assets(assetdir)
-        bounding_box_rids = [a.result["RID"] for a in bounding_box_assets.values()]
-
-    # Get the IDs of al of the things that we are going to want to attach features to.
     subject_rids = [
         i["RID"] for i in ml_instance.domain_path.tables["Subject"].entities().fetch()
     ]
     image_rids = [
         i["RID"] for i in ml_instance.domain_path.tables["Image"].entities().fetch()
     ]
-
     subject_feature_list = [
         SubjectWellnessFeature(
             Subject=subject_rid,
-            Execution=
+            Execution=feature_execution.execution_rid,
             SubjectHealth=["Well", "Sick"][randint(0, 1)],
             Scale=randint(1, 10),
         )
         for subject_rid in subject_rids
     ]
 
+    # Create a new set of images. For fun, lets wrap this in an execution so we get status updates
+    bounding_box_files = []
+    for i in range(10):
+        bounding_box_file = feature_execution.asset_file_path(
+            "BoundingBox", f"box{i}.txt"
+        )
+        with open(bounding_box_file, "w") as fp:
+            fp.write(f"Hi there {i}")
+        bounding_box_files.append(bounding_box_file)
+
+    image_bounding_box_feature_list = [
+        ImageBoundingboxFeature(
+            Image=image_rid,
+            BoundingBox=asset_name,
+        )
+        for image_rid, asset_name in zip(
+            image_rids, itertools.cycle(bounding_box_files)
+        )
+    ]
+
     image_quality_feature_list = [
         ImageQualityFeature(
             Image=image_rid,
-            Execution=api_execution.execution_rid,
             ImageQuality=["Good", "Bad"][randint(0, 1)],
         )
         for image_rid in image_rids
     ]
 
-
-
-
-
-
+    subject_feature_list = [
+        SubjectWellnessFeature(
+            Subject=subject_rid,
+            SubjectHealth=["Well", "Sick"][randint(0, 1)],
+            Scale=randint(1, 10),
         )
-        for
+        for subject_rid in subject_rids
     ]
 
-
-
-
+    with feature_execution.execute() as execution:
+        feature_execution.add_features(image_bounding_box_feature_list)
+        feature_execution.add_features(image_quality_feature_list)
+        feature_execution.add_features(subject_feature_list)
+
+    feature_execution.upload_execution_outputs()
 
 
-def create_domain_schema(
+def create_domain_schema(ml_instance: DerivaML, sname: str) -> None:
     """
     Create a domain schema. Assumes that the ml-schema has already been created.
     :param model:
```
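The net effect of this hunk: asset files are now staged through the execution itself via `asset_file_path`, replacing the old temporary-directory `asset_dir`/`upload_assets` round trip, and feature records are attached inside an `execute()` block. Distilled to the bare flow (this is a summary of the diff above, not standalone code; it assumes the catalog, execution, and generated feature classes already exist):

```python
# Distilled from the diff above; requires a live catalog to run.
ImageQualityFeature = ml_instance.feature_record_class("Image", "Quality")

image_quality_feature_list = [
    ImageQualityFeature(Image=image_rid, ImageQuality="Good")
    for image_rid in image_rids
]

with feature_execution.execute() as execution:
    # Feature records are staged while the execution is active...
    feature_execution.add_features(image_quality_feature_list)

# ...and pushed to the catalog, along with any staged asset files, afterwards.
feature_execution.upload_execution_outputs()
```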
```diff
@@ -238,28 +252,19 @@ def create_domain_schema(model: Model, sname: str) -> None:
     """
 
     # Make sure that we have a ml schema
-    _ = model.schemas["deriva-ml"]
+    _ = ml_instance.model.schemas["deriva-ml"]
 
-    if model.schemas.get(sname):
+    if ml_instance.model.schemas.get(sname):
         # Clean out any old junk....
-        model.schemas[sname].drop()
+        ml_instance.model.schemas[sname].drop()
 
-    domain_schema = model.create_schema(
+    domain_schema = ml_instance.model.model.create_schema(
         Schema.define(sname, annotations={"name_style": {"underline_space": True}})
     )
     subject_table = domain_schema.create_table(
         Table.define("Subject", column_defs=[Column.define("Name", builtin_types.text)])
     )
-
-    image_table = domain_schema.create_table(
-        Table.define_asset(
-            sname=sname,
-            tname="Image",
-            hatrac_template="/hatrac/image_asset/{{MD5}}.{{Filename}}",
-            column_defs=[Column.define("Name", builtin_types.text)],
-        )
-    )
-    image_table.create_reference(subject_table)
+    ml_instance.create_asset("Image", referenced_tables=[subject_table])
 
 
 def destroy_demo_catalog(catalog):
```
```diff
@@ -284,13 +289,14 @@ def create_demo_catalog(
 
     try:
         create_ml_schema(model, project_name=project_name)
-        create_domain_schema(model, domain_schema)
         deriva_ml = DerivaML(
             hostname=hostname,
             catalog_id=test_catalog.catalog_id,
             project_name=project_name,
+            domain_schema=domain_schema,
             logging_level=logging.WARN,
         )
+        create_domain_schema(deriva_ml, domain_schema)
         working_dir = deriva_ml.working_dir
         dataset_table = deriva_ml.dataset_table
         dataset_table.annotations.update(
```
deriva_ml/deriva_definitions.py
CHANGED
```diff
@@ -186,9 +186,9 @@ class MLVocab(StrEnum):
 
     dataset_type = "Dataset_Type"
     workflow_type = "Workflow_Type"
-    execution_asset_type = "Execution_Asset_Type"
-    execution_metadata_type = "Execution_Metadata_Type"
     file_type = "File_Type"
+    asset_type = "Asset_Type"
+    asset_role = "Asset_Role"
 
 
 class ExecMetadataVocab(StrEnum):
```
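The vocabulary enum drops the execution-specific members in favor of generic `asset_type` and `asset_role` entries. Because `MLVocab` is a `StrEnum`, its members compare and format as plain strings, which is what lets call sites pass `MLVocab.asset_type` anywhere a vocabulary table name is expected. A minimal self-contained illustration (the enum body is copied from the diff; `enum.StrEnum` requires Python 3.11+):

```python
from enum import StrEnum  # Python 3.11+

class MLVocab(StrEnum):
    dataset_type = "Dataset_Type"
    workflow_type = "Workflow_Type"
    file_type = "File_Type"
    asset_type = "Asset_Type"
    asset_role = "Asset_Role"

# StrEnum members behave as strings, so they drop straight into messages and queries:
assert MLVocab.asset_type == "Asset_Type"
print(f"Controlled vocabulary table: {MLVocab.asset_type}")  # -> Asset_Type
```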