deriva-ml 1.17.9__py3-none-any.whl → 1.17.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/__init__.py +43 -1
- deriva_ml/asset/__init__.py +17 -0
- deriva_ml/asset/asset.py +357 -0
- deriva_ml/asset/aux_classes.py +100 -0
- deriva_ml/bump_version.py +254 -11
- deriva_ml/catalog/__init__.py +21 -0
- deriva_ml/catalog/clone.py +1199 -0
- deriva_ml/catalog/localize.py +426 -0
- deriva_ml/core/__init__.py +29 -0
- deriva_ml/core/base.py +817 -1067
- deriva_ml/core/config.py +169 -21
- deriva_ml/core/constants.py +120 -19
- deriva_ml/core/definitions.py +123 -13
- deriva_ml/core/enums.py +47 -73
- deriva_ml/core/ermrest.py +226 -193
- deriva_ml/core/exceptions.py +297 -14
- deriva_ml/core/filespec.py +99 -28
- deriva_ml/core/logging_config.py +225 -0
- deriva_ml/core/mixins/__init__.py +42 -0
- deriva_ml/core/mixins/annotation.py +915 -0
- deriva_ml/core/mixins/asset.py +384 -0
- deriva_ml/core/mixins/dataset.py +237 -0
- deriva_ml/core/mixins/execution.py +408 -0
- deriva_ml/core/mixins/feature.py +365 -0
- deriva_ml/core/mixins/file.py +263 -0
- deriva_ml/core/mixins/path_builder.py +145 -0
- deriva_ml/core/mixins/rid_resolution.py +204 -0
- deriva_ml/core/mixins/vocabulary.py +400 -0
- deriva_ml/core/mixins/workflow.py +322 -0
- deriva_ml/core/validation.py +389 -0
- deriva_ml/dataset/__init__.py +2 -1
- deriva_ml/dataset/aux_classes.py +20 -4
- deriva_ml/dataset/catalog_graph.py +575 -0
- deriva_ml/dataset/dataset.py +1242 -1008
- deriva_ml/dataset/dataset_bag.py +1311 -182
- deriva_ml/dataset/history.py +27 -14
- deriva_ml/dataset/upload.py +225 -38
- deriva_ml/demo_catalog.py +186 -105
- deriva_ml/execution/__init__.py +46 -2
- deriva_ml/execution/base_config.py +639 -0
- deriva_ml/execution/execution.py +545 -244
- deriva_ml/execution/execution_configuration.py +26 -11
- deriva_ml/execution/execution_record.py +592 -0
- deriva_ml/execution/find_caller.py +298 -0
- deriva_ml/execution/model_protocol.py +175 -0
- deriva_ml/execution/multirun_config.py +153 -0
- deriva_ml/execution/runner.py +595 -0
- deriva_ml/execution/workflow.py +224 -35
- deriva_ml/experiment/__init__.py +8 -0
- deriva_ml/experiment/experiment.py +411 -0
- deriva_ml/feature.py +6 -1
- deriva_ml/install_kernel.py +143 -6
- deriva_ml/interfaces.py +862 -0
- deriva_ml/model/__init__.py +99 -0
- deriva_ml/model/annotations.py +1278 -0
- deriva_ml/model/catalog.py +286 -60
- deriva_ml/model/database.py +144 -649
- deriva_ml/model/deriva_ml_database.py +308 -0
- deriva_ml/model/handles.py +14 -0
- deriva_ml/run_model.py +319 -0
- deriva_ml/run_notebook.py +507 -38
- deriva_ml/schema/__init__.py +18 -2
- deriva_ml/schema/annotations.py +62 -33
- deriva_ml/schema/create_schema.py +169 -69
- deriva_ml/schema/validation.py +601 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/METADATA +4 -5
- deriva_ml-1.17.11.dist-info/RECORD +77 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/WHEEL +1 -1
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/entry_points.txt +2 -0
- deriva_ml/protocols/dataset.py +0 -19
- deriva_ml/test.py +0 -94
- deriva_ml-1.17.9.dist-info/RECORD +0 -45
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.17.9.dist-info → deriva_ml-1.17.11.dist-info}/top_level.txt +0 -0
deriva_ml/schema/__init__.py
CHANGED
|
@@ -1,3 +1,19 @@
|
|
|
1
|
-
from deriva_ml.schema.create_schema import create_ml_catalog, reset_ml_schema
|
|
1
|
+
from deriva_ml.schema.create_schema import create_ml_catalog, create_ml_schema, reset_ml_schema
|
|
2
|
+
from deriva_ml.schema.validation import (
|
|
3
|
+
SchemaValidationReport,
|
|
4
|
+
SchemaValidator,
|
|
5
|
+
ValidationIssue,
|
|
6
|
+
ValidationSeverity,
|
|
7
|
+
validate_ml_schema,
|
|
8
|
+
)
|
|
2
9
|
|
|
3
|
-
__all__ = [
|
|
10
|
+
__all__ = [
|
|
11
|
+
"create_ml_catalog",
|
|
12
|
+
"create_ml_schema",
|
|
13
|
+
"reset_ml_schema",
|
|
14
|
+
"SchemaValidationReport",
|
|
15
|
+
"SchemaValidator",
|
|
16
|
+
"ValidationIssue",
|
|
17
|
+
"ValidationSeverity",
|
|
18
|
+
"validate_ml_schema",
|
|
19
|
+
]
|
deriva_ml/schema/annotations.py
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import sys
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
# Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
|
|
5
|
+
import importlib
|
|
6
|
+
_ermrest_model = importlib.import_module("deriva.core.ermrest_model")
|
|
7
|
+
_core_utils = importlib.import_module("deriva.core.utils.core_utils")
|
|
8
|
+
|
|
9
|
+
Model = _ermrest_model.Model
|
|
10
|
+
Table = _ermrest_model.Table
|
|
11
|
+
deriva_tags = _core_utils.tag
|
|
6
12
|
|
|
7
13
|
from deriva_ml.core.constants import DerivaAssetColumns
|
|
8
14
|
from deriva_ml.dataset.upload import bulk_upload_configuration
|
|
@@ -86,7 +92,7 @@ def catalog_annotation(model: DerivaModel) -> None:
|
|
|
86
92
|
},
|
|
87
93
|
],
|
|
88
94
|
},
|
|
89
|
-
{ #
|
|
95
|
+
{ # WWW schema tables.
|
|
90
96
|
"name": "WWW",
|
|
91
97
|
"children": [
|
|
92
98
|
{
|
|
@@ -99,19 +105,24 @@ def catalog_annotation(model: DerivaModel) -> None:
|
|
|
99
105
|
},
|
|
100
106
|
],
|
|
101
107
|
},
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
108
|
+
# One menu per domain schema
|
|
109
|
+
*[
|
|
110
|
+
{
|
|
111
|
+
"name": domain_schema,
|
|
112
|
+
"children": [
|
|
113
|
+
{
|
|
114
|
+
"name": tname,
|
|
115
|
+
"url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
|
|
116
|
+
}
|
|
117
|
+
for tname in model.schemas[domain_schema].tables
|
|
118
|
+
# Don't include controlled vocabularies, association tables, or feature tables.
|
|
119
|
+
if not (model.is_vocabulary(tname) or model.is_association(tname, pure=False, max_arity=3))
|
|
120
|
+
],
|
|
121
|
+
}
|
|
122
|
+
for domain_schema in sorted(model.domain_schemas)
|
|
123
|
+
if domain_schema in model.schemas
|
|
124
|
+
],
|
|
125
|
+
{ # Vocabulary menu with all controlled vocabularies.
|
|
115
126
|
"name": "Vocabulary",
|
|
116
127
|
"children": [{"name": f"{ml_schema} Vocabularies", "header": True}]
|
|
117
128
|
+ [
|
|
@@ -123,21 +134,22 @@ def catalog_annotation(model: DerivaModel) -> None:
|
|
|
123
134
|
if model.is_vocabulary(tname)
|
|
124
135
|
]
|
|
125
136
|
+ [
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
137
|
+
item
|
|
138
|
+
for domain_schema in sorted(model.domain_schemas)
|
|
139
|
+
if domain_schema in model.schemas
|
|
140
|
+
for item in [
|
|
141
|
+
{"name": f"{domain_schema} Vocabularies", "header": True}
|
|
142
|
+
] + [
|
|
143
|
+
{
|
|
144
|
+
"url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
|
|
145
|
+
"name": tname,
|
|
146
|
+
}
|
|
147
|
+
for tname in model.schemas[domain_schema].tables
|
|
148
|
+
if model.is_vocabulary(tname)
|
|
149
|
+
]
|
|
138
150
|
],
|
|
139
151
|
},
|
|
140
|
-
{ # List of all
|
|
152
|
+
{ # List of all asset tables.
|
|
141
153
|
"name": "Assets",
|
|
142
154
|
"children": [
|
|
143
155
|
{
|
|
@@ -149,10 +161,12 @@ def catalog_annotation(model: DerivaModel) -> None:
|
|
|
149
161
|
]
|
|
150
162
|
+ [
|
|
151
163
|
{
|
|
152
|
-
"url": f"/chaise/recordset/#{catalog_id}/{
|
|
164
|
+
"url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
|
|
153
165
|
"name": tname,
|
|
154
166
|
}
|
|
155
|
-
for
|
|
167
|
+
for domain_schema in sorted(model.domain_schemas)
|
|
168
|
+
if domain_schema in model.schemas
|
|
169
|
+
for tname in model.schemas[domain_schema].tables
|
|
156
170
|
if model.is_asset(tname)
|
|
157
171
|
],
|
|
158
172
|
},
|
|
@@ -248,7 +262,6 @@ def asset_annotation(asset_table: Table):
|
|
|
248
262
|
|
|
249
263
|
|
|
250
264
|
def generate_annotation(model: Model, schema: str) -> dict:
|
|
251
|
-
catalog_id = model.catalog.catalog_id
|
|
252
265
|
workflow_annotation = {
|
|
253
266
|
deriva_tags.visible_columns: {
|
|
254
267
|
"*": [
|
|
@@ -287,8 +300,24 @@ def generate_annotation(model: Model, schema: str) -> dict:
|
|
|
287
300
|
"Status_Detail",
|
|
288
301
|
]
|
|
289
302
|
},
|
|
290
|
-
|
|
303
|
+
deriva_tags.visible_foreign_keys: {
|
|
291
304
|
"detailed": [
|
|
305
|
+
{
|
|
306
|
+
"source": [
|
|
307
|
+
{"inbound": [schema, "Execution_Execution_Nested_Execution_fkey"]},
|
|
308
|
+
{"outbound": [schema, "Execution_Execution_Execution_fkey"]},
|
|
309
|
+
"RID",
|
|
310
|
+
],
|
|
311
|
+
"markdown_name": "Parent Executions",
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
"source": [
|
|
315
|
+
{"inbound": [schema, "Execution_Execution_Execution_fkey"]},
|
|
316
|
+
{"outbound": [schema, "Execution_Execution_Nested_Execution_fkey"]},
|
|
317
|
+
"RID",
|
|
318
|
+
],
|
|
319
|
+
"markdown_name": "Child Executions",
|
|
320
|
+
},
|
|
292
321
|
{
|
|
293
322
|
"source": [
|
|
294
323
|
{"inbound": [schema, "Dataset_Execution_Execution_fkey"]},
|
|
@@ -5,14 +5,16 @@ from importlib.resources import files
|
|
|
5
5
|
from typing import Any, Optional
|
|
6
6
|
|
|
7
7
|
from deriva.core import DerivaServer, ErmrestCatalog, get_credential
|
|
8
|
-
from deriva.core.ermrest_model import
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
8
|
+
from deriva.core.ermrest_model import Model, Schema, Table
|
|
9
|
+
from deriva.core.typed import (
|
|
10
|
+
BuiltinType,
|
|
11
|
+
ColumnDef,
|
|
12
|
+
ForeignKeyDef,
|
|
13
|
+
KeyDef,
|
|
14
|
+
SchemaDef,
|
|
15
|
+
TableDef,
|
|
16
|
+
VocabularyTableDef,
|
|
17
|
+
AssetTableDef,
|
|
16
18
|
)
|
|
17
19
|
|
|
18
20
|
from deriva_ml.core.definitions import ML_SCHEMA, MLTable, MLVocab
|
|
@@ -32,18 +34,21 @@ def create_dataset_table(
|
|
|
32
34
|
version_annotation: Optional[dict] = None,
|
|
33
35
|
) -> Table:
|
|
34
36
|
dataset_table = schema.create_table(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
TableDef(
|
|
38
|
+
name=MLTable.dataset,
|
|
39
|
+
columns=[
|
|
40
|
+
ColumnDef("Description", BuiltinType.markdown),
|
|
41
|
+
ColumnDef("Deleted", BuiltinType.boolean),
|
|
40
42
|
],
|
|
41
43
|
annotations=dataset_annotation if dataset_annotation is not None else {},
|
|
42
44
|
)
|
|
43
45
|
)
|
|
44
46
|
|
|
45
|
-
dataset_type = schema.create_table(
|
|
47
|
+
dataset_type = schema.create_table(
|
|
48
|
+
VocabularyTableDef(name=MLVocab.dataset_type, curie_template=f"{project_name}:{{RID}}")
|
|
49
|
+
)
|
|
46
50
|
|
|
51
|
+
# Association table for Dataset <-> Dataset_Type
|
|
47
52
|
schema.create_table(
|
|
48
53
|
Table.define_association(
|
|
49
54
|
associates=[
|
|
@@ -66,7 +71,7 @@ def create_dataset_table(
|
|
|
66
71
|
return dataset_table
|
|
67
72
|
|
|
68
73
|
|
|
69
|
-
def define_table_dataset_version(sname: str, annotation: Optional[dict] = None):
|
|
74
|
+
def define_table_dataset_version(sname: str, annotation: Optional[dict] = None) -> TableDef:
|
|
70
75
|
"""Define the dataset version table in the specified schema.
|
|
71
76
|
|
|
72
77
|
Args:
|
|
@@ -74,38 +79,47 @@ def define_table_dataset_version(sname: str, annotation: Optional[dict] = None):
|
|
|
74
79
|
annotation: Optional annotation dictionary for the table.
|
|
75
80
|
|
|
76
81
|
Returns:
|
|
77
|
-
|
|
82
|
+
A TableDef for the dataset version table.
|
|
78
83
|
"""
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
"Version",
|
|
84
|
-
|
|
84
|
+
return TableDef(
|
|
85
|
+
name=MLTable.dataset_version,
|
|
86
|
+
columns=[
|
|
87
|
+
ColumnDef(
|
|
88
|
+
name="Version",
|
|
89
|
+
type=BuiltinType.text,
|
|
85
90
|
default="0.1.0",
|
|
86
91
|
comment="Semantic version of dataset",
|
|
87
92
|
),
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
"Snapshot",
|
|
94
|
-
|
|
93
|
+
ColumnDef("Description", BuiltinType.markdown),
|
|
94
|
+
ColumnDef("Dataset", BuiltinType.text, comment="RID of dataset"),
|
|
95
|
+
ColumnDef("Execution", BuiltinType.text, comment="RID of execution"),
|
|
96
|
+
ColumnDef("Minid", BuiltinType.text, comment="URL to MINID for dataset"),
|
|
97
|
+
ColumnDef(
|
|
98
|
+
name="Snapshot",
|
|
99
|
+
type=BuiltinType.text,
|
|
95
100
|
comment="Catalog Snapshot ID for dataset",
|
|
96
101
|
),
|
|
97
102
|
],
|
|
98
|
-
annotations=annotation,
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
+
annotations=annotation if annotation else {},
|
|
104
|
+
keys=[KeyDef(columns=["Dataset", "Version"])],
|
|
105
|
+
foreign_keys=[
|
|
106
|
+
ForeignKeyDef(
|
|
107
|
+
columns=["Dataset"],
|
|
108
|
+
referenced_schema=sname,
|
|
109
|
+
referenced_table="Dataset",
|
|
110
|
+
referenced_columns=["RID"],
|
|
111
|
+
),
|
|
112
|
+
ForeignKeyDef(
|
|
113
|
+
columns=["Execution"],
|
|
114
|
+
referenced_schema=sname,
|
|
115
|
+
referenced_table="Execution",
|
|
116
|
+
referenced_columns=["RID"],
|
|
117
|
+
),
|
|
103
118
|
],
|
|
104
119
|
)
|
|
105
|
-
return table
|
|
106
120
|
|
|
107
121
|
|
|
108
|
-
def create_execution_table(schema, annotation: Optional[dict] = None):
|
|
122
|
+
def create_execution_table(schema: Schema, annotation: Optional[dict] = None) -> Table:
|
|
109
123
|
"""Create the execution table in the specified schema.
|
|
110
124
|
|
|
111
125
|
Args:
|
|
@@ -117,34 +131,71 @@ def create_execution_table(schema, annotation: Optional[dict] = None):
|
|
|
117
131
|
"""
|
|
118
132
|
annotation = annotation if annotation is not None else {}
|
|
119
133
|
execution = schema.create_table(
|
|
120
|
-
|
|
121
|
-
MLTable.execution,
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
134
|
+
TableDef(
|
|
135
|
+
name=MLTable.execution,
|
|
136
|
+
columns=[
|
|
137
|
+
ColumnDef("Workflow", BuiltinType.text),
|
|
138
|
+
ColumnDef("Description", BuiltinType.markdown),
|
|
139
|
+
ColumnDef("Duration", BuiltinType.text),
|
|
140
|
+
ColumnDef("Status", BuiltinType.text),
|
|
141
|
+
ColumnDef("Status_Detail", BuiltinType.text),
|
|
142
|
+
],
|
|
143
|
+
foreign_keys=[
|
|
144
|
+
ForeignKeyDef(
|
|
145
|
+
columns=["Workflow"],
|
|
146
|
+
referenced_schema=schema.name,
|
|
147
|
+
referenced_table="Workflow",
|
|
148
|
+
referenced_columns=["RID"],
|
|
149
|
+
)
|
|
128
150
|
],
|
|
129
|
-
fkey_defs=[ForeignKey.define(["Workflow"], schema.name, "Workflow", ["RID"])],
|
|
130
151
|
annotations=annotation,
|
|
131
152
|
)
|
|
132
153
|
)
|
|
154
|
+
|
|
155
|
+
# Nested executions - allows grouping executions hierarchically
|
|
156
|
+
# (e.g., a sweep/multirun as parent with individual runs as children)
|
|
157
|
+
schema.create_table(
|
|
158
|
+
Table.define_association(
|
|
159
|
+
associates=[("Execution", execution), ("Nested_Execution", execution)],
|
|
160
|
+
comment="Association table for hierarchical execution nesting (parent-child relationships)",
|
|
161
|
+
metadata=[
|
|
162
|
+
ColumnDef(
|
|
163
|
+
name="Sequence",
|
|
164
|
+
type=BuiltinType.int4,
|
|
165
|
+
nullok=True,
|
|
166
|
+
comment="Order of nested execution (null if parallel)",
|
|
167
|
+
).to_dict() # Convert to dict for Table.define_association()
|
|
168
|
+
],
|
|
169
|
+
)
|
|
170
|
+
)
|
|
133
171
|
return execution
|
|
134
172
|
|
|
135
173
|
|
|
136
174
|
def create_asset_table(
|
|
137
|
-
schema,
|
|
175
|
+
schema: Schema,
|
|
138
176
|
asset_name: str,
|
|
139
|
-
execution_table,
|
|
140
|
-
asset_type_table,
|
|
141
|
-
asset_role_table,
|
|
177
|
+
execution_table: Table,
|
|
178
|
+
asset_type_table: Table,
|
|
179
|
+
asset_role_table: Table,
|
|
142
180
|
use_hatrac: bool = True,
|
|
143
|
-
):
|
|
181
|
+
) -> Table:
|
|
182
|
+
"""Create an asset table with associated type and execution associations.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
schema: The schema where the table should be created.
|
|
186
|
+
asset_name: Name for the asset table.
|
|
187
|
+
execution_table: The execution table for association.
|
|
188
|
+
asset_type_table: The asset type vocabulary table.
|
|
189
|
+
asset_role_table: The asset role vocabulary table.
|
|
190
|
+
use_hatrac: Whether to use Hatrac for file storage (default True).
|
|
191
|
+
|
|
192
|
+
Returns:
|
|
193
|
+
The created asset Table object.
|
|
194
|
+
"""
|
|
144
195
|
asset_table = schema.create_table(
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
196
|
+
AssetTableDef(
|
|
197
|
+
schema_name=schema.name,
|
|
198
|
+
name=asset_name,
|
|
148
199
|
hatrac_template="/hatrac/metadata/{{MD5}}.{{Filename}}",
|
|
149
200
|
)
|
|
150
201
|
)
|
|
@@ -170,7 +221,7 @@ def create_asset_table(
|
|
|
170
221
|
return asset_table
|
|
171
222
|
|
|
172
223
|
|
|
173
|
-
def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]] = None):
|
|
224
|
+
def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]] = None) -> Table:
|
|
174
225
|
"""Create the workflow table in the specified schema.
|
|
175
226
|
|
|
176
227
|
Args:
|
|
@@ -181,20 +232,22 @@ def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]]
|
|
|
181
232
|
The created Table object.
|
|
182
233
|
"""
|
|
183
234
|
workflow_table = schema.create_table(
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
235
|
+
TableDef(
|
|
236
|
+
name=MLTable.workflow,
|
|
237
|
+
columns=[
|
|
238
|
+
ColumnDef("Name", BuiltinType.text),
|
|
239
|
+
ColumnDef("Description", BuiltinType.markdown),
|
|
240
|
+
ColumnDef("URL", BuiltinType.ermrest_uri),
|
|
241
|
+
ColumnDef("Checksum", BuiltinType.text),
|
|
242
|
+
ColumnDef("Version", BuiltinType.text),
|
|
192
243
|
],
|
|
193
|
-
annotations=annotations,
|
|
244
|
+
annotations=annotations if annotations else {},
|
|
194
245
|
)
|
|
195
246
|
)
|
|
196
247
|
workflow_table.create_reference(
|
|
197
|
-
schema.create_table(
|
|
248
|
+
schema.create_table(
|
|
249
|
+
VocabularyTableDef(name=MLVocab.workflow_type, curie_template=f"{schema.name}:{{RID}}")
|
|
250
|
+
)
|
|
198
251
|
)
|
|
199
252
|
return workflow_table
|
|
200
253
|
|
|
@@ -221,13 +274,21 @@ def create_ml_schema(
|
|
|
221
274
|
model.schemas["public"].tables["ERMrest_Client"].annotations.update(client_annotation)
|
|
222
275
|
model.apply()
|
|
223
276
|
|
|
224
|
-
schema = model.create_schema(
|
|
277
|
+
schema = model.create_schema(
|
|
278
|
+
SchemaDef(name=schema_name, annotations=annotations["schema_annotation"])
|
|
279
|
+
)
|
|
225
280
|
|
|
226
281
|
# Create workflow and execution table.
|
|
227
282
|
|
|
228
|
-
schema.create_table(
|
|
229
|
-
|
|
230
|
-
|
|
283
|
+
schema.create_table(
|
|
284
|
+
VocabularyTableDef(name=MLVocab.feature_name, curie_template=f"{project_name}:{{RID}}")
|
|
285
|
+
)
|
|
286
|
+
asset_type_table = schema.create_table(
|
|
287
|
+
VocabularyTableDef(name=MLVocab.asset_type, curie_template=f"{project_name}:{{RID}}")
|
|
288
|
+
)
|
|
289
|
+
asset_role_table = schema.create_table(
|
|
290
|
+
VocabularyTableDef(name=MLVocab.asset_role, curie_template=f"{project_name}:{{RID}}")
|
|
291
|
+
)
|
|
231
292
|
|
|
232
293
|
create_workflow_table(schema, annotations["workflow_annotation"])
|
|
233
294
|
execution_table = create_execution_table(schema, annotations["execution_annotation"])
|
|
@@ -300,6 +361,14 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
|
|
|
300
361
|
"Name": "Runtime_Env",
|
|
301
362
|
"Description": "Information about the runtime environment",
|
|
302
363
|
},
|
|
364
|
+
{
|
|
365
|
+
"Name": "Hydra_Config",
|
|
366
|
+
"Description": "Hydra YAML configuration file (config.yaml, overrides.yaml, hydra.yaml)",
|
|
367
|
+
},
|
|
368
|
+
{
|
|
369
|
+
"Name": "Deriva_Config",
|
|
370
|
+
"Description": "DerivaML execution configuration (configuration.json with datasets, assets, workflow)",
|
|
371
|
+
},
|
|
303
372
|
{
|
|
304
373
|
"Name": "Execution_Metadata",
|
|
305
374
|
"Description": "Information about the execution environment",
|
|
@@ -335,7 +404,28 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
|
|
|
335
404
|
)
|
|
336
405
|
|
|
337
406
|
|
|
338
|
-
def create_ml_catalog(
|
|
407
|
+
def create_ml_catalog(
|
|
408
|
+
hostname: str,
|
|
409
|
+
project_name: str,
|
|
410
|
+
catalog_alias: str | None = None,
|
|
411
|
+
) -> ErmrestCatalog:
|
|
412
|
+
"""Create a new DerivaML catalog with all ML schema tables.
|
|
413
|
+
|
|
414
|
+
Args:
|
|
415
|
+
hostname: Server hostname (e.g., "localhost", "www.eye-ai.org").
|
|
416
|
+
project_name: Name for the project, becomes the domain schema name.
|
|
417
|
+
catalog_alias: Optional alias name for the catalog. If provided, creates
|
|
418
|
+
an alias that points to the new catalog, allowing access via the
|
|
419
|
+
alias name instead of the numeric catalog ID.
|
|
420
|
+
|
|
421
|
+
Returns:
|
|
422
|
+
The created ErmrestCatalog instance.
|
|
423
|
+
|
|
424
|
+
Example:
|
|
425
|
+
# Create catalog with alias
|
|
426
|
+
catalog = create_ml_catalog("localhost", "my_project", catalog_alias="my-project")
|
|
427
|
+
# Now accessible as both /ermrest/catalog/<id> and /ermrest/catalog/my-project
|
|
428
|
+
"""
|
|
339
429
|
server = DerivaServer("https", hostname, credentials=get_credential(hostname))
|
|
340
430
|
catalog = server.create_ermrest_catalog()
|
|
341
431
|
model = catalog.getCatalogModel()
|
|
@@ -352,6 +442,16 @@ def create_ml_catalog(hostname: str, project_name: str) -> ErmrestCatalog:
|
|
|
352
442
|
]
|
|
353
443
|
)
|
|
354
444
|
create_ml_schema(catalog, project_name=project_name)
|
|
445
|
+
|
|
446
|
+
# Create alias if requested
|
|
447
|
+
if catalog_alias:
|
|
448
|
+
server.create_ermrest_alias(
|
|
449
|
+
id=catalog_alias,
|
|
450
|
+
alias_target=catalog.catalog_id,
|
|
451
|
+
name=project_name,
|
|
452
|
+
description=f"Alias for {project_name} catalog (ID: {catalog.catalog_id})",
|
|
453
|
+
)
|
|
454
|
+
|
|
355
455
|
return catalog
|
|
356
456
|
|
|
357
457
|
|