deriva-ml 1.17.10__py3-none-any.whl → 1.17.12__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (74)
  1. deriva_ml/__init__.py +69 -1
  2. deriva_ml/asset/__init__.py +17 -0
  3. deriva_ml/asset/asset.py +357 -0
  4. deriva_ml/asset/aux_classes.py +100 -0
  5. deriva_ml/bump_version.py +254 -11
  6. deriva_ml/catalog/__init__.py +31 -0
  7. deriva_ml/catalog/clone.py +1939 -0
  8. deriva_ml/catalog/localize.py +426 -0
  9. deriva_ml/core/__init__.py +29 -0
  10. deriva_ml/core/base.py +845 -1067
  11. deriva_ml/core/config.py +169 -21
  12. deriva_ml/core/constants.py +120 -19
  13. deriva_ml/core/definitions.py +123 -13
  14. deriva_ml/core/enums.py +47 -73
  15. deriva_ml/core/ermrest.py +226 -193
  16. deriva_ml/core/exceptions.py +297 -14
  17. deriva_ml/core/filespec.py +99 -28
  18. deriva_ml/core/logging_config.py +225 -0
  19. deriva_ml/core/mixins/__init__.py +42 -0
  20. deriva_ml/core/mixins/annotation.py +915 -0
  21. deriva_ml/core/mixins/asset.py +384 -0
  22. deriva_ml/core/mixins/dataset.py +237 -0
  23. deriva_ml/core/mixins/execution.py +408 -0
  24. deriva_ml/core/mixins/feature.py +365 -0
  25. deriva_ml/core/mixins/file.py +263 -0
  26. deriva_ml/core/mixins/path_builder.py +145 -0
  27. deriva_ml/core/mixins/rid_resolution.py +204 -0
  28. deriva_ml/core/mixins/vocabulary.py +400 -0
  29. deriva_ml/core/mixins/workflow.py +322 -0
  30. deriva_ml/core/validation.py +389 -0
  31. deriva_ml/dataset/__init__.py +2 -1
  32. deriva_ml/dataset/aux_classes.py +20 -4
  33. deriva_ml/dataset/catalog_graph.py +575 -0
  34. deriva_ml/dataset/dataset.py +1242 -1008
  35. deriva_ml/dataset/dataset_bag.py +1311 -182
  36. deriva_ml/dataset/history.py +27 -14
  37. deriva_ml/dataset/upload.py +225 -38
  38. deriva_ml/demo_catalog.py +126 -110
  39. deriva_ml/execution/__init__.py +46 -2
  40. deriva_ml/execution/base_config.py +639 -0
  41. deriva_ml/execution/execution.py +543 -242
  42. deriva_ml/execution/execution_configuration.py +26 -11
  43. deriva_ml/execution/execution_record.py +592 -0
  44. deriva_ml/execution/find_caller.py +298 -0
  45. deriva_ml/execution/model_protocol.py +175 -0
  46. deriva_ml/execution/multirun_config.py +153 -0
  47. deriva_ml/execution/runner.py +595 -0
  48. deriva_ml/execution/workflow.py +223 -34
  49. deriva_ml/experiment/__init__.py +8 -0
  50. deriva_ml/experiment/experiment.py +411 -0
  51. deriva_ml/feature.py +6 -1
  52. deriva_ml/install_kernel.py +143 -6
  53. deriva_ml/interfaces.py +862 -0
  54. deriva_ml/model/__init__.py +99 -0
  55. deriva_ml/model/annotations.py +1278 -0
  56. deriva_ml/model/catalog.py +286 -60
  57. deriva_ml/model/database.py +144 -649
  58. deriva_ml/model/deriva_ml_database.py +308 -0
  59. deriva_ml/model/handles.py +14 -0
  60. deriva_ml/run_model.py +319 -0
  61. deriva_ml/run_notebook.py +507 -38
  62. deriva_ml/schema/__init__.py +18 -2
  63. deriva_ml/schema/annotations.py +62 -33
  64. deriva_ml/schema/create_schema.py +169 -69
  65. deriva_ml/schema/validation.py +601 -0
  66. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/METADATA +4 -4
  67. deriva_ml-1.17.12.dist-info/RECORD +77 -0
  68. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/WHEEL +1 -1
  69. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/entry_points.txt +1 -0
  70. deriva_ml/protocols/dataset.py +0 -19
  71. deriva_ml/test.py +0 -94
  72. deriva_ml-1.17.10.dist-info/RECORD +0 -45
  73. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/licenses/LICENSE +0 -0
  74. {deriva_ml-1.17.10.dist-info → deriva_ml-1.17.12.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,19 @@
1
- from deriva_ml.schema.create_schema import create_ml_catalog, reset_ml_schema
1
+ from deriva_ml.schema.create_schema import create_ml_catalog, create_ml_schema, reset_ml_schema
2
+ from deriva_ml.schema.validation import (
3
+ SchemaValidationReport,
4
+ SchemaValidator,
5
+ ValidationIssue,
6
+ ValidationSeverity,
7
+ validate_ml_schema,
8
+ )
2
9
 
3
- __all__ = ["create_ml_catalog", "reset_ml_schema"]
10
+ __all__ = [
11
+ "create_ml_catalog",
12
+ "create_ml_schema",
13
+ "reset_ml_schema",
14
+ "SchemaValidationReport",
15
+ "SchemaValidator",
16
+ "ValidationIssue",
17
+ "ValidationSeverity",
18
+ "validate_ml_schema",
19
+ ]
@@ -1,8 +1,14 @@
1
1
  import argparse
2
2
  import sys
3
3
 
4
- from deriva.core.ermrest_model import Model, Table
5
- from deriva.core.utils.core_utils import tag as deriva_tags
4
+ # Deriva imports - use importlib to avoid shadowing by local 'deriva.py' files
5
+ import importlib
6
+ _ermrest_model = importlib.import_module("deriva.core.ermrest_model")
7
+ _core_utils = importlib.import_module("deriva.core.utils.core_utils")
8
+
9
+ Model = _ermrest_model.Model
10
+ Table = _ermrest_model.Table
11
+ deriva_tags = _core_utils.tag
6
12
 
7
13
  from deriva_ml.core.constants import DerivaAssetColumns
8
14
  from deriva_ml.dataset.upload import bulk_upload_configuration
@@ -86,7 +92,7 @@ def catalog_annotation(model: DerivaModel) -> None:
86
92
  },
87
93
  ],
88
94
  },
89
- { # All the primary tables in deriva-ml schema.
95
+ { # WWW schema tables.
90
96
  "name": "WWW",
91
97
  "children": [
92
98
  {
@@ -99,19 +105,24 @@ def catalog_annotation(model: DerivaModel) -> None:
99
105
  },
100
106
  ],
101
107
  },
102
- {
103
- "name": model.domain_schema,
104
- "children": [
105
- {
106
- "name": tname,
107
- "url": f"/chaise/recordset/#{catalog_id}/{model.domain_schema}:{tname}",
108
- }
109
- for tname in model.schemas[model.domain_schema].tables
110
- # Don't include controlled vocabularies, association tables, or feature tables.
111
- if not (model.is_vocabulary(tname) or model.is_association(tname, pure=False, max_arity=3))
112
- ],
113
- },
114
- { # Vocabulary menu which will list all the controlled vocabularies in deriva-ml and domain.
108
+ # One menu per domain schema
109
+ *[
110
+ {
111
+ "name": domain_schema,
112
+ "children": [
113
+ {
114
+ "name": tname,
115
+ "url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
116
+ }
117
+ for tname in model.schemas[domain_schema].tables
118
+ # Don't include controlled vocabularies, association tables, or feature tables.
119
+ if not (model.is_vocabulary(tname) or model.is_association(tname, pure=False, max_arity=3))
120
+ ],
121
+ }
122
+ for domain_schema in sorted(model.domain_schemas)
123
+ if domain_schema in model.schemas
124
+ ],
125
+ { # Vocabulary menu with all controlled vocabularies.
115
126
  "name": "Vocabulary",
116
127
  "children": [{"name": f"{ml_schema} Vocabularies", "header": True}]
117
128
  + [
@@ -123,21 +134,22 @@ def catalog_annotation(model: DerivaModel) -> None:
123
134
  if model.is_vocabulary(tname)
124
135
  ]
125
136
  + [
126
- {
127
- "name": f"{model.domain_schema} Vocabularies",
128
- "header": True,
129
- }
130
- ]
131
- + [
132
- {
133
- "url": f"/chaise/recordset/#{catalog_id}/{model.domain_schema}:{tname}",
134
- "name": tname,
135
- }
136
- for tname in model.schemas[model.domain_schema].tables
137
- if model.is_vocabulary(tname)
137
+ item
138
+ for domain_schema in sorted(model.domain_schemas)
139
+ if domain_schema in model.schemas
140
+ for item in [
141
+ {"name": f"{domain_schema} Vocabularies", "header": True}
142
+ ] + [
143
+ {
144
+ "url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
145
+ "name": tname,
146
+ }
147
+ for tname in model.schemas[domain_schema].tables
148
+ if model.is_vocabulary(tname)
149
+ ]
138
150
  ],
139
151
  },
140
- { # List of all of the asset tables in deriva-ml and domain schemas.
152
+ { # List of all asset tables.
141
153
  "name": "Assets",
142
154
  "children": [
143
155
  {
@@ -149,10 +161,12 @@ def catalog_annotation(model: DerivaModel) -> None:
149
161
  ]
150
162
  + [
151
163
  {
152
- "url": f"/chaise/recordset/#{catalog_id}/{model.domain_schema}:{tname}",
164
+ "url": f"/chaise/recordset/#{catalog_id}/{domain_schema}:{tname}",
153
165
  "name": tname,
154
166
  }
155
- for tname in model.schemas[model.domain_schema].tables
167
+ for domain_schema in sorted(model.domain_schemas)
168
+ if domain_schema in model.schemas
169
+ for tname in model.schemas[domain_schema].tables
156
170
  if model.is_asset(tname)
157
171
  ],
158
172
  },
@@ -248,7 +262,6 @@ def asset_annotation(asset_table: Table):
248
262
 
249
263
 
250
264
  def generate_annotation(model: Model, schema: str) -> dict:
251
- catalog_id = model.catalog.catalog_id
252
265
  workflow_annotation = {
253
266
  deriva_tags.visible_columns: {
254
267
  "*": [
@@ -287,8 +300,24 @@ def generate_annotation(model: Model, schema: str) -> dict:
287
300
  "Status_Detail",
288
301
  ]
289
302
  },
290
- "tag:isrd.isi.edu,2016:visible-foreign-keys": {
303
+ deriva_tags.visible_foreign_keys: {
291
304
  "detailed": [
305
+ {
306
+ "source": [
307
+ {"inbound": [schema, "Execution_Execution_Nested_Execution_fkey"]},
308
+ {"outbound": [schema, "Execution_Execution_Execution_fkey"]},
309
+ "RID",
310
+ ],
311
+ "markdown_name": "Parent Executions",
312
+ },
313
+ {
314
+ "source": [
315
+ {"inbound": [schema, "Execution_Execution_Execution_fkey"]},
316
+ {"outbound": [schema, "Execution_Execution_Nested_Execution_fkey"]},
317
+ "RID",
318
+ ],
319
+ "markdown_name": "Child Executions",
320
+ },
292
321
  {
293
322
  "source": [
294
323
  {"inbound": [schema, "Dataset_Execution_Execution_fkey"]},
@@ -5,14 +5,16 @@ from importlib.resources import files
5
5
  from typing import Any, Optional
6
6
 
7
7
  from deriva.core import DerivaServer, ErmrestCatalog, get_credential
8
- from deriva.core.ermrest_model import (
9
- Column,
10
- ForeignKey,
11
- Key,
12
- Model,
13
- Schema,
14
- Table,
15
- builtin_types,
8
+ from deriva.core.ermrest_model import Model, Schema, Table
9
+ from deriva.core.typed import (
10
+ BuiltinType,
11
+ ColumnDef,
12
+ ForeignKeyDef,
13
+ KeyDef,
14
+ SchemaDef,
15
+ TableDef,
16
+ VocabularyTableDef,
17
+ AssetTableDef,
16
18
  )
17
19
 
18
20
  from deriva_ml.core.definitions import ML_SCHEMA, MLTable, MLVocab
@@ -32,18 +34,21 @@ def create_dataset_table(
32
34
  version_annotation: Optional[dict] = None,
33
35
  ) -> Table:
34
36
  dataset_table = schema.create_table(
35
- Table.define(
36
- tname=MLTable.dataset,
37
- column_defs=[
38
- Column.define("Description", builtin_types.markdown),
39
- Column.define("Deleted", builtin_types.boolean),
37
+ TableDef(
38
+ name=MLTable.dataset,
39
+ columns=[
40
+ ColumnDef("Description", BuiltinType.markdown),
41
+ ColumnDef("Deleted", BuiltinType.boolean),
40
42
  ],
41
43
  annotations=dataset_annotation if dataset_annotation is not None else {},
42
44
  )
43
45
  )
44
46
 
45
- dataset_type = schema.create_table(Table.define_vocabulary(MLVocab.dataset_type, f"{project_name}:{{RID}}"))
47
+ dataset_type = schema.create_table(
48
+ VocabularyTableDef(name=MLVocab.dataset_type, curie_template=f"{project_name}:{{RID}}")
49
+ )
46
50
 
51
+ # Association table for Dataset <-> Dataset_Type
47
52
  schema.create_table(
48
53
  Table.define_association(
49
54
  associates=[
@@ -66,7 +71,7 @@ def create_dataset_table(
66
71
  return dataset_table
67
72
 
68
73
 
69
- def define_table_dataset_version(sname: str, annotation: Optional[dict] = None):
74
+ def define_table_dataset_version(sname: str, annotation: Optional[dict] = None) -> TableDef:
70
75
  """Define the dataset version table in the specified schema.
71
76
 
72
77
  Args:
@@ -74,38 +79,47 @@ def define_table_dataset_version(sname: str, annotation: Optional[dict] = None):
74
79
  annotation: Optional annotation dictionary for the table.
75
80
 
76
81
  Returns:
77
- The created Table object.
82
+ A TableDef for the dataset version table.
78
83
  """
79
- table = Table.define(
80
- tname=MLTable.dataset_version,
81
- column_defs=[
82
- Column.define(
83
- "Version",
84
- builtin_types.text,
84
+ return TableDef(
85
+ name=MLTable.dataset_version,
86
+ columns=[
87
+ ColumnDef(
88
+ name="Version",
89
+ type=BuiltinType.text,
85
90
  default="0.1.0",
86
91
  comment="Semantic version of dataset",
87
92
  ),
88
- Column.define("Description", builtin_types.markdown),
89
- Column.define("Dataset", builtin_types.text, comment="RID of dataset"),
90
- Column.define("Execution", builtin_types.text, comment="RID of execution"),
91
- Column.define("Minid", builtin_types.text, comment="URL to MINID for dataset"),
92
- Column.define(
93
- "Snapshot",
94
- builtin_types.text,
93
+ ColumnDef("Description", BuiltinType.markdown),
94
+ ColumnDef("Dataset", BuiltinType.text, comment="RID of dataset"),
95
+ ColumnDef("Execution", BuiltinType.text, comment="RID of execution"),
96
+ ColumnDef("Minid", BuiltinType.text, comment="URL to MINID for dataset"),
97
+ ColumnDef(
98
+ name="Snapshot",
99
+ type=BuiltinType.text,
95
100
  comment="Catalog Snapshot ID for dataset",
96
101
  ),
97
102
  ],
98
- annotations=annotation,
99
- key_defs=[Key.define(["Dataset", "Version"])],
100
- fkey_defs=[
101
- ForeignKey.define(["Dataset"], sname, "Dataset", ["RID"]),
102
- ForeignKey.define(["Execution"], sname, "Execution", ["RID"]),
103
+ annotations=annotation if annotation else {},
104
+ keys=[KeyDef(columns=["Dataset", "Version"])],
105
+ foreign_keys=[
106
+ ForeignKeyDef(
107
+ columns=["Dataset"],
108
+ referenced_schema=sname,
109
+ referenced_table="Dataset",
110
+ referenced_columns=["RID"],
111
+ ),
112
+ ForeignKeyDef(
113
+ columns=["Execution"],
114
+ referenced_schema=sname,
115
+ referenced_table="Execution",
116
+ referenced_columns=["RID"],
117
+ ),
103
118
  ],
104
119
  )
105
- return table
106
120
 
107
121
 
108
- def create_execution_table(schema, annotation: Optional[dict] = None):
122
+ def create_execution_table(schema: Schema, annotation: Optional[dict] = None) -> Table:
109
123
  """Create the execution table in the specified schema.
110
124
 
111
125
  Args:
@@ -117,34 +131,71 @@ def create_execution_table(schema, annotation: Optional[dict] = None):
117
131
  """
118
132
  annotation = annotation if annotation is not None else {}
119
133
  execution = schema.create_table(
120
- Table.define(
121
- MLTable.execution,
122
- column_defs=[
123
- Column.define("Workflow", builtin_types.text),
124
- Column.define("Description", builtin_types.markdown),
125
- Column.define("Duration", builtin_types.text),
126
- Column.define("Status", builtin_types.text),
127
- Column.define("Status_Detail", builtin_types.text),
134
+ TableDef(
135
+ name=MLTable.execution,
136
+ columns=[
137
+ ColumnDef("Workflow", BuiltinType.text),
138
+ ColumnDef("Description", BuiltinType.markdown),
139
+ ColumnDef("Duration", BuiltinType.text),
140
+ ColumnDef("Status", BuiltinType.text),
141
+ ColumnDef("Status_Detail", BuiltinType.text),
142
+ ],
143
+ foreign_keys=[
144
+ ForeignKeyDef(
145
+ columns=["Workflow"],
146
+ referenced_schema=schema.name,
147
+ referenced_table="Workflow",
148
+ referenced_columns=["RID"],
149
+ )
128
150
  ],
129
- fkey_defs=[ForeignKey.define(["Workflow"], schema.name, "Workflow", ["RID"])],
130
151
  annotations=annotation,
131
152
  )
132
153
  )
154
+
155
+ # Nested executions - allows grouping executions hierarchically
156
+ # (e.g., a sweep/multirun as parent with individual runs as children)
157
+ schema.create_table(
158
+ Table.define_association(
159
+ associates=[("Execution", execution), ("Nested_Execution", execution)],
160
+ comment="Association table for hierarchical execution nesting (parent-child relationships)",
161
+ metadata=[
162
+ ColumnDef(
163
+ name="Sequence",
164
+ type=BuiltinType.int4,
165
+ nullok=True,
166
+ comment="Order of nested execution (null if parallel)",
167
+ ).to_dict() # Convert to dict for Table.define_association()
168
+ ],
169
+ )
170
+ )
133
171
  return execution
134
172
 
135
173
 
136
174
  def create_asset_table(
137
- schema,
175
+ schema: Schema,
138
176
  asset_name: str,
139
- execution_table,
140
- asset_type_table,
141
- asset_role_table,
177
+ execution_table: Table,
178
+ asset_type_table: Table,
179
+ asset_role_table: Table,
142
180
  use_hatrac: bool = True,
143
- ):
181
+ ) -> Table:
182
+ """Create an asset table with associated type and execution associations.
183
+
184
+ Args:
185
+ schema: The schema where the table should be created.
186
+ asset_name: Name for the asset table.
187
+ execution_table: The execution table for association.
188
+ asset_type_table: The asset type vocabulary table.
189
+ asset_role_table: The asset role vocabulary table.
190
+ use_hatrac: Whether to use Hatrac for file storage (default True).
191
+
192
+ Returns:
193
+ The created asset Table object.
194
+ """
144
195
  asset_table = schema.create_table(
145
- Table.define_asset(
146
- sname=schema.name,
147
- tname=asset_name,
196
+ AssetTableDef(
197
+ schema_name=schema.name,
198
+ name=asset_name,
148
199
  hatrac_template="/hatrac/metadata/{{MD5}}.{{Filename}}",
149
200
  )
150
201
  )
@@ -170,7 +221,7 @@ def create_asset_table(
170
221
  return asset_table
171
222
 
172
223
 
173
- def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]] = None):
224
+ def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]] = None) -> Table:
174
225
  """Create the workflow table in the specified schema.
175
226
 
176
227
  Args:
@@ -181,20 +232,22 @@ def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]]
181
232
  The created Table object.
182
233
  """
183
234
  workflow_table = schema.create_table(
184
- Table.define(
185
- tname=MLTable.workflow,
186
- column_defs=[
187
- Column.define("Name", builtin_types.text),
188
- Column.define("Description", builtin_types.markdown),
189
- Column.define("URL", builtin_types.ermrest_uri),
190
- Column.define("Checksum", builtin_types.text),
191
- Column.define("Version", builtin_types.text),
235
+ TableDef(
236
+ name=MLTable.workflow,
237
+ columns=[
238
+ ColumnDef("Name", BuiltinType.text),
239
+ ColumnDef("Description", BuiltinType.markdown),
240
+ ColumnDef("URL", BuiltinType.ermrest_uri),
241
+ ColumnDef("Checksum", BuiltinType.text),
242
+ ColumnDef("Version", BuiltinType.text),
192
243
  ],
193
- annotations=annotations,
244
+ annotations=annotations if annotations else {},
194
245
  )
195
246
  )
196
247
  workflow_table.create_reference(
197
- schema.create_table(Table.define_vocabulary(MLVocab.workflow_type, f"{schema.name}:{{RID}}"))
248
+ schema.create_table(
249
+ VocabularyTableDef(name=MLVocab.workflow_type, curie_template=f"{schema.name}:{{RID}}")
250
+ )
198
251
  )
199
252
  return workflow_table
200
253
 
@@ -221,13 +274,21 @@ def create_ml_schema(
221
274
  model.schemas["public"].tables["ERMrest_Client"].annotations.update(client_annotation)
222
275
  model.apply()
223
276
 
224
- schema = model.create_schema(Schema.define(schema_name, annotations=annotations["schema_annotation"]))
277
+ schema = model.create_schema(
278
+ SchemaDef(name=schema_name, annotations=annotations["schema_annotation"])
279
+ )
225
280
 
226
281
  # Create workflow and execution table.
227
282
 
228
- schema.create_table(Table.define_vocabulary(MLVocab.feature_name, f"{project_name}:{{RID}}"))
229
- asset_type_table = schema.create_table(Table.define_vocabulary(MLVocab.asset_type, f"{project_name}:{{RID}}"))
230
- asset_role_table = schema.create_table(Table.define_vocabulary(MLVocab.asset_role, f"{project_name}:{{RID}}"))
283
+ schema.create_table(
284
+ VocabularyTableDef(name=MLVocab.feature_name, curie_template=f"{project_name}:{{RID}}")
285
+ )
286
+ asset_type_table = schema.create_table(
287
+ VocabularyTableDef(name=MLVocab.asset_type, curie_template=f"{project_name}:{{RID}}")
288
+ )
289
+ asset_role_table = schema.create_table(
290
+ VocabularyTableDef(name=MLVocab.asset_role, curie_template=f"{project_name}:{{RID}}")
291
+ )
231
292
 
232
293
  create_workflow_table(schema, annotations["workflow_annotation"])
233
294
  execution_table = create_execution_table(schema, annotations["execution_annotation"])
@@ -300,6 +361,14 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
300
361
  "Name": "Runtime_Env",
301
362
  "Description": "Information about the runtime environment",
302
363
  },
364
+ {
365
+ "Name": "Hydra_Config",
366
+ "Description": "Hydra YAML configuration file (config.yaml, overrides.yaml, hydra.yaml)",
367
+ },
368
+ {
369
+ "Name": "Deriva_Config",
370
+ "Description": "DerivaML execution configuration (configuration.json with datasets, assets, workflow)",
371
+ },
303
372
  {
304
373
  "Name": "Execution_Metadata",
305
374
  "Description": "Information about the execution environment",
@@ -335,7 +404,28 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
335
404
  )
336
405
 
337
406
 
338
- def create_ml_catalog(hostname: str, project_name: str) -> ErmrestCatalog:
407
+ def create_ml_catalog(
408
+ hostname: str,
409
+ project_name: str,
410
+ catalog_alias: str | None = None,
411
+ ) -> ErmrestCatalog:
412
+ """Create a new DerivaML catalog with all ML schema tables.
413
+
414
+ Args:
415
+ hostname: Server hostname (e.g., "localhost", "www.eye-ai.org").
416
+ project_name: Name for the project, becomes the domain schema name.
417
+ catalog_alias: Optional alias name for the catalog. If provided, creates
418
+ an alias that points to the new catalog, allowing access via the
419
+ alias name instead of the numeric catalog ID.
420
+
421
+ Returns:
422
+ The created ErmrestCatalog instance.
423
+
424
+ Example:
425
+ # Create catalog with alias
426
+ catalog = create_ml_catalog("localhost", "my_project", catalog_alias="my-project")
427
+ # Now accessible as both /ermrest/catalog/<id> and /ermrest/catalog/my-project
428
+ """
339
429
  server = DerivaServer("https", hostname, credentials=get_credential(hostname))
340
430
  catalog = server.create_ermrest_catalog()
341
431
  model = catalog.getCatalogModel()
@@ -352,6 +442,16 @@ def create_ml_catalog(hostname: str, project_name: str) -> ErmrestCatalog:
352
442
  ]
353
443
  )
354
444
  create_ml_schema(catalog, project_name=project_name)
445
+
446
+ # Create alias if requested
447
+ if catalog_alias:
448
+ server.create_ermrest_alias(
449
+ id=catalog_alias,
450
+ alias_target=catalog.catalog_id,
451
+ name=project_name,
452
+ description=f"Alias for {project_name} catalog (ID: {catalog.catalog_id})",
453
+ )
454
+
355
455
  return catalog
356
456
 
357
457