deriva-ml 1.10.1__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,7 @@
1
+ """
2
+ Classes that are used to define an execution configuration.
3
+ """
4
+
1
5
  from __future__ import annotations
2
6
 
3
7
  import json
deriva_ml/feature.py CHANGED
@@ -25,12 +25,11 @@ class FeatureRecord(BaseModel):
25
25
  """
26
26
 
27
27
  # model_dump of this feature should be compatible with feature table columns.
28
- Execution: str
28
+ Execution: Optional[str] = None
29
29
  Feature_Name: str
30
30
  feature: ClassVar[Optional["Feature"]] = None
31
31
 
32
32
  class Config:
33
-
34
33
  arbitrary_types_allowed = True
35
34
 
36
35
  @classmethod
@@ -1,6 +1,6 @@
1
1
  import argparse
2
2
  import sys
3
- from typing import Optional
3
+ from typing import Optional, Any
4
4
 
5
5
  from deriva.core import DerivaServer, get_credential
6
6
  from deriva.core.ermrest_model import Model
@@ -19,28 +19,48 @@ from deriva_ml.schema_setup.annotations import generate_annotation
19
19
  from deriva_ml.deriva_model import DerivaModel
20
20
 
21
21
 
22
- def define_table_workflow(workflow_annotation: dict):
23
- return Table.define(
24
- "Workflow",
25
- column_defs=[
26
- Column.define("Name", builtin_types.text),
27
- Column.define("Description", builtin_types.markdown),
28
- Column.define("URL", builtin_types.ermrest_uri),
29
- Column.define("Checksum", builtin_types.text),
30
- Column.define("Version", builtin_types.text),
31
- ],
32
- annotations=workflow_annotation,
22
+ def create_dataset_table(
23
+ schema: Schema,
24
+ execution_table: Table,
25
+ project_name: str,
26
+ dataset_annotation: Optional[dict] = None,
27
+ ):
28
+ dataset_table = schema.create_table(
29
+ Table.define(
30
+ tname="Dataset",
31
+ column_defs=[
32
+ Column.define("Description", builtin_types.markdown),
33
+ Column.define("Deleted", builtin_types.boolean),
34
+ ],
35
+ annotations=dataset_annotation if dataset_annotation is not None else {},
36
+ )
33
37
  )
34
38
 
39
+ dataset_type = schema.create_table(
40
+ Table.define_vocabulary(MLVocab.dataset_type, f"{project_name}:{{RID}}")
41
+ )
42
+ schema.create_table(
43
+ Table.define_association(
44
+ associates=[
45
+ ("Dataset", dataset_table),
46
+ (MLVocab.dataset_type, dataset_type),
47
+ ]
48
+ )
49
+ )
35
50
 
36
- def define_table_dataset(dataset_annotation: Optional[dict] = None):
37
- return Table.define(
38
- tname="Dataset",
39
- column_defs=[
40
- Column.define("Description", builtin_types.markdown),
41
- Column.define("Deleted", builtin_types.boolean),
42
- ],
43
- annotations=dataset_annotation if dataset_annotation is not None else {},
51
+ dataset_version = schema.create_table(define_table_dataset_version(schema.name))
52
+ dataset_table.create_reference(("Version", True, dataset_version))
53
+
54
+ # Nested datasets.
55
+ schema.create_table(
56
+ Table.define_association(
57
+ associates=[("Dataset", dataset_table), ("Nested_Dataset", dataset_table)]
58
+ )
59
+ )
60
+ schema.create_table(
61
+ Table.define_association(
62
+ associates=[("Dataset", dataset_table), ("Execution", execution_table)]
63
+ )
44
64
  )
45
65
 
46
66
 
@@ -66,92 +86,120 @@ def define_table_dataset_version(sname: str):
66
86
  )
67
87
 
68
88
 
69
- def define_table_execution(sname: str, execution_annotation: dict):
70
- table_def = Table.define(
71
- "Execution",
72
- column_defs=[
73
- Column.define("Workflow", builtin_types.text),
74
- Column.define("Description", builtin_types.markdown),
75
- Column.define("Duration", builtin_types.text),
76
- Column.define("Status", builtin_types.text),
77
- Column.define("Status_Detail", builtin_types.text),
78
- ],
79
- fkey_defs=[ForeignKey.define(["Workflow"], sname, "Workflow", ["RID"])],
80
- annotations=execution_annotation,
89
+ def create_execution_table(schema, annotation: Optional[dict] = None):
90
+ annotation = annotation if annotation is not None else {}
91
+ execution = schema.create_table(
92
+ Table.define(
93
+ "Execution",
94
+ column_defs=[
95
+ Column.define("Workflow", builtin_types.text),
96
+ Column.define("Description", builtin_types.markdown),
97
+ Column.define("Duration", builtin_types.text),
98
+ Column.define("Status", builtin_types.text),
99
+ Column.define("Status_Detail", builtin_types.text),
100
+ ],
101
+ fkey_defs=[
102
+ ForeignKey.define(["Workflow"], schema.name, "Workflow", ["RID"])
103
+ ],
104
+ annotations=annotation,
105
+ )
81
106
  )
82
- return table_def
107
+ return execution
83
108
 
84
109
 
85
- def define_asset_execution_metadata(sname: str, execution_metadata_annotation: dict):
86
- return Table.define_asset(
87
- sname=sname,
88
- tname="Execution_Metadata",
89
- hatrac_template="/hatrac/metadata/{{MD5}}.{{Filename}}",
90
- annotations=execution_metadata_annotation,
110
+ def create_asset_table(
111
+ schema,
112
+ asset_name: str,
113
+ execution_table,
114
+ asset_type_table,
115
+ asset_role_table,
116
+ annotation: Optional[dict] = None,
117
+ ):
118
+ annotation = annotation if annotation is not None else {}
119
+ asset_table = schema.create_table(
120
+ Table.define_asset(
121
+ sname=schema.name,
122
+ tname=asset_name,
123
+ hatrac_template="/hatrac/metadata/{{MD5}}.{{Filename}}",
124
+ annotations=annotation,
125
+ )
126
+ )
127
+ atable = schema.create_table(
128
+ Table.define_association(
129
+ [
130
+ (asset_name, asset_table),
131
+ ("Asset_Type", asset_type_table),
132
+ ],
133
+ )
91
134
  )
92
135
 
93
-
94
- def define_asset_execution_asset(sname: str, execution_asset_annotation: dict):
95
- table_def = Table.define_asset(
96
- sname=sname,
97
- tname="Execution_Asset",
98
- hatrac_template="/hatrac/execution_asset/{{MD5}}.{{Filename}}",
99
- annotations=execution_asset_annotation,
136
+ atable = schema.create_table(
137
+ Table.define_association(
138
+ [
139
+ (asset_name, asset_table),
140
+ ("Execution", execution_table),
141
+ ],
142
+ )
100
143
  )
101
- return table_def
144
+ atable.create_reference(asset_role_table)
145
+ return asset_table
102
146
 
103
147
 
104
- def define_table_file(sname):
148
+ def create_file_table(
149
+ schema: Schema,
150
+ execution_table: Table,
151
+ project_name: str,
152
+ annotation: Optional[dict] = None,
153
+ ):
105
154
  """Define files table structure"""
106
- return Table.define_asset(
107
- sname=sname,
108
- tname="File",
155
+ annotation = annotation or {}
156
+ file_table = schema.create_table(
157
+ Table.define_asset(sname=schema.name, tname="File")
109
158
  )
110
159
 
160
+ file_type = schema.create_table(
161
+ Table.define_vocabulary(MLVocab.file_type, f"{project_name}:{{RID}}")
162
+ )
111
163
 
112
- def create_www_schema(model: Model):
113
- """
114
- Set up a new schema and tables to hold web-page like content. The tables include a page table, and an asset
115
- table that can have images that are referred to by the web page. Pages are written using markdown.
116
- :return:
117
- """
118
- if model.schemas.get("www"):
119
- model.schemas["www"].drop(cascade=True)
120
- www_schema = model.create_schema(
121
- Schema.define(
122
- "www", comment="Schema for tables that will be displayed as web content"
164
+ schema.create_table(
165
+ Table.define_association(
166
+ associates=[
167
+ ("File", file_table),
168
+ (MLVocab.file_type, file_type),
169
+ ]
123
170
  )
124
171
  )
125
- www_schema.create_table(
172
+ schema.create_table(
173
+ Table.define_association(
174
+ [
175
+ ("File", file_table),
176
+ ("Execution", execution_table),
177
+ ]
178
+ )
179
+ )
180
+
181
+
182
+ def create_workflow_table(schema: Schema, annotations: Optional[dict[str, Any]] = None):
183
+ annotations = annotations or {}
184
+ workflow_table = schema.create_table(
126
185
  Table.define(
127
- "Page",
186
+ "Workflow",
128
187
  column_defs=[
129
- Column.define(
130
- "Title",
131
- builtin_types.text,
132
- nullok=False,
133
- comment="Unique title for the page",
134
- ),
135
- Column.define(
136
- "Content",
137
- builtin_types.markdown,
138
- comment="Content of the page in markdown",
139
- ),
188
+ Column.define("Name", builtin_types.text),
189
+ Column.define("Description", builtin_types.markdown),
190
+ Column.define("URL", builtin_types.ermrest_uri),
191
+ Column.define("Checksum", builtin_types.text),
192
+ Column.define("Version", builtin_types.text),
140
193
  ],
141
- key_defs=[Key.define(["Title"])],
142
- annotations={
143
- chaise_tags.table_display: {
144
- "detailed": {
145
- "hide_column_headers": True,
146
- "collapse_toc_panel": True,
147
- }
148
- },
149
- chaise_tags.visible_foreign_keys: {"detailed": {}},
150
- chaise_tags.visible_columns: {"detailed": ["Content"]},
151
- },
194
+ annotations=annotations,
152
195
  )
153
196
  )
154
- return www_schema
197
+ workflow_table.create_reference(
198
+ schema.create_table(
199
+ Table.define_vocabulary(MLVocab.workflow_type, f"{schema.name}:{{RID}}")
200
+ )
201
+ )
202
+ return workflow_table
155
203
 
156
204
 
157
205
  def create_ml_schema(
@@ -182,124 +230,100 @@ def create_ml_schema(
182
230
  Schema.define(schema_name, annotations=annotations["schema_annotation"])
183
231
  )
184
232
  project_name = project_name or schema_name
185
- # Workflow
186
- schema.create_table(
187
- Table.define_vocabulary("Feature_Name", f"{project_name}:{{RID}}")
188
- )
189
-
190
- workflow_table = schema.create_table(
191
- define_table_workflow(annotations["workflow_annotation"])
192
- )
193
- workflow_table.create_reference(
194
- schema.create_table(
195
- Table.define_vocabulary(MLVocab.workflow_type, f"{schema_name}:{{RID}}")
196
- )
197
- )
198
233
 
199
- execution_table = schema.create_table(
200
- define_table_execution(schema_name, annotations["execution_annotation"])
201
- )
234
+ # Create workflow and execution table.
202
235
 
203
- dataset_table = schema.create_table(
204
- define_table_dataset(annotations["dataset_annotation"])
205
- )
206
- dataset_type = schema.create_table(
207
- Table.define_vocabulary(MLVocab.dataset_type, f"{project_name}:{{RID}}")
208
- )
209
236
  schema.create_table(
210
- Table.define_association(
211
- associates=[
212
- ("Dataset", dataset_table),
213
- (MLVocab.dataset_type, dataset_type),
214
- ]
215
- )
237
+ Table.define_vocabulary("Feature_Name", f"{project_name}:{{RID}}")
216
238
  )
217
- schema.create_table(
218
- Table.define_association(
219
- associates=[("Dataset", dataset_table), ("Execution", execution_table)]
220
- )
239
+ asset_type_table = schema.create_table(
240
+ Table.define_vocabulary("Asset_Type", f"{project_name}:{{RID}}")
221
241
  )
222
-
223
- dataset_version = schema.create_table(define_table_dataset_version(schema_name))
224
- dataset_table.create_reference(("Version", True, dataset_version))
225
-
226
- # Nested datasets.
227
- schema.create_table(
228
- Table.define_association(
229
- associates=[("Dataset", dataset_table), ("Nested_Dataset", dataset_table)]
230
- )
242
+ asset_role_table = schema.create_table(
243
+ Table.define_vocabulary("Asset_Role", f"{project_name}:{{RID}}")
231
244
  )
232
245
 
233
- # Execution Metadata
234
- execution_metadata_table = schema.create_table(
235
- define_asset_execution_metadata(
236
- schema.name, annotations["execution_metadata_annotation"]
237
- )
246
+ create_workflow_table(schema, annotations["workflow_annotation"])
247
+ execution_table = create_execution_table(
248
+ schema, annotations["execution_annotation"]
238
249
  )
239
- execution_metadata_table.create_reference(
240
- schema.create_table(
241
- Table.define_vocabulary(
242
- "Execution_Metadata_Type", f"{project_name}:{{RID}}"
243
- )
244
- )
245
- )
246
- schema.create_table(
247
- Table.define_association(
248
- [
249
- ("Execution_Metadata", execution_metadata_table),
250
- ("Execution", execution_table),
251
- ]
252
- )
250
+ create_dataset_table(
251
+ schema, execution_table, project_name, annotations["dataset_annotation"]
253
252
  )
254
253
 
255
- # Execution Asset
256
- execution_asset_table = schema.create_table(
257
- define_asset_execution_asset(
258
- schema.name, annotations["execution_asset_annotation"]
259
- )
260
- )
261
- execution_asset_table.create_reference(
262
- schema.create_table(
263
- Table.define_vocabulary("Execution_Asset_Type", f"{project_name}:{{RID}}")
264
- )
254
+ create_asset_table(
255
+ schema,
256
+ "Execution_Metadata",
257
+ execution_table,
258
+ asset_type_table,
259
+ asset_role_table,
260
+ annotations["execution_metadata_annotation"],
265
261
  )
266
- schema.create_table(
267
- Table.define_association(
268
- [("Execution_Asset", execution_asset_table), ("Execution", execution_table)]
269
- )
262
+ create_asset_table(
263
+ schema,
264
+ "Execution_Asset",
265
+ execution_table,
266
+ asset_type_table,
267
+ asset_role_table,
268
+ annotations["execution_asset_annotation"],
270
269
  )
271
270
 
272
271
  # File table
273
- file_table = schema.create_table(define_table_file(schema_name))
274
- file_type = schema.create_table(
275
- Table.define_vocabulary(MLVocab.file_type, f"{project_name}:{{RID}}")
276
- )
277
- schema.create_table(
278
- Table.define_association(
279
- associates=[
280
- ("File", file_table),
281
- (MLVocab.file_type, file_type),
282
- ]
272
+ create_file_table(schema, execution_table, project_name)
273
+
274
+ create_www_schema(model)
275
+ initialize_ml_schema(model, schema_name)
276
+
277
+
278
+ def create_www_schema(model: Model):
279
+ """
280
+ Set up a new schema and tables to hold web-page like content. The tables include a page table, and an asset
281
+ table that can have images that are referred to by the web page. Pages are written using markdown.
282
+ :return:
283
+ """
284
+ if model.schemas.get("www"):
285
+ model.schemas["www"].drop(cascade=True)
286
+ www_schema = model.create_schema(
287
+ Schema.define(
288
+ "www", comment="Schema for tables that will be displayed as web content"
283
289
  )
284
290
  )
285
- schema.create_table(
286
- Table.define_association(
287
- [
288
- ("File", file_table),
289
- ("Execution", execution_table),
290
- ]
291
+ www_schema.create_table(
292
+ Table.define(
293
+ "Page",
294
+ column_defs=[
295
+ Column.define(
296
+ "Title",
297
+ builtin_types.text,
298
+ nullok=False,
299
+ comment="Unique title for the page",
300
+ ),
301
+ Column.define(
302
+ "Content",
303
+ builtin_types.markdown,
304
+ comment="Content of the page in markdown",
305
+ ),
306
+ ],
307
+ key_defs=[Key.define(["Title"])],
308
+ annotations={
309
+ chaise_tags.table_display: {
310
+ "detailed": {
311
+ "hide_column_headers": True,
312
+ "collapse_toc_panel": True,
313
+ }
314
+ },
315
+ chaise_tags.visible_foreign_keys: {"detailed": {}},
316
+ chaise_tags.visible_columns: {"detailed": ["Content"]},
317
+ },
291
318
  )
292
319
  )
293
- create_www_schema(model)
294
- initialize_ml_schema(model, schema_name)
320
+ return www_schema
295
321
 
296
322
 
297
323
  def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
298
324
  catalog = model.catalog
299
- execution_metadata_type = (
300
- catalog.getPathBuilder().schemas[schema_name].tables["Execution_Metadata_Type"]
301
- )
302
- execution_metadata_type.insert(
325
+ asset_type = catalog.getPathBuilder().schemas[schema_name].tables["Asset_Type"]
326
+ asset_type.insert(
303
327
  [
304
328
  {
305
329
  "Name": "Execution_Config",
@@ -309,6 +333,22 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
309
333
  "Name": "Runtime_Env",
310
334
  "Description": "Information about the execution environment",
311
335
  },
336
+ {
337
+ "Name": "Execution_Metadata",
338
+ "Description": "Information about the execution environment",
339
+ },
340
+ {
341
+ "Name": "Execution_Asset",
342
+ "Description": "A file generated by an execution",
343
+ },
344
+ ],
345
+ defaults={"ID", "URI"},
346
+ )
347
+ asset_role = catalog.getPathBuilder().schemas[schema_name].tables["Asset_Role"]
348
+ asset_role.insert(
349
+ [
350
+ {"Name": "Input", "Description": "Asset used for input of an execution."},
351
+ {"Name": "Output", "Description": "Asset used for output of an execution."},
312
352
  ],
313
353
  defaults={"ID", "URI"},
314
354
  )