deriva-ml 1.13.2__py3-none-any.whl → 1.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,256 @@
1
1
  import argparse
2
2
  import sys
3
3
 
4
+ from deriva.core.ermrest_model import Model, Table
4
5
  from deriva.core.utils.core_utils import tag as deriva_tags
5
6
  from ..deriva_model import DerivaModel
6
7
  from ..upload import bulk_upload_configuration
7
8
 
8
9
 
9
- def generate_annotation(model: DerivaModel) -> dict:
10
def catalog_annotation(model: DerivaModel) -> None:
    """Build the catalog-level annotations and push them to the catalog.

    The navigation bar is generated from the tables currently present in the deriva-ml and domain
    schemas, so the menu reflects whatever the model contains when this routine is called. The
    display, chaise-config and bulk-upload annotations are merged into ``model.annotations`` and
    ``model.apply()`` is then invoked.

    Args:
        model: A deriva model to the current catalog.

    """
    catalog_id = model.catalog.catalog_id
    ml_schema = model.ml_schema
    domain_schema = model.domain_schema
    ml_tables = model.schemas[ml_schema].tables
    domain_tables = model.schemas[domain_schema].tables

    def recordset_url(schema_name, table_name):
        """Return the recordset URL of a table in this catalog."""
        return f"/chaise/recordset/#{catalog_id}/{schema_name}:{table_name}"

    def menu_link(schema_name, table_name, label=None):
        """Return a navbar entry for a table; the label defaults to the table name."""
        return {
            "url": recordset_url(schema_name, table_name),
            "name": table_name if label is None else label,
        }

    user_info_menu = {
        "name": "User Info",
        "children": [
            menu_link("public", "ERMrest_Client", "Users"),
            menu_link("public", "ERMrest_Group", "Groups"),
            menu_link("public", "ERMrest_RID_Lease", "ERMrest RID Lease"),
        ],
    }

    # The primary tables of the deriva-ml schema; the label is the table name with spaces.
    deriva_ml_menu = {
        "name": "Deriva-ML",
        "children": [
            menu_link(ml_schema, table_name, table_name.replace("_", " "))
            for table_name in (
                "Workflow",
                "Execution",
                "Execution_Metadata",
                "Execution_Asset",
                "Dataset",
                "Dataset_Version",
            )
        ],
    }

    # The tables of the WWW schema.
    www_menu = {
        "name": "WWW",
        "children": [menu_link("WWW", "Page"), menu_link("WWW", "File")],
    }

    # Domain tables, leaving out controlled vocabularies and association tables
    # (feature tables are presumably caught by the non-pure association test -- confirm).
    domain_menu = {
        "name": domain_schema,
        "children": [
            {"name": tname, "url": recordset_url(domain_schema, tname)}
            for tname in domain_tables
            if not model.is_vocabulary(tname)
            and not model.is_association(tname, pure=False, max_arity=3)
        ],
    }

    # Every controlled vocabulary, grouped under one header per schema.
    vocabulary_menu = {
        "name": "Vocabulary",
        "children": [
            {"name": f"{ml_schema} Vocabularies", "header": True},
            *[menu_link(ml_schema, tname) for tname in ml_tables if model.is_vocabulary(tname)],
            {"name": f"{domain_schema} Vocabularies", "header": True},
            *[menu_link(domain_schema, tname) for tname in domain_tables if model.is_vocabulary(tname)],
        ],
    }

    # Every asset table: deriva-ml schema first, then the domain schema.
    assets_menu = {
        "name": "Assets",
        "children": [
            *[menu_link(ml_schema, tname) for tname in ml_tables if model.is_asset(tname)],
            *[menu_link(domain_schema, tname) for tname in domain_tables if model.is_asset(tname)],
        ],
    }

    registry_entry = {
        "url": "/chaise/recordset/#0/ermrest:registry@sort(RID)",
        "name": "Catalog Registry",
    }

    documentation_menu = {
        "name": "Documentation",
        "children": [
            {
                "url": "https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/ml_workflow_instruction.md",
                "name": "ML Notebook Instruction",
            },
            {
                "url": "https://informatics-isi-edu.github.io/deriva-ml/",
                "name": "Deriva-ML Documentation",
            },
        ],
    }

    annotations = {
        deriva_tags.display: {"name_style": {"underline_space": True}},
        deriva_tags.chaise_config: {
            "headTitle": "Catalog ML",
            "navbarBrandText": "ML Data Browser",
            "systemColumnsDisplayEntry": ["RID"],
            "systemColumnsDisplayCompact": ["RID"],
            "defaultTable": {"table": "Dataset", "schema": "deriva-ml"},
            "deleteRecord": True,
            "showFaceting": True,
            "shareCiteAcls": True,
            "exportConfigsSubmenu": {"acls": {"show": ["*"], "enable": ["*"]}},
            "resolverImplicitCatalog": False,
            "navbarMenu": {
                "newTab": False,
                "children": [
                    user_info_menu,
                    deriva_ml_menu,
                    www_menu,
                    domain_menu,
                    vocabulary_menu,
                    assets_menu,
                    registry_entry,
                    documentation_menu,
                ],
            },
        },
        deriva_tags.bulk_upload: bulk_upload_configuration(model=model),
    }
    model.annotations.update(annotations)
    model.apply()
186
+
187
+
188
def asset_annotation(asset_table: Table):
    """Set the table-display and visible-columns annotations of an asset table.

    The row name is rendered from the ``Filename`` column. The visible columns are the system and
    asset columns, an "Asset Types" path through the ``<asset>_Asset_Type`` foreign keys, and then
    one entry for each value returned by ``model.asset_metadata(asset_table)``. The annotations are
    merged into ``asset_table.annotations`` and the enclosing model is applied.

    Args:
        asset_table: The asset table whose annotations are updated.

    """
    schema = asset_table.schema.name
    asset_name = asset_table.name
    model = DerivaModel(asset_table.schema.model)

    def fkey_column(column):
        """Map the column name to a FK if a constraint exists on the column"""
        for fk in asset_table.foreign_keys:
            if asset_table.columns[column] in fk.column_map:
                # First matching constraint wins; it is referenced by the pair taken from fk.name.
                return fk.name[0].name, fk.name[1]
        # No foreign key covers this column, so show the plain column.
        return column

    asset_types_column = {
        "source": [
            {"inbound": [schema, f"{asset_name}_Asset_Type_{asset_name}_fkey"]},
            {"outbound": [schema, f"{asset_name}_Asset_Type_Asset_Type_fkey"]},
            "RID",
        ],
        "markdown_name": "Asset Types",
    }

    visible = [
        "RID",
        "RCT",
        "RMT",
        [schema, f"{asset_name}_RCB_fkey"],
        [schema, f"{asset_name}_RMB_fkey"],
        "URL",
        "Filename",
        "Description",
        "Length",
        "MD5",
        asset_types_column,
    ]
    # Metadata columns follow the fixed columns, in the order asset_metadata yields them.
    visible.extend(fkey_column(c) for c in model.asset_metadata(asset_table))

    annotations = {
        deriva_tags.table_display: {
            "row_name": {"row_markdown_pattern": "{{{Filename}}}"}
        },
        deriva_tags.visible_columns: {"*": visible},
    }
    asset_table.annotations.update(annotations)
    model.apply()
244
+
245
+
246
+ def generate_annotation(model: Model, schema: str) -> dict:
10
247
  catalog_id = model.catalog.catalog_id
11
- schema = model.ml_schema
12
248
  workflow_annotation = {
13
249
  deriva_tags.visible_columns: {
14
250
  "*": [
15
251
  "RID",
252
+ [schema, "Workflow_RCB_fkey"],
253
+ [schema, "Workflow_RMB_fkey"],
16
254
  "Name",
17
255
  "Description",
18
256
  {
@@ -36,6 +274,7 @@ def generate_annotation(model: DerivaModel) -> dict:
36
274
  "*": [
37
275
  "RID",
38
276
  [schema, "Execution_RCB_fkey"],
277
+ [schema, "Execution_RMB_fkey"],
39
278
  "RCT",
40
279
  "Description",
41
280
  {"source": [{"outbound": [schema, "Execution_Workflow_fkey"]}, "RID"]},
@@ -83,155 +322,131 @@ def generate_annotation(model: DerivaModel) -> dict:
83
322
  },
84
323
  }
85
324
 
86
- execution_asset_annotation = {
87
- deriva_tags.table_display: {
88
- "row_name": {"row_markdown_pattern": "{{{Filename}}}"}
89
- },
325
+ dataset_annotation = {
90
326
  deriva_tags.visible_columns: {
91
- "compact": [
327
+ "*": [
92
328
  "RID",
93
- "URL",
94
329
  "Description",
95
- "Length",
96
- [schema, "Execution_Asset_Execution_Asset_Type_fkey"],
97
- # {
98
- # "display": {
99
- # "template_engine": "handlebars",
100
- # "markdown_pattern": "{{#if (eq _Execution_Asset_Type \"2-5QME\")}}\n ::: iframe []("
101
- # "https://dev.eye-ai.org/~vivi/deriva-webapps/plot/?config=test-line"
102
- # "-plot&Execution_Asset_RID={{{RID}}}){class=chaise-autofill "
103
- # "style=\"min-width: 500px; min-height: 300px;\"} \\n:::\n {{/if}}"
104
- # },
105
- # "markdown_name": "ROC Plot"
106
- # }
330
+ [schema, "Dataset_RCB_fkey"],
331
+ [schema, "Dataset_RMB_fkey"],
332
+ {
333
+ "source": [
334
+ {"outbound": ["deriva-ml", "Dataset_Version_fkey"]},
335
+ "Version",
336
+ ],
337
+ "markdown_name": "Dataset Version",
338
+ },
107
339
  ],
108
340
  "detailed": [
109
341
  "RID",
110
- "RCT",
111
- "RMT",
112
- "RCB",
113
- "RMB",
114
- # {
115
- # "display": {
116
- # "template_engine": "handlebars",
117
- # "markdown_pattern": "{{#if (eq _Execution_Asset_Type \"2-5QME\")}} ::: iframe []("
118
- # "https://dev.eye-ai.org/~vivi/deriva-webapps/plot/?config=test-line"
119
- # "-plot&Execution_Asset_RID={{{RID}}}){style=\"min-width:1000px; "
120
- # "min-height:700px; height:70vh;\" class=\"chaise-autofill\"} \\n::: {"
121
- # "{/if}}"
122
- # },
123
- # "markdown_name": "ROC Plot"
124
- # },
125
- "URL",
126
- "Filename",
127
342
  "Description",
128
- "Length",
129
- "MD5",
130
- [schema, "Execution_Asset_Execution_Asset_Type_fkey"],
343
+ {
344
+ "source": [
345
+ {"inbound": ["deriva-ml", "Dataset_Dataset_Type_Dataset_fkey"]},
346
+ {
347
+ "outbound": [
348
+ "deriva-ml",
349
+ "Dataset_Dataset_Type_Dataset_Type_fkey",
350
+ ]
351
+ },
352
+ "RID",
353
+ ],
354
+ "markdown_name": "Dataset Types",
355
+ },
356
+ {
357
+ "source": [
358
+ {"outbound": ["deriva-ml", "Dataset_Version_fkey"]},
359
+ "Version",
360
+ ],
361
+ "markdown_name": "Dataset Version",
362
+ },
363
+ [schema, "Dataset_RCB_fkey"],
364
+ [schema, "Dataset_RMB_fkey"],
131
365
  ],
132
- },
133
- }
134
-
135
- execution_metadata_annotation = {
136
- deriva_tags.table_display: {
137
- "row_name": {"row_markdown_pattern": "{{{Filename}}}"}
138
- }
139
- }
140
-
141
- dataset_annotation = {
142
- # Setup Facet on types
143
- # Make types in visible columns
144
- # Have all connected values be visible FK.
145
- }
146
-
147
- schema_annotation = {
148
- "name_style": {"underline_space": True},
149
- }
150
-
151
- catalog_annotation = {
152
- deriva_tags.display: {"name_style": {"underline_space": True}},
153
- deriva_tags.chaise_config: {
154
- "headTitle": "Catalog ML",
155
- "navbarBrandText": "ML Data Browser",
156
- "systemColumnsDisplayEntry": ["RID"],
157
- "systemColumnsDisplayCompact": ["RID"],
158
- "navbarMenu": {
159
- "newTab": False,
160
- "children": [
366
+ "filter": {
367
+ "and": [
368
+ {"source": "RID"},
369
+ {"source": "Description"},
161
370
  {
162
- "name": "User Info",
163
- "children": [
371
+ "source": [
164
372
  {
165
- "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_Client",
166
- "name": "Users",
373
+ "inbound": [
374
+ "deriva-ml",
375
+ "Dataset_Dataset_Type_Dataset_fkey",
376
+ ]
167
377
  },
168
378
  {
169
- "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_Group",
170
- "name": "Groups",
171
- },
172
- {
173
- "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_RID_Lease",
174
- "name": "ERMrest RID Lease",
379
+ "outbound": [
380
+ "deriva-ml",
381
+ "Dataset_Dataset_Type_Dataset_Type_fkey",
382
+ ]
175
383
  },
384
+ "RID",
176
385
  ],
386
+ "markdown_name": "Dataset Types",
177
387
  },
178
388
  {
179
- "name": "Deriva-ML",
180
- "children": [
181
- {
182
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Workflow",
183
- "name": "Workflow",
184
- },
185
- {
186
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Workflow_Type",
187
- "name": "Workflow Type",
188
- },
189
- {
190
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution",
191
- "name": "Execution",
192
- },
193
- {
194
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Metadata",
195
- "name": "Execution Metadata",
196
- },
197
- {
198
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Metadata_Type",
199
- "name": "Execution Metadata Type",
200
- },
201
- {
202
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Asset",
203
- "name": "Execution Asset",
204
- },
205
- {
206
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Asset_Type",
207
- "name": "Execution Asset Type",
208
- },
209
- {
210
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Dataset",
211
- "name": "Dataset",
212
- },
213
- ],
389
+ "source": [{"outbound": [schema, "Dataset_RCB_fkey"]}, "RID"],
390
+ "markdown_name": "Created By",
214
391
  },
215
- ],
392
+ {
393
+ "source": [{"outbound": [schema, "Dataset_RMB_fkey"]}, "RID"],
394
+ "markdown_name": "Modified By",
395
+ },
396
+ ]
216
397
  },
217
- "defaultTable": {"table": "Dataset", "schema": "deriva-ml"},
218
- "deleteRecord": True,
219
- "showFaceting": True,
220
- "shareCiteAcls": True,
221
- "exportConfigsSubmenu": {"acls": {"show": ["*"], "enable": ["*"]}},
222
- "resolverImplicitCatalog": catalog_id,
398
+ }
399
+ }
400
+
401
+ schema_annotation = {
402
+ "name_style": {"underline_space": True},
403
+ }
404
+
405
+ dataset_version_annotation = {
406
+ deriva_tags.visible_columns: {
407
+ "*": [
408
+ "RID",
409
+ "RCT",
410
+ "RMT",
411
+ [schema, "Dataset_Version_RCB_fkey"],
412
+ [schema, "Dataset_Version_RMB_fkey"],
413
+ {
414
+ "source": [
415
+ {"outbound": [schema, "Dataset_Version_Dataset_fkey"]},
416
+ "RID",
417
+ ]
418
+ },
419
+ "Description",
420
+ {
421
+ "display": {
422
+ "template_engine": "handlebars",
423
+ "markdown_pattern": "[{{{Version}}}](https://{{{$location.host}}}/id/{{{$catalog.id}}}/{{{Dataset}}}@{{{Snapshot}}})",
424
+ },
425
+ "markdown_name": "Version",
426
+ },
427
+ "Minid",
428
+ {
429
+ "source": [
430
+ {"outbound": [schema, "Dataset_Version_Execution_fkey"]},
431
+ "RID",
432
+ ]
433
+ },
434
+ ]
435
+ },
436
+ deriva_tags.visible_foreign_keys: {"*": []},
437
+ deriva_tags.table_display: {
438
+ "row_name": {
439
+ "row_markdown_pattern": "{{{$fkey_deriva-ml_Dataset_Version_Dataset_fkey.RID}}}:{{{Version}}}"
440
+ }
223
441
  },
224
- deriva_tags.bulk_upload: bulk_upload_configuration(model=DerivaModel(model)),
225
442
  }
226
443
 
227
444
  return {
228
445
  "workflow_annotation": workflow_annotation,
229
446
  "dataset_annotation": dataset_annotation,
230
447
  "execution_annotation": execution_annotation,
231
- "execution_asset_annotation": execution_asset_annotation,
232
- "execution_metadata_annotation": execution_metadata_annotation,
233
448
  "schema_annotation": schema_annotation,
234
- "catalog_annotation": catalog_annotation,
449
+ "dataset_version_annotation": dataset_version_annotation,
235
450
  }
236
451
 
237
452
 
@@ -240,7 +455,7 @@ def main():
240
455
  parser.add_argument("--catalog_id", type=str, required=True)
241
456
  parser.add_argument("--schema_name", type=str, required=True)
242
457
  args = parser.parse_args()
243
- generate_annotation(args.catalog_id)
458
+ generate_annotation(args.catalog_id, args.schema_name)
244
459
 
245
460
 
246
461
  if __name__ == "__main__":