deriva-ml 1.8.10__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff shows the differences in content between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
deriva_ml/execution.py CHANGED
@@ -54,7 +54,9 @@ except ImportError: # Graceful fallback if IceCream isn't installed.
54
54
  try:
55
55
  from jupyter_server.serverapp import list_running_servers
56
56
  except ImportError:
57
- list_running_servers = lambda: []
57
+
58
+ def list_running_servers():
59
+ return []
58
60
 
59
61
 
60
62
  class Execution:
@@ -155,7 +157,6 @@ class Execution:
155
157
  self._initialize_execution(reload)
156
158
 
157
159
  def _save_runtime_environment(self):
158
-
159
160
  runtime_env_path = ExecMetadataVocab.runtime_env.value
160
161
  runtime_env_dir = self.execution_metadata_path(runtime_env_path)
161
162
  with NamedTemporaryFile(
@@ -253,17 +254,9 @@ class Execution:
253
254
 
254
255
  def _create_notebook_checkpoint(self):
255
256
  """Trigger a checkpoint creation using Jupyter's API."""
256
- notebook_name = self._ml_object._notebook
257
-
258
- # Look for the server running this notebook.
259
- root = Path("").absolute().parent.as_posix()
260
- servers = list(list_running_servers())
261
- # Jupyterhub seems to handle root_dir differently then server case.
262
- server = (
263
- servers
264
- if len(servers) == 1
265
- else [s for s in servers if s["root_dir"] == root]
266
- )[0]
257
+
258
+ server, session = self._ml_object._get_notebook_session()
259
+ notebook_name = session["notebook"]["path"]
267
260
  notebook_url = f"{server['url']}api/contents/{notebook_name}"
268
261
 
269
262
  # Get notebook content
@@ -275,7 +268,7 @@ class Execution:
275
268
  # Execution metadata cannot be in a directory, so map path into filename.
276
269
  checkpoint_path = (
277
270
  self.execution_metadata_path(ExecMetadataVocab.runtime_env.value)
278
- / f"{notebook_name.as_posix().replace('/','_')}.checkpoint"
271
+ / f"{notebook_name.replace('/', '_')}.checkpoint"
279
272
  )
280
273
  with open(checkpoint_path, "w", encoding="utf-8") as f:
281
274
  json.dump(notebook_content, f)
@@ -295,7 +288,7 @@ class Execution:
295
288
  minutes, seconds = divmod(remainder, 60)
296
289
  duration = f"{round(hours, 0)}H {round(minutes, 0)}min {round(seconds, 4)}sec"
297
290
 
298
- if self._ml_object._notebook:
291
+ if self._ml_object._is_notebook:
299
292
  self._create_notebook_checkpoint()
300
293
 
301
294
  self.update_status(Status.completed, "Algorithm execution ended.")
@@ -367,7 +360,7 @@ class Execution:
367
360
  if m := is_feature_asset_dir(p):
368
361
  try:
369
362
  self.update_status(
370
- Status.running, f'Uploading feature {m["feature_name"]}...'
363
+ Status.running, f"Uploading feature {m['feature_name']}..."
371
364
  )
372
365
  feature_assets[m["target_table"], m["feature_name"]] = (
373
366
  self._ml_object.upload_assets(p)
@@ -1,12 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
- from typing import Optional, Any
4
+ from typing import Optional
5
5
 
6
6
  from pydantic import (
7
7
  BaseModel,
8
8
  conlist,
9
- ConfigDict, field_validator,
9
+ ConfigDict,
10
10
  )
11
11
  from pathlib import Path
12
12
 
@@ -36,7 +36,6 @@ class Workflow(BaseModel):
36
36
  checksum: Optional[str]
37
37
 
38
38
 
39
-
40
39
  class ExecutionConfiguration(BaseModel):
41
40
  """Define the parameters that are used to configure a specific execution.
42
41
 
@@ -69,23 +68,21 @@ class ExecutionConfiguration(BaseModel):
69
68
  config = json.load(fd)
70
69
  return ExecutionConfiguration.model_validate(config)
71
70
 
72
- def download_execution_configuration(
73
- self, configuration_rid: RID
74
- ) -> ExecutionConfiguration:
75
- """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
76
- configuration in hatrac
77
-
78
- Args:
79
- configuration_rid: RID that should be to an asset table that refers to an execution configuration
80
-
81
- Returns:
82
- A ExecutionConfiguration object for configured by the parameters in the configuration file.
83
- """
84
- AssertionError("Not Implemented")
85
- return ExecutionConfiguration.load_configuration(configuration_rid)
86
-
87
- # configuration = self.retrieve_rid(configuration_rid)
88
- # with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
89
- # hs = HatracStore("https", self.host_name, self.credential)
90
- # hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
91
- # return ExecutionConfiguration.load_configuration(Path(dest_file.name))
71
+ # def download_execution_configuration(
72
+ # self, configuration_rid: RID
73
+ # ) -> ExecutionConfiguration:
74
+ # """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
75
+ # configuration in hatrac
76
+ #
77
+ # Args:
78
+ # configuration_rid: RID that should be to an asset table that refers to an execution configuration
79
+ #
80
+ # Returns:
81
+ # A ExecutionConfiguration object for configured by the parameters in the configuration file.
82
+ # """
83
+ # AssertionError("Not Implemented")
84
+ # configuration = self.retrieve_rid(configuration_rid)
85
+ # with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
86
+ # hs = HatracStore("https", self.host_name, self.credential)
87
+ # hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
88
+ # return ExecutionConfiguration.load_configuration(Path(dest_file.name))
@@ -240,7 +240,7 @@ def main():
240
240
  parser.add_argument("--catalog_id", type=str, required=True)
241
241
  parser.add_argument("--schema_name", type=str, required=True)
242
242
  args = parser.parse_args()
243
- generate_annotation(args.catalog_id, args.schema_name)
243
+ generate_annotation(args.catalog_id)
244
244
 
245
245
 
246
246
  if __name__ == "__main__":
@@ -1,5 +1,6 @@
1
1
  import argparse
2
2
  import sys
3
+ from typing import Optional
3
4
 
4
5
  from deriva.core import DerivaServer, get_credential
5
6
  from deriva.core.ermrest_model import Model
@@ -32,7 +33,7 @@ def define_table_workflow(workflow_annotation: dict):
32
33
  )
33
34
 
34
35
 
35
- def define_table_dataset(dataset_annotation: dict = None):
36
+ def define_table_dataset(dataset_annotation: Optional[dict] = None):
36
37
  return Table.define(
37
38
  tname="Dataset",
38
39
  column_defs=[
@@ -154,7 +155,7 @@ def create_www_schema(model: Model):
154
155
 
155
156
 
156
157
  def create_ml_schema(
157
- model: Model, schema_name: str = "deriva-ml", project_name: str = None
158
+ model: Model, schema_name: str = "deriva-ml", project_name: Optional[str] = None
158
159
  ):
159
160
  if model.schemas.get(schema_name):
160
161
  model.schemas[schema_name].drop(cascade=True)
deriva_ml/upload.py CHANGED
@@ -483,7 +483,7 @@ def upload_directory(
483
483
 
484
484
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
485
485
  def upload_asset(
486
- model: DerivaModel, file: Path | str, table: Table | str, **kwargs: Any
486
+ model: DerivaModel, file: Path | str, table: Table, **kwargs: Any
487
487
  ) -> dict:
488
488
  """Upload the specified file into Hatrac and update the associated asset table.
489
489
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.8.10
3
+ Version: 1.9.0
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -0,0 +1,27 @@
1
+ deriva_ml/__init__.py,sha256=r1Z9N5vtZkAET7emqhpAx2bf_xJUp5wHOc4_DIplsG8,1082
2
+ deriva_ml/database_model.py,sha256=HaJoxKSogc-xLGaZfEviqRAWO9wUy52h7yK8by6FKxM,14838
3
+ deriva_ml/dataset.py,sha256=XIXyTej55WduvEOGitG5SJIfPYrQu36cXjCoCNHNMwQ,60746
4
+ deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
5
+ deriva_ml/dataset_bag.py,sha256=aOJxFA9t5apjE5BNBrk8Pi9R1Cp8AWnnaL-10P8ELrQ,11515
6
+ deriva_ml/demo_catalog.py,sha256=zQAHWSvrVPxMg-vyRUqoC0Jj5RhfGjkBwXW3mBksLhA,10986
7
+ deriva_ml/deriva_definitions.py,sha256=jNiInYA2Cb1GE4OOT1CofxBygdLDSOmNsw5Wl6NbZQE,8943
8
+ deriva_ml/deriva_ml_base.py,sha256=B0_0R0tgx4o30VM-QSSKIGy2BN5kOBcYKuYGvmPkwMg,46953
9
+ deriva_ml/deriva_model.py,sha256=B4gwr3-92IQU-mEZlusgNEnRyulD96esWGS67q9MzHk,12024
10
+ deriva_ml/execution.py,sha256=on8hAtuZr9qFiyxuk_vDCmnRJ9Cv4kFOgHK4HY4CmV8,29585
11
+ deriva_ml/execution_configuration.py,sha256=vsdL31J09dz7CQDd2rYXIjyBPwNlgAWvrTqsXNWi82g,3357
12
+ deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
13
+ deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
14
+ deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
15
+ deriva_ml/test_functions.py,sha256=-eqLHjjCQCLBNAr1ofbZekNiCOfMISSACRxT_YHER8I,4396
16
+ deriva_ml/upload.py,sha256=P35ViZzlNNbsXVbnTyq-G781nGHbX4md1tiCp3c_KKI,22264
17
+ deriva_ml/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ deriva_ml/schema_setup/annotations.py,sha256=v0gTpmWYxRqsQ-bcnQzsr8WowGv2pi9pZUsO3WWnu1U,9528
19
+ deriva_ml/schema_setup/create_schema.py,sha256=BRdYeWW5I8HxuATkB1hkKuIw4n-JQu620xod7EQoVSE,10674
20
+ deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
21
+ deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
22
+ deriva_ml-1.9.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
+ deriva_ml-1.9.0.dist-info/METADATA,sha256=REDBcboXpGhYbG7bVaICPhZP81cDLoSiCdiY7PX8GrQ,669
24
+ deriva_ml-1.9.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
25
+ deriva_ml-1.9.0.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
26
+ deriva_ml-1.9.0.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
27
+ deriva_ml-1.9.0.dist-info/RECORD,,
File without changes
@@ -1,36 +0,0 @@
1
- import sys
2
- import argparse
3
- from deriva.core import DerivaServer, get_credential, ErmrestCatalog
4
- from deriva.core.ermrest_model import builtin_types, Schema, Table, Column, ForeignKey
5
- from annotation_temp import generate_annotation
6
-
7
-
8
- def alter_table_annotation(catalog, schema_name: str, table_name: str, annotation: dict):
9
- model_root = catalog.getCatalogModel()
10
- table = model_root.dataset_table(schema_name, table_name)
11
- table.alter(annotations=annotation)
12
-
13
-
14
- def alter_schema_annotation(catalog, schema_name: str, annotation: dict):
15
- model_root = catalog.getCatalogModel()
16
- schema = model_root.schemas[schema_name]
17
- schema.alter(annotations=annotation)
18
-
19
- def main():
20
- parser = argparse.ArgumentParser()
21
- parser.add_argument('--hostname', type=str, required=True)
22
- parser.add_argument('--schema_name', type=str, required=True)
23
- parser.add_argument('--catalog_id', type=str, required=True)
24
- args = parser.parse_args()
25
- credentials = get_credential(args.hostname)
26
- catalog = ErmrestCatalog('https', args.hostname, args.catalog_id, credentials)
27
-
28
- annotations = generate_annotation(args.schema_name, args.catalog_id)
29
- alter_table_annotation(catalog, args.schema_name, 'Workflow', annotations["workflow_annotation"])
30
- alter_table_annotation(catalog, args.schema_name, 'Execution', annotations["execution_annotation"])
31
- alter_table_annotation(catalog, args.schema_name, 'Execution_Metadata', annotations["execution_metadata_annotation"])
32
- alter_table_annotation(catalog, args.schema_name, 'Execution_Assets', annotations["execution_assets_annotation"])
33
- alter_schema_annotation(catalog, args.schema_name, annotations["schema_annotation"])
34
-
35
- if __name__ == "__main__":
36
- sys.exit(main())
@@ -1,255 +0,0 @@
1
- import argparse
2
- import sys
3
-
4
-
5
- def generate_annotation(catalog_id: str, schema: str) -> dict:
6
- workflow_annotation = {
7
- "tag:isrd.isi.edu,2016:visible-columns": {
8
- "*": [
9
- "RID",
10
- "Name",
11
- "Description",
12
- {
13
- "display": {"markdown_pattern": "[{{{URL}}}]({{{URL}}})"},
14
- "markdown_name": "URL"
15
- },
16
- "Checksum",
17
- "Version",
18
- {
19
- "source": [
20
- {
21
- "outbound": [
22
- schema,
23
- "Workflow_Workflow_Type_fkey"
24
- ]
25
- },
26
- "RID"
27
- ]
28
- }
29
- ]
30
- }
31
- }
32
-
33
- execution_annotation = {
34
- "tag:isrd.isi.edu,2016:visible-columns": {
35
- "*": [
36
- "RID",
37
- [
38
- schema,
39
- "Execution_RCB_fkey"
40
- ],
41
- "RCT",
42
- "Description",
43
- {"source": [
44
- {"outbound": [
45
- "eye-ai",
46
- "Execution_Workflow_fkey"
47
- ]
48
- },
49
- "RID"
50
- ]
51
- },
52
- "Duration",
53
- "Status",
54
- "Status_Detail"
55
- ]
56
- },
57
- "tag:isrd.isi.edu,2016:visible-foreign-keys": {
58
- "detailed": [
59
- {
60
- "source": [
61
- {
62
- "inbound": [
63
- schema,
64
- "Dataset_Execution_Execution_fkey"
65
- ]
66
- },
67
- {
68
- "outbound": [
69
- schema,
70
- "Dataset_Execution_Dataset_fkey"
71
- ]
72
- },
73
- "RID"
74
- ],
75
- "markdown_name": "Dataset"
76
- },
77
- {
78
- "source": [
79
- {
80
- "inbound": [
81
- schema,
82
- "Execution_Assets_Execution_Execution_fkey"
83
- ]
84
- },
85
- {
86
- "outbound": [
87
- schema,
88
- "Execution_Assets_Execution_Execution_Assets_fkey"
89
- ]
90
- },
91
- "RID"
92
- ],
93
- "markdown_name": "Execution Assets"
94
- },
95
- {
96
- "source": [
97
- {
98
- "inbound": [
99
- schema,
100
- "Execution_Metadata_Execution_fkey"
101
- ]
102
- },
103
- "RID"
104
- ],
105
- "markdown_name": "Execution Metadata"
106
- }
107
- ]
108
- }
109
- }
110
-
111
- execution_assets_annotation = {
112
- "tag:isrd.isi.edu,2016:table-display": {
113
- "row_name": {
114
- "row_markdown_pattern": "{{{Filename}}}"
115
- }
116
- },
117
- "tag:isrd.isi.edu,2016:visible-columns": {
118
- "compact": [
119
- "RID",
120
- "URL",
121
- "Description",
122
- "Length",
123
- [
124
- schema,
125
- "Execution_Assets_Execution_Asset_Type_fkey"
126
- ],
127
- # {
128
- # "display": {
129
- # "template_engine": "handlebars",
130
- # "markdown_pattern": "{{#if (eq _Execution_Asset_Type \"2-5QME\")}}\n ::: iframe []("
131
- # "https://dev.eye-ai.org/~vivi/deriva-webapps/plot/?config=test-line"
132
- # "-plot&Execution_Assets_RID={{{RID}}}){class=chaise-autofill "
133
- # "style=\"min-width: 500px; min-height: 300px;\"} \\n:::\n {{/if}}"
134
- # },
135
- # "markdown_name": "ROC Plot"
136
- # }
137
- ],
138
- "detailed": [
139
- "RID",
140
- "RCT",
141
- "RMT",
142
- "RCB",
143
- "RMB",
144
- # {
145
- # "display": {
146
- # "template_engine": "handlebars",
147
- # "markdown_pattern": "{{#if (eq _Execution_Asset_Type \"2-5QME\")}} ::: iframe []("
148
- # "https://dev.eye-ai.org/~vivi/deriva-webapps/plot/?config=test-line"
149
- # "-plot&Execution_Assets_RID={{{RID}}}){style=\"min-width:1000px; "
150
- # "min-height:700px; height:70vh;\" class=\"chaise-autofill\"} \\n::: {"
151
- # "{/if}}"
152
- # },
153
- # "markdown_name": "ROC Plot"
154
- # },
155
- "URL",
156
- "Filename",
157
- "Description",
158
- "Length",
159
- "MD5",
160
- [
161
- schema,
162
- "Execution_Assets_Execution_Asset_Type_fkey"
163
- ]
164
- ]
165
- }
166
- }
167
-
168
- execution_metadata_annotation = {
169
- "tag:isrd.isi.edu,2016:table-display": {
170
- "row_name": {
171
- "row_markdown_pattern": "{{{Filename}}}"
172
- }
173
- }
174
- }
175
-
176
- schema_annotation = {
177
- "headTitle": "Catalog ML",
178
- "navbarMenu": {
179
- "newTab": False,
180
- "children": [
181
- {
182
- "name": "User Info",
183
- "children": [
184
- {
185
- "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_Client",
186
- "name": "Users"
187
- },
188
- {
189
- "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_Group",
190
- "name": "Groups"
191
- },
192
- {
193
- "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_RID_Lease",
194
- "name": "ERMrest RID Lease"
195
- }
196
- ]
197
- },
198
- {
199
- "name": "FaceBase-ML",
200
- "children": [
201
- {
202
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Workflow",
203
- "name": "Workflow"
204
- },
205
- {
206
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Workflow_Type",
207
- "name": "Workflow Type"
208
- },
209
- {
210
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution",
211
- "name": "Execution"
212
- },
213
- {
214
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Metadata",
215
- "name": "Execution Metadata"
216
- },
217
- {
218
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Metadata_Type",
219
- "name": "Execution Metadata Type"
220
- },
221
- {
222
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Assets",
223
- "name": "Execution Assets"
224
- },
225
- {
226
- "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Asset_Type",
227
- "name": "Execution Asset Type"
228
- }
229
- ]
230
- }
231
- ]
232
- },
233
- "navbarBrandText": "ML Data Browser",
234
- "systemColumnsDisplayEntry": ["RID"],
235
- "systemColumnsDisplayCompact": ["RID"]
236
- }
237
-
238
- return {"workflow_annotation": workflow_annotation,
239
- "execution_annotation": execution_annotation,
240
- "execution_assets_annotation": execution_assets_annotation,
241
- "execution_metadata_annotation": execution_metadata_annotation,
242
- "schema_annotation": schema_annotation
243
- }
244
-
245
-
246
- def main():
247
- parser = argparse.ArgumentParser()
248
- parser.add_argument('--catalog_id', type=str, required=True)
249
- parser.add_argument('--schema_name', type=str, required=True)
250
- args = parser.parse_args()
251
- return generate_annotation(args.catalog_id, args.schema_name)
252
-
253
-
254
- if __name__ == "__main__":
255
- sys.exit(main())
@@ -1,165 +0,0 @@
1
- import sys
2
-
3
- from deriva.core import DerivaServer, get_credential
4
- from deriva.core.ermrest_model import builtin_types, Schema, Table, Column, ForeignKey
5
- from deriva.chisel import Model, Schema, Table, Column, ForeignKey
6
- from deriva_ml.schema_setup.annotation_temp import generate_annotation
7
- import argparse
8
-
9
-
10
- def create_schema_if_not_exist(model, schema_name, schema_comment=None):
11
- if schema_name not in model.schemas:
12
- schema = model.create_schema(Schema.define(schema_name, schema_comment))
13
- return schema
14
- else:
15
- schema = model.schemas[schema_name]
16
- return schema
17
-
18
-
19
- def create_table_if_not_exist(schema, table_name, create_spec):
20
- if table_name not in schema.tables:
21
- table = schema.create_table(create_spec)
22
- return table
23
- else:
24
- table = schema.tables[table_name]
25
- return table
26
-
27
-
28
- def define_table_workflow(workflow_annotation: dict):
29
- table_def = Table.define(
30
- 'Workflow',
31
- column_defs=[
32
- Column.define('Name', builtin_types.text),
33
- Column.define('Description', builtin_types.markdown),
34
- Column.define('URL', builtin_types.ermrest_uri),
35
- Column.define('Checksum', builtin_types.text),
36
- Column.define('Version', builtin_types.text)
37
- ],
38
- fkey_defs=[
39
- ForeignKey.define(
40
- ['RCB'],
41
- 'public', 'ERMrest_Client',
42
- ['ID']
43
- )
44
- ],
45
- annotations=workflow_annotation
46
- )
47
- return table_def
48
-
49
-
50
- def define_table_execution(execution_annotation: dict):
51
- table_def = Table.define(
52
- 'Execution',
53
- column_defs=[
54
- Column.define('Description', builtin_types.markdown),
55
- Column.define('Duration', builtin_types.text),
56
- Column.define('Status', builtin_types.text),
57
- Column.define('Status_Detail', builtin_types.text)
58
- ],
59
- fkey_defs=[
60
- ForeignKey.define(
61
- ['RCB'],
62
- 'public', 'ERMrest_Client',
63
- ['ID']
64
- )
65
- ],
66
- annotations=execution_annotation
67
- )
68
- return table_def
69
-
70
-
71
- def define_asset_execution_metadata(schema: str, execution_metadata_annotation: dict):
72
- table_def = Table.define_asset(
73
- sname=schema,
74
- tname='Execution_Metadata',
75
- hatrac_template='/hatrac/metadata/{{MD5}}.{{Filename}}',
76
- fkey_defs=[
77
- ForeignKey.define(
78
- ['RCB'],
79
- 'public', 'ERMrest_Client',
80
- ['ID']
81
- )
82
- ],
83
- annotations=execution_metadata_annotation
84
- )
85
- return table_def
86
-
87
-
88
- def define_asset_execution_assets(schema: str, execution_assets_annotation: dict):
89
- table_def = Table.define_asset(
90
- sname=schema,
91
- tname='Execution_Assets',
92
- hatrac_template='/hatrac/execution_assets/{{MD5}}.{{Filename}}',
93
- fkey_defs=[
94
- ForeignKey.define(
95
- ['RCB'],
96
- 'public', 'ERMrest_Client',
97
- ['ID']
98
- )
99
- ],
100
- annotations=execution_assets_annotation
101
- )
102
- return table_def
103
-
104
-
105
- def setup_ml_workflow(model, schema_name, catalog_id):
106
- curie_template = catalog_id+':{RID}'
107
- schema = create_schema_if_not_exist(model, schema_name)
108
- # get annotations
109
- annotations = generate_annotation(catalog_id, schema_name)
110
- # Workflow
111
- workflow_table = create_table_if_not_exist(schema, 'Workflow',
112
- define_table_workflow(annotations["workflow_annotation"]))
113
- table_def_workflow_type_vocab = Table.define_vocabulary(
114
- tname='Workflow_Type', curie_template=curie_template
115
- )
116
- workflow_type_table = schema.create_table(table_def_workflow_type_vocab)
117
- workflow_table.add_reference(workflow_type_table)
118
-
119
- # Execution
120
- execution_table = create_table_if_not_exist(schema, 'Execution',
121
- define_table_execution(annotations["execution_annotation"]))
122
- execution_table.add_reference(workflow_table)
123
- # dataset_table = create_table_if_not_exist(schema, 'Dataset', define_table_dataset(schema))
124
- # association_dataset_execution = schema.create_association(dataset_table, execution_table)
125
-
126
- # Execution Metadata
127
- execution_metadata_table = create_table_if_not_exist(schema, 'Execution_Metadata',
128
- define_asset_execution_metadata(schema,
129
- annotations["execution_metadata_annotation"]))
130
- execution_metadata_table.add_reference(execution_table)
131
- table_def_metadata_type_vocab = Table.define_vocabulary(tname='Execution_Metadata_Type',
132
- curie_template=curie_template)
133
- metadata_type_table = schema.create_table(table_def_metadata_type_vocab)
134
- execution_metadata_table.add_reference(metadata_type_table)
135
-
136
- # Execution Asset
137
- execution_assets_table = create_table_if_not_exist(schema, 'Execution_Assets',
138
- define_asset_execution_assets(schema,
139
- annotations["execution_assets_annotation"]))
140
- association_execution_execution_asset = schema.create_association(execution_assets_table, execution_table)
141
-
142
- table_def_execution_product_type_vocab = Table.define_vocabulary(
143
- tname='Execution_Asset_Type', curie_template=curie_template
144
- )
145
- execution_asset_type_table = schema.create_table(table_def_execution_product_type_vocab)
146
- execution_assets_table.add_reference(execution_asset_type_table)
147
- # image_table = create_table_if_not_exist(schema, 'Image', define_asset_image(schema))
148
- # association_image_execution_asset = schema.create_association(execution_assets_table, image_table)
149
-
150
-
151
- def main():
152
- scheme = 'https'
153
- parser = argparse.ArgumentParser()
154
- parser.add_argument('--hostname', type=str, required=True)
155
- parser.add_argument('--schema_name', type=str, required=True)
156
- parser.add_argument('--catalog_id', type=str, required=True)
157
- args = parser.parse_args()
158
- credentials = get_credential(args.hostname)
159
- server = DerivaServer(scheme, args.hostname, credentials)
160
- model = Model.from_catalog(server.connect_ermrest(args.catalog_id))
161
- setup_ml_workflow(model, args.schema_name, args.catalog_id)
162
-
163
-
164
- if __name__ == "__main__":
165
- sys.exit(main())