deriva-ml 1.8.11__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/database_model.py CHANGED
@@ -1,12 +1,15 @@
-"""Ths module constains the definition of the DatabaseModel class. The role of this class is to provide an nterface between the BDBag representation
+"""Ths module contains the definition of the DatabaseModel class. The role of this class is to provide an nterface between the BDBag representation
 of a dataset and a sqllite database in which the contents of the bag are stored.
 """
+
+from __future__ import annotations
+
 import logging
 import sqlite3

 from csv import reader
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Generator
 from urllib.parse import urlparse

 from deriva.core.ermrest_model import Model
@@ -20,7 +23,7 @@ from .dataset_bag import DatasetBag
 class DatabaseModelMeta(type):
     """Use metaclass to ensure that there is onl one instance per path"""

-    _paths_loaded: dict[Path:"DatabaseModel"] = {}
+    _paths_loaded: dict[Path, "DatabaseModel"] = {}

     def __call__(cls, *args, **kwargs):
         logger = logging.getLogger("deriva_ml")
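This hunk also fixes a real annotation bug: `dict[Path:"DatabaseModel"]` is a slice expression inside a subscript, which type checkers reject; `dict[Path, "DatabaseModel"]` is the intended two-parameter generic. For context, a minimal self-contained sketch of the one-instance-per-path technique the metaclass implements (class names here are hypothetical, not the package's code):

    from pathlib import Path


    class PerPathMeta(type):
        """Metaclass that caches one instance per Path argument."""

        _paths_loaded: dict[Path, object] = {}

        def __call__(cls, path: Path, *args, **kwargs):
            # Construct the instance only on first use of this path;
            # later calls with the same path return the cached object.
            if path not in cls._paths_loaded:
                cls._paths_loaded[path] = super().__call__(path, *args, **kwargs)
            return cls._paths_loaded[path]


    class Bag(metaclass=PerPathMeta):
        def __init__(self, path: Path):
            self.path = path


    assert Bag(Path("/tmp/bag")) is Bag(Path("/tmp/bag"))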
@@ -47,7 +50,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
     Because of nested datasets, it's possible that more than one dataset rid is in a bag, or that a dataset rid might
     appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
     into DatabaseModels, is kept in the class variable `_rid_map`.
-
+
     Because you can load diffent versions of a dataset simultaniously, the dataset RID and version number are tracked, and a new
     sqllite instance is created for every new dataset version present.

@@ -315,6 +318,26 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
         )
         return datasets

+    def get_table_as_dict(self, table: str) -> Generator[dict[str, Any], None, None]:
+        """Retrieve the contents of the specified table as a dictionary.
+
+        Args:
+            table: Table to retrieve data from. f schema is not provided as part of the table name,
+                the method will attempt to locate the schema for the table.
+
+        Returns:
+            A generator producing dictionaries containing the contents of the specified table as name/value pairs.
+        """
+        table_name = self.normalize_table_name(table)
+        with self.dbase as dbase:
+            col_names = [
+                c[1]
+                for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()
+            ]
+        result = self.dbase.execute(f'SELECT * FROM "{table_name}"')
+        while row := result.fetchone():
+            yield dict(zip(col_names, row))
+
     def normalize_table_name(self, table: str) -> str:
         """Attempt to insert the schema into a table name if it's not provided.

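The new get_table_as_dict generator pairs SQLite's PRAGMA table_info (whose second column is the column name) with fetchone to stream rows as dicts without materializing the whole table. A self-contained sketch of the same technique using only the standard sqlite3 module; the table and data are made up for illustration:

    import sqlite3
    from typing import Any, Generator


    def rows_as_dicts(conn: sqlite3.Connection, table_name: str) -> Generator[dict[str, Any], None, None]:
        # PRAGMA table_info yields (cid, name, type, notnull, dflt_value, pk) per column.
        col_names = [c[1] for c in conn.execute(f'PRAGMA table_info("{table_name}")')]
        result = conn.execute(f'SELECT * FROM "{table_name}"')
        while row := result.fetchone():
            yield dict(zip(col_names, row))


    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE demo (RID TEXT, Value INTEGER)")
    conn.execute("INSERT INTO demo VALUES ('1-abc', 42)")
    print(list(rows_as_dicts(conn, "demo")))  # [{'RID': '1-abc', 'Value': 42}]

Quoting the table name in both statements keeps SQL keywords in table names from breaking the query; in the package the name has already passed through normalize_table_name, so it is not arbitrary user input.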
deriva_ml/dataset.py CHANGED
@@ -92,7 +92,7 @@ class Dataset:
         dataset_list: list[DatasetSpec],
         description: Optional[str] = "",
         execution_rid: Optional[RID] = None,
-    ) -> RID:
+    ) -> list[dict[str, Any]]:
         schema_path = self._model.catalog.getPathBuilder().schemas[self._ml_schema]

         # Construct version records for insert
@@ -245,7 +245,7 @@ class Dataset:
             DerivaMLException: if provided RID is not to a dataset_table.
         """

-        # Find all of the datasets that are reachable from this dataset and determine their new version numbers.
+        # Find all the datasets that are reachable from this dataset and determine their new version numbers.
         related_datasets = list(self._build_dataset_graph(dataset_rid=dataset_rid))
         version_update_list = [
             DatasetSpec(
@@ -254,7 +254,7 @@ class Dataset:
             )
             for ds_rid in related_datasets
         ]
-        updated_versions = self._insert_dataset_versions(
+        self._insert_dataset_versions(
             version_update_list, description=description, execution_rid=execution_rid
         )
         return [d.version for d in version_update_list if d.rid == dataset_rid][0]
@@ -751,9 +751,10 @@ class Dataset:
         ]

     def _table_paths(
-        self, dataset: DatasetSpec = None, snapshot_catalog: Optional[DerivaML] = None
+        self,
+        dataset: Optional[DatasetSpec] = None,
+        snapshot_catalog: Optional[DerivaML] = None,
     ) -> Iterator[tuple[str, str, Table]]:
-
         paths = self._collect_paths(dataset and dataset.rid, snapshot_catalog)

         def source_path(path: tuple[Table, ...]):
@@ -779,17 +780,20 @@ class Dataset:
     def _collect_paths(
         self,
         dataset_rid: Optional[RID] = None,
-        snapshot_catalog: Optional[DerivaML] = None,
+        snapshot: Optional[Dataset] = None,
        dataset_nesting_depth: Optional[int] = None,
    ) -> set[tuple[Table, ...]]:

-        snapshot_catalog = snapshot_catalog or self
+        snapshot_catalog = snapshot if snapshot else self
+
        dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
            "Dataset"
        ]
        dataset_dataset = snapshot_catalog._model.schemas[self._ml_schema].tables[
            "Dataset_Dataset"
        ]
+
+        # Figure out what types of elements the dataset contains.
        dataset_associations = [
            a
            for a in self.dataset_table.find_associations()
@@ -812,7 +816,8 @@ class Dataset:
            ]
        else:
            included_associations = dataset_associations
-        # Get the paths through the schema and filter out all of dataset paths not used by this dataset.
+
+        # Get the paths through the schema and filter out all the dataset paths not used by this dataset.
        paths = {
            tuple(p)
            for p in snapshot_catalog._model._schema_to_paths()
@@ -827,7 +832,7 @@ class Dataset:
        if dataset_rid:
            for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
                nested_paths |= self._collect_paths(
-                    c, snapshot_catalog=snapshot_catalog
+                    c, snapshot=snapshot_catalog
                )
        else:
            # Initialize nesting depth if not already provided.
deriva_ml/dataset_bag.py CHANGED
@@ -109,7 +109,7 @@ class DatasetBag:
         for ts, on in paths:
             tables = " JOIN ".join(ts)
             on_expression = " and ".join(
-                [f"{column_name(l)}={column_name(r)}" for l, r in on]
+                [f"{column_name(left)}={column_name(right)}" for left, right in on]
             )
             sql.append(
                 f"SELECT {select_args} FROM {tables} ON {on_expression} WHERE {dataset_table_name}.RID IN ({datasets})"
deriva_ml/demo_catalog.py CHANGED
@@ -5,6 +5,7 @@ import logging
 from random import random, randint
 import tempfile
 from tempfile import TemporaryDirectory
+from typing import Optional
 import itertools

 from deriva.config.acl_config import AclConfig
@@ -18,7 +19,6 @@ from requests import HTTPError
 from deriva_ml import (
     DerivaML,
     ExecutionConfiguration,
-    Workflow,
     MLVocab,
     BuiltinTypes,
     ColumnDefinition,
@@ -169,12 +169,9 @@ def create_demo_features(ml_instance):
         description="Model for our API workflow",
     )

-    api_workflow = ml_instance.add_workflow(
-        Workflow(
-            name="API Workflow",
-            url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
-            workflow_type="API Workflow",
-        )
+    api_workflow = ml_instance.create_workflow(
+        name="API Workflow",
+        workflow_type="API Workflow",
    )

     api_execution = ml_instance.create_execution(
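This hunk tracks an API change: the hand-built Workflow record, with an explicit URL, is replaced by create_workflow, which (per the deriva_ml_base.py hunks below) appears to identify the executing script or notebook and its git remote itself. A hedged before/after sketch; the variable name is illustrative:

    # 1.8.11: caller supplied the workflow record, including its URL.
    workflow_rid = ml_instance.add_workflow(
        Workflow(
            name="API Workflow",
            url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/pyproject.toml",
            workflow_type="API Workflow",
        )
    )

    # 1.9.0: URL and checksum are derived from the running program.
    workflow_rid = ml_instance.create_workflow(name="API Workflow", workflow_type="API Workflow")

The Workflow import disappears from this module accordingly, as the import hunk above shows.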
@@ -322,7 +319,11 @@ def create_demo_catalog(

 class DemoML(DerivaML):
     def __init__(
-        self, hostname, catalog_id, cache_dir: str = None, working_dir: str = None
+        self,
+        hostname,
+        catalog_id,
+        cache_dir: Optional[str] = None,
+        working_dir: Optional[str] = None,
     ):
         super().__init__(
             hostname=hostname,
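This is one instance of a fix applied throughout the release: parameters annotated `str = None` become `Optional[str] = None`. PEP 484 deprecated implicit Optional, and strict type checkers flag the old spelling; runtime behavior is unchanged. A minimal illustration:

    from typing import Optional


    def old_style(cache_dir: str = None): ...  # mypy: implicit Optional, rejected in strict mode


    def new_style(cache_dir: Optional[str] = None): ...  # explicit and checker-clean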
deriva_ml/deriva_definitions.py CHANGED
@@ -8,7 +8,7 @@ from enum import Enum
 from typing import Any, Iterable, Optional, Annotated

 import deriva.core.ermrest_model as em
-from urllib.parse import urlparse, urljoin
+from urllib.parse import urlparse
 from deriva.core.ermrest_model import builtin_types
 from pydantic import (
     BaseModel,
@@ -139,13 +139,18 @@ class FileSpec(BaseModel):
         if url_parts.scheme == "tag":
             return v
         elif not url_parts.scheme:
-            return f'tag://{gethostname()},{date.today()}:file://{v}'
+            return f"tag://{gethostname()},{date.today()}:file://{v}"
         else:
             raise ValidationError("url is not a file URL")

     @model_serializer()
     def serialize_filespec(self):
-        return {'URL': self.url, 'Description': self.description, 'MD5': self.md5, 'Length': self.length}
+        return {
+            "URL": self.url,
+            "Description": self.description,
+            "MD5": self.md5,
+            "Length": self.length,
+        }


 class VocabularyTerm(BaseModel):
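The serializer here was only reflowed, but its job is worth noting: it maps the model's lowercase field names onto the catalog's capitalized column names. A minimal sketch of the same pydantic v2 pattern, with a hypothetical two-field model:

    from pydantic import BaseModel, model_serializer


    class FileSpecSketch(BaseModel):
        url: str
        md5: str

        @model_serializer()
        def as_catalog_row(self):
            # Emit capitalized keys matching the catalog's column names.
            return {"URL": self.url, "MD5": self.md5}


    print(FileSpecSketch(url="file:///tmp/x", md5="abc").model_dump())
    # {'URL': 'file:///tmp/x', 'MD5': 'abc'}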
deriva_ml/deriva_ml_base.py CHANGED
@@ -32,6 +32,7 @@ from deriva.core.deriva_server import DerivaServer
 from deriva.core.ermrest_catalog import ResolveRidResult
 from deriva.core.ermrest_model import Key, Table
 from deriva.core.hatrac_store import HatracStore
+from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
 from pydantic import validate_call, ConfigDict
 from requests import RequestException

@@ -70,17 +71,29 @@ except ImportError:  # Graceful fallback if IceCream isn't installed.
 try:
     from IPython import get_ipython
 except ImportError:  # Graceful fallback if IPython isn't installed.
-    get_ipython = lambda: None
+
+    def get_ipython():
+        """Dummy routine in case you are not running in IPython."""
+        return None
+

 try:
     from jupyter_server.serverapp import list_running_servers
 except ImportError:
-    list_running_servers = lambda: []
+
+    def list_running_servers():
+        """Dummy routine in case you are not running in Jupyter."""
+        return []
+

 try:
     from ipykernel import get_connection_file
 except ImportError:
-    get_connection_file = lambda: ""
+
+    def get_connection_file():
+        """Dummy routine in case you are not running in Jupyter."""
+        return ""
+

 if TYPE_CHECKING:
     from .execution import Execution
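Swapping `name = lambda: ...` for a named def is the change PEP 8 (rule E731) asks for: the function gets a real __name__, a docstring, and useful tracebacks. The surrounding try/except is the standard optional-dependency fallback, sketched here on its own:

    try:
        from IPython import get_ipython  # real function when IPython is installed
    except ImportError:

        def get_ipython():
            """Fallback used when IPython is not installed."""
            return None

    # Callers probe the environment the same way in either case.
    in_ipython = get_ipython() is not None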
@@ -102,8 +115,8 @@ class DerivaML(Dataset):
         self,
         hostname: str,
         catalog_id: str | int,
-        domain_schema: str = None,
-        project_name: str = None,
+        domain_schema: Optional[str] = None,
+        project_name: Optional[str] = None,
         cache_dir: Optional[str] = None,
         working_dir: Optional[str] = None,
         model_version: str = "1",
@@ -205,9 +218,8 @@ class DerivaML(Dataset):
         except subprocess.CalledProcessError:
             self._logger.error("nbstripout is not found.")

-    def _get_notebook_session(
-        self,
-    ) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
+    @staticmethod
+    def _get_notebook_session() -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
         """Return the absolute path of the current notebook."""
         # Get the kernel's connection file and extract the kernel ID
         try:
@@ -267,7 +279,7 @@ class DerivaML(Dataset):
             )  # Get the caller's filename, which is two up the stack from here.
         else:
             raise DerivaMLException(
-                f"Looking for caller failed"
+                "Looking for caller failed"
             )  # Stack is too shallow
         return filename, is_notebook

@@ -335,7 +347,7 @@ class DerivaML(Dataset):
         )

     def asset_dir(
-        self, table: str | Table, prefix: str | Path = None
+        self, table: str | Table, prefix: Optional[str | Path] = None
     ) -> UploadAssetDirectory:
         """Return a local file path in which to place a files for an asset table. T

@@ -369,6 +381,29 @@ class DerivaML(Dataset):
         """
         return self.cache_dir if cached else self.working_dir

+    @staticmethod
+    def globus_login(host: str) -> None:
+        """Log into the specified host using Globus.
+
+        Args:
+            host:
+
+        Returns:
+
+        """
+        gnl = GlobusNativeLogin(host=host)
+        if gnl.is_logged_in([host]):
+            print("You are already logged in.")
+        else:
+            gnl.login(
+                [host],
+                no_local_server=True,
+                no_browser=True,
+                refresh_tokens=True,
+                update_bdbag_keychain=True,
+            )
+            print("Login Successful")
+
     def chaise_url(self, table: RID | Table) -> str:
         """Return a Chaise URL to the specified table.

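The new static method wraps GlobusNativeLogin from deriva.core so a user can authenticate before constructing a DerivaML instance. A hypothetical call; the hostname and catalog id are placeholders:

    from deriva_ml import DerivaML

    DerivaML.globus_login("example.derivacloud.org")  # interactive Globus token flow
    ml = DerivaML(hostname="example.derivacloud.org", catalog_id="1")

Passing update_bdbag_keychain=True also records the credential in the bdbag keychain, which should let dataset bag downloads authenticate with the same token.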
@@ -379,15 +414,15 @@ class DerivaML(Dataset):
         Returns:
             URL to the table in Chaise format.
         """
+        table_obj = self.model.name_to_table(table)
         try:
-            table = self.model.name_to_table(table)
             uri = self.catalog.get_server_uri().replace(
                 "ermrest/catalog/", "chaise/recordset/#"
             )
         except DerivaMLException:
             # Perhaps we have a RID....
             uri = self.cite(table)
-        return f"{uri}/{urlquote(table.schema.name)}:{urlquote(table.name)}"
+        return f"{uri}/{urlquote(table_obj.schema.name)}:{urlquote(table_obj.name)}"

     def cite(self, entity: dict | str) -> str:
         """Return a citation URL for the provided entity.
@@ -401,7 +436,9 @@ class DerivaML(Dataset):
         Raises:
             DerivaMLException: if provided RID does not exist.
         """
-        if entity.startswith(f"https://{self.host_name}/id/{self.catalog_id}/"):
+        if isinstance(entity, str) and entity.startswith(
+            f"https://{self.host_name}/id/{self.catalog_id}/"
+        ):
             # Already got a citation...
             return entity
         try:
@@ -498,9 +535,9 @@ class DerivaML(Dataset):
     def create_asset(
         self,
         asset_name: str,
-        column_defs: Iterable[ColumnDefinition] = None,
+        column_defs: Optional[Iterable[ColumnDefinition]] = None,
         comment: str = "",
-        schema: str = None,
+        schema: Optional[str] = None,
     ) -> Table:
         """Create an asset table with the given asset name.

@@ -532,9 +569,9 @@ class DerivaML(Dataset):
         self,
         target_table: Table | str,
         feature_name: str,
-        terms: list[Table | str] = None,
-        assets: list[Table | str] = None,
-        metadata: Iterable[ColumnDefinition | Table | Key | str] = None,
+        terms: Optional[list[Table | str]] = None,
+        assets: Optional[list[Table | str]] = None,
+        metadata: Optional[Iterable[ColumnDefinition | Table | Key | str]] = None,
         optional: Optional[list[str]] = None,
         comment: str = "",
     ) -> type[FeatureRecord]:
@@ -899,6 +936,7 @@ class DerivaML(Dataset):
         """

         def path_to_asset(path: str) -> str:
+            """Pull the asset name out of a path to that asset in the filesystem"""
             components = path.split("/")
             return components[
                 components.index("asset") + 2
@@ -963,6 +1001,7 @@ class DerivaML(Dataset):
         )

         def check_file_type(dtype: str) -> bool:
+            """Make sure that the specified string is either the name or synonym for a file type term."""
             for term in defined_types:
                 if dtype == term.name or (term.synonyms and file_type in term.synonyms):
                     return True
@@ -1098,7 +1137,7 @@ class DerivaML(Dataset):

     def create_workflow(
         self, name: str, workflow_type: str, description: str = "", create: bool = True
-    ) -> RID:
+    ) -> RID | None:
         """Identify current executing program and return a workflow RID for it

         Determine the notebook or script that is currently being executed. Assume that this is
@@ -1166,7 +1205,7 @@ class DerivaML(Dataset):
             )
             github_url = result.stdout.strip().removesuffix(".git")
         except subprocess.CalledProcessError:
-            raise DerivaMLException(f"No GIT remote found")
+            raise DerivaMLException("No GIT remote found")

         # Find the root directory for the repository
         repo_root = self._get_git_root()
@@ -1188,7 +1227,7 @@ class DerivaML(Dataset):

         """Get SHA-1 hash of latest commit of the file in the repository"""
         result = subprocess.run(
-            ["git", "log", "-n", "1", "--pretty=format:%H" "--", self.executable_path],
+            ["git", "log", "-n", "1", "--pretty=format:%H--", self.executable_path],
             cwd=self.executable_path.parent,
             capture_output=True,
             text=True,
deriva_ml/deriva_model.py CHANGED
@@ -21,7 +21,7 @@ from .deriva_definitions import (

 from collections import Counter
 from pydantic import validate_call, ConfigDict
-from typing import Iterable
+from typing import Iterable, Optional


 class DerivaModel:
@@ -267,7 +267,7 @@ class DerivaModel:
     def _schema_to_paths(
         self,
         root: Table = None,
-        path: list[Table] = None,
+        path: Optional[list[Table]] = None,
     ) -> list[list[Table]]:
         """Recursively walk over the domain schema graph and extend the current path.

deriva_ml/execution.py CHANGED
@@ -54,7 +54,9 @@ except ImportError:  # Graceful fallback if IceCream isn't installed.
 try:
     from jupyter_server.serverapp import list_running_servers
 except ImportError:
-    list_running_servers = lambda: []
+
+    def list_running_servers():
+        return []


 class Execution:
@@ -155,7 +157,6 @@ class Execution:
         self._initialize_execution(reload)

     def _save_runtime_environment(self):
-
         runtime_env_path = ExecMetadataVocab.runtime_env.value
         runtime_env_dir = self.execution_metadata_path(runtime_env_path)
         with NamedTemporaryFile(
@@ -267,7 +268,7 @@ class Execution:
         # Execution metadata cannot be in a directory, so map path into filename.
         checkpoint_path = (
             self.execution_metadata_path(ExecMetadataVocab.runtime_env.value)
-            / f"{notebook_name.replace('/','_')}.checkpoint"
+            / f"{notebook_name.replace('/', '_')}.checkpoint"
         )
         with open(checkpoint_path, "w", encoding="utf-8") as f:
             json.dump(notebook_content, f)
@@ -359,7 +360,7 @@ class Execution:
         if m := is_feature_asset_dir(p):
             try:
                 self.update_status(
-                    Status.running, f'Uploading feature {m["feature_name"]}...'
+                    Status.running, f"Uploading feature {m['feature_name']}..."
                 )
                 feature_assets[m["target_table"], m["feature_name"]] = (
                     self._ml_object.upload_assets(p)
deriva_ml/execution_configuration.py CHANGED
@@ -1,12 +1,12 @@
 from __future__ import annotations

 import json
-from typing import Optional, Any
+from typing import Optional

 from pydantic import (
     BaseModel,
     conlist,
-    ConfigDict, field_validator,
+    ConfigDict,
 )
 from pathlib import Path

@@ -36,7 +36,6 @@ class Workflow(BaseModel):
     checksum: Optional[str]


-
 class ExecutionConfiguration(BaseModel):
     """Define the parameters that are used to configure a specific execution.

@@ -69,23 +68,21 @@ class ExecutionConfiguration(BaseModel):
             config = json.load(fd)
         return ExecutionConfiguration.model_validate(config)

-    def download_execution_configuration(
-        self, configuration_rid: RID
-    ) -> ExecutionConfiguration:
-        """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
-        configuration in hatrac
-
-        Args:
-            configuration_rid: RID that should be to an asset table that refers to an execution configuration
-
-        Returns:
-            A ExecutionConfiguration object for configured by the parameters in the configuration file.
-        """
-        AssertionError("Not Implemented")
-        return ExecutionConfiguration.load_configuration(configuration_rid)
-
-        # configuration = self.retrieve_rid(configuration_rid)
-        # with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
-        #     hs = HatracStore("https", self.host_name, self.credential)
-        #     hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
-        #     return ExecutionConfiguration.load_configuration(Path(dest_file.name))
+    # def download_execution_configuration(
+    #     self, configuration_rid: RID
+    # ) -> ExecutionConfiguration:
+    #     """Create an ExecutionConfiguration object from a catalog RID that points to a JSON representation of that
+    #     configuration in hatrac
+    #
+    #     Args:
+    #         configuration_rid: RID that should be to an asset table that refers to an execution configuration
+    #
+    #     Returns:
+    #         A ExecutionConfiguration object for configured by the parameters in the configuration file.
+    #     """
+    #     AssertionError("Not Implemented")
+    #     configuration = self.retrieve_rid(configuration_rid)
+    #     with NamedTemporaryFile("w+", delete=False, suffix=".json") as dest_file:
+    #         hs = HatracStore("https", self.host_name, self.credential)
+    #         hs.get_obj(path=configuration["URL"], destfilename=dest_file.name)
+    #         return ExecutionConfiguration.load_configuration(Path(dest_file.name))
deriva_ml/schema_setup/annotations.py CHANGED
@@ -240,7 +240,7 @@ def main():
     parser.add_argument("--catalog_id", type=str, required=True)
     parser.add_argument("--schema_name", type=str, required=True)
     args = parser.parse_args()
-    generate_annotation(args.catalog_id, args.schema_name)
+    generate_annotation(args.catalog_id)


 if __name__ == "__main__":
deriva_ml/schema_setup/create_schema.py CHANGED
@@ -1,5 +1,6 @@
 import argparse
 import sys
+from typing import Optional

 from deriva.core import DerivaServer, get_credential
 from deriva.core.ermrest_model import Model
@@ -32,7 +33,7 @@ def define_table_workflow(workflow_annotation: dict):
     )


-def define_table_dataset(dataset_annotation: dict = None):
+def define_table_dataset(dataset_annotation: Optional[dict] = None):
     return Table.define(
         tname="Dataset",
         column_defs=[
@@ -154,7 +155,7 @@ def create_www_schema(model: Model):


 def create_ml_schema(
-    model: Model, schema_name: str = "deriva-ml", project_name: str = None
+    model: Model, schema_name: str = "deriva-ml", project_name: Optional[str] = None
 ):
     if model.schemas.get(schema_name):
         model.schemas[schema_name].drop(cascade=True)
deriva_ml/upload.py CHANGED
@@ -483,7 +483,7 @@ def upload_directory(

 @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
 def upload_asset(
-    model: DerivaModel, file: Path | str, table: Table | str, **kwargs: Any
+    model: DerivaModel, file: Path | str, table: Table, **kwargs: Any
 ) -> dict:
     """Upload the specified file into Hatrac and update the associated asset table.

deriva_ml-1.9.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.8.11
+Version: 1.9.0
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
deriva_ml-1.9.0.dist-info/RECORD ADDED
@@ -0,0 +1,27 @@
+deriva_ml/__init__.py,sha256=r1Z9N5vtZkAET7emqhpAx2bf_xJUp5wHOc4_DIplsG8,1082
+deriva_ml/database_model.py,sha256=HaJoxKSogc-xLGaZfEviqRAWO9wUy52h7yK8by6FKxM,14838
+deriva_ml/dataset.py,sha256=XIXyTej55WduvEOGitG5SJIfPYrQu36cXjCoCNHNMwQ,60746
+deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
+deriva_ml/dataset_bag.py,sha256=aOJxFA9t5apjE5BNBrk8Pi9R1Cp8AWnnaL-10P8ELrQ,11515
+deriva_ml/demo_catalog.py,sha256=zQAHWSvrVPxMg-vyRUqoC0Jj5RhfGjkBwXW3mBksLhA,10986
+deriva_ml/deriva_definitions.py,sha256=jNiInYA2Cb1GE4OOT1CofxBygdLDSOmNsw5Wl6NbZQE,8943
+deriva_ml/deriva_ml_base.py,sha256=B0_0R0tgx4o30VM-QSSKIGy2BN5kOBcYKuYGvmPkwMg,46953
+deriva_ml/deriva_model.py,sha256=B4gwr3-92IQU-mEZlusgNEnRyulD96esWGS67q9MzHk,12024
+deriva_ml/execution.py,sha256=on8hAtuZr9qFiyxuk_vDCmnRJ9Cv4kFOgHK4HY4CmV8,29585
+deriva_ml/execution_configuration.py,sha256=vsdL31J09dz7CQDd2rYXIjyBPwNlgAWvrTqsXNWi82g,3357
+deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
+deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
+deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
+deriva_ml/test_functions.py,sha256=-eqLHjjCQCLBNAr1ofbZekNiCOfMISSACRxT_YHER8I,4396
+deriva_ml/upload.py,sha256=P35ViZzlNNbsXVbnTyq-G781nGHbX4md1tiCp3c_KKI,22264
+deriva_ml/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+deriva_ml/schema_setup/annotations.py,sha256=v0gTpmWYxRqsQ-bcnQzsr8WowGv2pi9pZUsO3WWnu1U,9528
+deriva_ml/schema_setup/create_schema.py,sha256=BRdYeWW5I8HxuATkB1hkKuIw4n-JQu620xod7EQoVSE,10674
+deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
+deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
+deriva_ml-1.9.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deriva_ml-1.9.0.dist-info/METADATA,sha256=REDBcboXpGhYbG7bVaICPhZP81cDLoSiCdiY7PX8GrQ,669
+deriva_ml-1.9.0.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+deriva_ml-1.9.0.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
+deriva_ml-1.9.0.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
+deriva_ml-1.9.0.dist-info/RECORD,,
deriva_ml/build/lib/schema_setup/alter_annotation.py DELETED
@@ -1,36 +0,0 @@
-import sys
-import argparse
-from deriva.core import DerivaServer, get_credential, ErmrestCatalog
-from deriva.core.ermrest_model import builtin_types, Schema, Table, Column, ForeignKey
-from annotation_temp import generate_annotation
-
-
-def alter_table_annotation(catalog, schema_name: str, table_name: str, annotation: dict):
-    model_root = catalog.getCatalogModel()
-    table = model_root.dataset_table(schema_name, table_name)
-    table.alter(annotations=annotation)
-
-
-def alter_schema_annotation(catalog, schema_name: str, annotation: dict):
-    model_root = catalog.getCatalogModel()
-    schema = model_root.schemas[schema_name]
-    schema.alter(annotations=annotation)
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--hostname', type=str, required=True)
-    parser.add_argument('--schema_name', type=str, required=True)
-    parser.add_argument('--catalog_id', type=str, required=True)
-    args = parser.parse_args()
-    credentials = get_credential(args.hostname)
-    catalog = ErmrestCatalog('https', args.hostname, args.catalog_id, credentials)
-
-    annotations = generate_annotation(args.schema_name, args.catalog_id)
-    alter_table_annotation(catalog, args.schema_name, 'Workflow', annotations["workflow_annotation"])
-    alter_table_annotation(catalog, args.schema_name, 'Execution', annotations["execution_annotation"])
-    alter_table_annotation(catalog, args.schema_name, 'Execution_Metadata', annotations["execution_metadata_annotation"])
-    alter_table_annotation(catalog, args.schema_name, 'Execution_Assets', annotations["execution_assets_annotation"])
-    alter_schema_annotation(catalog, args.schema_name, annotations["schema_annotation"])
-
-if __name__ == "__main__":
-    sys.exit(main())
deriva_ml/build/lib/schema_setup/annotation_temp.py DELETED
@@ -1,255 +0,0 @@
-import argparse
-import sys
-
-
-def generate_annotation(catalog_id: str, schema: str) -> dict:
-    workflow_annotation = {
-        "tag:isrd.isi.edu,2016:visible-columns": {
-            "*": [
-                "RID",
-                "Name",
-                "Description",
-                {
-                    "display": {"markdown_pattern": "[{{{URL}}}]({{{URL}}})"},
-                    "markdown_name": "URL"
-                },
-                "Checksum",
-                "Version",
-                {
-                    "source": [
-                        {
-                            "outbound": [
-                                schema,
-                                "Workflow_Workflow_Type_fkey"
-                            ]
-                        },
-                        "RID"
-                    ]
-                }
-            ]
-        }
-    }
-
-    execution_annotation = {
-        "tag:isrd.isi.edu,2016:visible-columns": {
-            "*": [
-                "RID",
-                [
-                    schema,
-                    "Execution_RCB_fkey"
-                ],
-                "RCT",
-                "Description",
-                {"source": [
-                    {"outbound": [
-                        "eye-ai",
-                        "Execution_Workflow_fkey"
-                    ]
-                    },
-                    "RID"
-                ]
-                },
-                "Duration",
-                "Status",
-                "Status_Detail"
-            ]
-        },
-        "tag:isrd.isi.edu,2016:visible-foreign-keys": {
-            "detailed": [
-                {
-                    "source": [
-                        {
-                            "inbound": [
-                                schema,
-                                "Dataset_Execution_Execution_fkey"
-                            ]
-                        },
-                        {
-                            "outbound": [
-                                schema,
-                                "Dataset_Execution_Dataset_fkey"
-                            ]
-                        },
-                        "RID"
-                    ],
-                    "markdown_name": "Dataset"
-                },
-                {
-                    "source": [
-                        {
-                            "inbound": [
-                                schema,
-                                "Execution_Assets_Execution_Execution_fkey"
-                            ]
-                        },
-                        {
-                            "outbound": [
-                                schema,
-                                "Execution_Assets_Execution_Execution_Assets_fkey"
-                            ]
-                        },
-                        "RID"
-                    ],
-                    "markdown_name": "Execution Assets"
-                },
-                {
-                    "source": [
-                        {
-                            "inbound": [
-                                schema,
-                                "Execution_Metadata_Execution_fkey"
-                            ]
-                        },
-                        "RID"
-                    ],
-                    "markdown_name": "Execution Metadata"
-                }
-            ]
-        }
-    }
-
-    execution_assets_annotation = {
-        "tag:isrd.isi.edu,2016:table-display": {
-            "row_name": {
-                "row_markdown_pattern": "{{{Filename}}}"
-            }
-        },
-        "tag:isrd.isi.edu,2016:visible-columns": {
-            "compact": [
-                "RID",
-                "URL",
-                "Description",
-                "Length",
-                [
-                    schema,
-                    "Execution_Assets_Execution_Asset_Type_fkey"
-                ],
-                # {
-                #     "display": {
-                #         "template_engine": "handlebars",
-                #         "markdown_pattern": "{{#if (eq _Execution_Asset_Type \"2-5QME\")}}\n ::: iframe []("
-                #                             "https://dev.eye-ai.org/~vivi/deriva-webapps/plot/?config=test-line"
-                #                             "-plot&Execution_Assets_RID={{{RID}}}){class=chaise-autofill "
-                #                             "style=\"min-width: 500px; min-height: 300px;\"} \\n:::\n {{/if}}"
-                #     },
-                #     "markdown_name": "ROC Plot"
-                # }
-            ],
-            "detailed": [
-                "RID",
-                "RCT",
-                "RMT",
-                "RCB",
-                "RMB",
-                # {
-                #     "display": {
-                #         "template_engine": "handlebars",
-                #         "markdown_pattern": "{{#if (eq _Execution_Asset_Type \"2-5QME\")}} ::: iframe []("
-                #                             "https://dev.eye-ai.org/~vivi/deriva-webapps/plot/?config=test-line"
-                #                             "-plot&Execution_Assets_RID={{{RID}}}){style=\"min-width:1000px; "
-                #                             "min-height:700px; height:70vh;\" class=\"chaise-autofill\"} \\n::: {"
-                #                             "{/if}}"
-                #     },
-                #     "markdown_name": "ROC Plot"
-                # },
-                "URL",
-                "Filename",
-                "Description",
-                "Length",
-                "MD5",
-                [
-                    schema,
-                    "Execution_Assets_Execution_Asset_Type_fkey"
-                ]
-            ]
-        }
-    }
-
-    execution_metadata_annotation = {
-        "tag:isrd.isi.edu,2016:table-display": {
-            "row_name": {
-                "row_markdown_pattern": "{{{Filename}}}"
-            }
-        }
-    }
-
-    schema_annotation = {
-        "headTitle": "Catalog ML",
-        "navbarMenu": {
-            "newTab": False,
-            "children": [
-                {
-                    "name": "User Info",
-                    "children": [
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_Client",
-                            "name": "Users"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_Group",
-                            "name": "Groups"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/public:ERMrest_RID_Lease",
-                            "name": "ERMrest RID Lease"
-                        }
-                    ]
-                },
-                {
-                    "name": "FaceBase-ML",
-                    "children": [
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/{schema}:Workflow",
-                            "name": "Workflow"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/{schema}:Workflow_Type",
-                            "name": "Workflow Type"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution",
-                            "name": "Execution"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Metadata",
-                            "name": "Execution Metadata"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Metadata_Type",
-                            "name": "Execution Metadata Type"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Assets",
-                            "name": "Execution Assets"
-                        },
-                        {
-                            "url": f"/chaise/recordset/#{catalog_id}/{schema}:Execution_Asset_Type",
-                            "name": "Execution Asset Type"
-                        }
-                    ]
-                }
-            ]
-        },
-        "navbarBrandText": "ML Data Browser",
-        "systemColumnsDisplayEntry": ["RID"],
-        "systemColumnsDisplayCompact": ["RID"]
-    }
-
-    return {"workflow_annotation": workflow_annotation,
-            "execution_annotation": execution_annotation,
-            "execution_assets_annotation": execution_assets_annotation,
-            "execution_metadata_annotation": execution_metadata_annotation,
-            "schema_annotation": schema_annotation
-            }
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--catalog_id', type=str, required=True)
-    parser.add_argument('--schema_name', type=str, required=True)
-    args = parser.parse_args()
-    return generate_annotation(args.catalog_id, args.schema_name)
-
-
-if __name__ == "__main__":
-    sys.exit(main())
deriva_ml/build/lib/schema_setup/create_schema.py DELETED
@@ -1,165 +0,0 @@
-import sys
-
-from deriva.core import DerivaServer, get_credential
-from deriva.core.ermrest_model import builtin_types, Schema, Table, Column, ForeignKey
-from deriva.chisel import Model, Schema, Table, Column, ForeignKey
-from deriva_ml.schema_setup.annotation_temp import generate_annotation
-import argparse
-
-
-def create_schema_if_not_exist(model, schema_name, schema_comment=None):
-    if schema_name not in model.schemas:
-        schema = model.create_schema(Schema.define(schema_name, schema_comment))
-        return schema
-    else:
-        schema = model.schemas[schema_name]
-        return schema
-
-
-def create_table_if_not_exist(schema, table_name, create_spec):
-    if table_name not in schema.tables:
-        table = schema.create_table(create_spec)
-        return table
-    else:
-        table = schema.tables[table_name]
-        return table
-
-
-def define_table_workflow(workflow_annotation: dict):
-    table_def = Table.define(
-        'Workflow',
-        column_defs=[
-            Column.define('Name', builtin_types.text),
-            Column.define('Description', builtin_types.markdown),
-            Column.define('URL', builtin_types.ermrest_uri),
-            Column.define('Checksum', builtin_types.text),
-            Column.define('Version', builtin_types.text)
-        ],
-        fkey_defs=[
-            ForeignKey.define(
-                ['RCB'],
-                'public', 'ERMrest_Client',
-                ['ID']
-            )
-        ],
-        annotations=workflow_annotation
-    )
-    return table_def
-
-
-def define_table_execution(execution_annotation: dict):
-    table_def = Table.define(
-        'Execution',
-        column_defs=[
-            Column.define('Description', builtin_types.markdown),
-            Column.define('Duration', builtin_types.text),
-            Column.define('Status', builtin_types.text),
-            Column.define('Status_Detail', builtin_types.text)
-        ],
-        fkey_defs=[
-            ForeignKey.define(
-                ['RCB'],
-                'public', 'ERMrest_Client',
-                ['ID']
-            )
-        ],
-        annotations=execution_annotation
-    )
-    return table_def
-
-
-def define_asset_execution_metadata(schema: str, execution_metadata_annotation: dict):
-    table_def = Table.define_asset(
-        sname=schema,
-        tname='Execution_Metadata',
-        hatrac_template='/hatrac/metadata/{{MD5}}.{{Filename}}',
-        fkey_defs=[
-            ForeignKey.define(
-                ['RCB'],
-                'public', 'ERMrest_Client',
-                ['ID']
-            )
-        ],
-        annotations=execution_metadata_annotation
-    )
-    return table_def
-
-
-def define_asset_execution_assets(schema: str, execution_assets_annotation: dict):
-    table_def = Table.define_asset(
-        sname=schema,
-        tname='Execution_Assets',
-        hatrac_template='/hatrac/execution_assets/{{MD5}}.{{Filename}}',
-        fkey_defs=[
-            ForeignKey.define(
-                ['RCB'],
-                'public', 'ERMrest_Client',
-                ['ID']
-            )
-        ],
-        annotations=execution_assets_annotation
-    )
-    return table_def
-
-
-def setup_ml_workflow(model, schema_name, catalog_id):
-    curie_template = catalog_id+':{RID}'
-    schema = create_schema_if_not_exist(model, schema_name)
-    # get annotations
-    annotations = generate_annotation(catalog_id, schema_name)
-    # Workflow
-    workflow_table = create_table_if_not_exist(schema, 'Workflow',
-                                               define_table_workflow(annotations["workflow_annotation"]))
-    table_def_workflow_type_vocab = Table.define_vocabulary(
-        tname='Workflow_Type', curie_template=curie_template
-    )
-    workflow_type_table = schema.create_table(table_def_workflow_type_vocab)
-    workflow_table.add_reference(workflow_type_table)
-
-    # Execution
-    execution_table = create_table_if_not_exist(schema, 'Execution',
-                                                define_table_execution(annotations["execution_annotation"]))
-    execution_table.add_reference(workflow_table)
-    # dataset_table = create_table_if_not_exist(schema, 'Dataset', define_table_dataset(schema))
-    # association_dataset_execution = schema.create_association(dataset_table, execution_table)
-
-    # Execution Metadata
-    execution_metadata_table = create_table_if_not_exist(schema, 'Execution_Metadata',
-                                                         define_asset_execution_metadata(schema,
-                                                                                         annotations["execution_metadata_annotation"]))
-    execution_metadata_table.add_reference(execution_table)
-    table_def_metadata_type_vocab = Table.define_vocabulary(tname='Execution_Metadata_Type',
-                                                            curie_template=curie_template)
-    metadata_type_table = schema.create_table(table_def_metadata_type_vocab)
-    execution_metadata_table.add_reference(metadata_type_table)
-
-    # Execution Asset
-    execution_assets_table = create_table_if_not_exist(schema, 'Execution_Assets',
-                                                       define_asset_execution_assets(schema,
-                                                                                     annotations["execution_assets_annotation"]))
-    association_execution_execution_asset = schema.create_association(execution_assets_table, execution_table)
-
-    table_def_execution_product_type_vocab = Table.define_vocabulary(
-        tname='Execution_Asset_Type', curie_template=curie_template
-    )
-    execution_asset_type_table = schema.create_table(table_def_execution_product_type_vocab)
-    execution_assets_table.add_reference(execution_asset_type_table)
-    # image_table = create_table_if_not_exist(schema, 'Image', define_asset_image(schema))
-    # association_image_execution_asset = schema.create_association(execution_assets_table, image_table)
-
-
-def main():
-    scheme = 'https'
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--hostname', type=str, required=True)
-    parser.add_argument('--schema_name', type=str, required=True)
-    parser.add_argument('--catalog_id', type=str, required=True)
-    args = parser.parse_args()
-    credentials = get_credential(args.hostname)
-    server = DerivaServer(scheme, args.hostname, credentials)
-    model = Model.from_catalog(server.connect_ermrest(args.catalog_id))
-    setup_ml_workflow(model, args.schema_name, args.catalog_id)
-
-
-if __name__ == "__main__":
-    sys.exit(main())
deriva_ml/build/lib/schema_setup/table_comments_utils.py DELETED
@@ -1,56 +0,0 @@
-import sys
-from deriva.core import ErmrestCatalog, get_credential
-import argparse
-import os
-from pathlib import Path
-
-
-def update_table_comments(model, schema_name: str, table_name: str, comments_dir: str) -> None:
-    table = model.schemas[schema_name].tables[table_name]
-    table_comments_dir = Path(comments_dir)/Path(f"{schema_name}/{table_name}")
-    for file in os.listdir(table_comments_dir):
-        file_path = os.path.join(table_comments_dir, file)
-        with open(file_path, "r") as f:
-            comment_str = f.read()
-        if file.split(".")[0] == table_name:
-            table.comment = comment_str
-        else:
-            table.columns[file.split(".")[0]].comment = comment_str
-
-
-def update_schema_comments(model, schema_name: str, comments_dir: str) -> None:
-    schema_comments_dir = Path(comments_dir)/Path(schema_name)
-    for table in os.listdir(schema_comments_dir):
-        if not table.endswith(".DS_Store"):
-            update_table_comments(model, schema_name, table, comments_dir)
-
-
-def main():
-    scheme = 'https'
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--hostname', type=str, required=True)
-    parser.add_argument('--schema_name', type=str, required=True)
-    parser.add_argument('--catalog_id', type=str, required=True)
-    parser.add_argument('--comments_dir', type=str, required=True,
-                        help="The directory containing the comments files for the whole catalog")
-    parser.add_argument('--table_name', type=str,
-                        help="Only update the comments for one table")
-    args = parser.parse_args()
-
-    credentials = get_credential(args.hostname)
-    catalog = ErmrestCatalog(scheme, args.hostname, args.catalog_id, credentials)
-    model = catalog.getCatalogModel()
-    if args.table_name:
-        update_table_comments(model, args.schema_name, args.table_name, args.comments_dir)
-        model.apply()
-    else:
-        update_schema_comments(model, args.schema_name, args.comments_dir)
-        model.apply()
-
-
-if __name__ == '__main__':
-    sys.exit(main())
-
-
-
-# docs/<schema-name>/<table-name>/[table|<column-name>.Md
deriva_ml/schema_setup/alter_annotation.py DELETED
@@ -1,55 +0,0 @@
-import sys
-import argparse
-from deriva.core import get_credential, ErmrestCatalog
-from deriva.core.ermrest_model import Model
-
-from deriva_ml.schema_setup.annotations import generate_annotation
-
-
-def alter_table_annotation(
-    model_root: Model, schema_name: str, table_name: str, annotation: dict
-):
-    table = model_root.schemas[schema_name].tables[table_name]
-    table.alter(annotations=annotation)
-    model_root.apply()
-
-
-def alter_schema_annotation(model_root: Model, schema_name: str, annotation: dict):
-    schema = model_root.schemas[schema_name]
-    schema.alter(annotations=annotation)
-    model_root.apply()
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--hostname", type=str, required=True)
-    parser.add_argument("--schema_name", type=str, required=True)
-    parser.add_argument("--catalog_id", type=str, required=True)
-    args = parser.parse_args()
-    credentials = get_credential(args.hostname)
-    catalog = ErmrestCatalog("https", args.hostname, args.catalog_id, credentials)
-    model = catalog.getCatalogModel()
-    annotations = generate_annotation(model)
-    alter_table_annotation(
-        model, args.schema_name, "Workflow", annotations["workflow_annotation"]
-    )
-    alter_table_annotation(
-        model, args.schema_name, "Execution", annotations["execution_annotation"]
-    )
-    alter_table_annotation(
-        model,
-        args.schema_name,
-        "Execution_Metadata",
-        annotations["execution_metadata_annotation"],
-    )
-    alter_table_annotation(
-        model,
-        args.schema_name,
-        "Execution_Asset",
-        annotations["execution_asset_annotation"],
-    )
-    alter_schema_annotation(model, args.schema_name, annotations["schema_annotation"])
-
-
-if __name__ == "__main__":
-    sys.exit(main())
deriva_ml-1.8.11.dist-info/RECORD DELETED
@@ -1,33 +0,0 @@
-deriva_ml/__init__.py,sha256=r1Z9N5vtZkAET7emqhpAx2bf_xJUp5wHOc4_DIplsG8,1082
-deriva_ml/database_model.py,sha256=uhoyVyd8MQmY8J9ovCH8fjxhZDxxXNkdJyYdeyEGPXA,13898
-deriva_ml/dataset.py,sha256=xC6QPUp4MZcJiEnOEU3NnzoLBL9RcJWtPTyzIQP0Ivw,60666
-deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
-deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
-deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
-deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
-deriva_ml/deriva_ml_base.py,sha256=KbmJ0-mGuZn7-wuzQemzof8o8mA_3-UVlQCrnDsKod0,45741
-deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
-deriva_ml/execution.py,sha256=uDblqngcldgR7X4W1PfMV4iPWkxwQYSr9CBmXNlIv1E,29572
-deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
-deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
-deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
-deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
-deriva_ml/test_functions.py,sha256=-eqLHjjCQCLBNAr1ofbZekNiCOfMISSACRxT_YHER8I,4396
-deriva_ml/upload.py,sha256=CKtT-gBln3pnAll9TFaiPhFSHC-bzg9oE4ruh_OSOqY,22270
-deriva_ml/build/lib/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deriva_ml/build/lib/schema_setup/alter_annotation.py,sha256=pkwk0WystN69JfAFK4iBJZAZVQKbRs-gN9IFYuS9rfg,1739
-deriva_ml/build/lib/schema_setup/annotation_temp.py,sha256=Euygu8wNklZFUbR6mz-pDWJemlzdsIn9d6j0f6fCfgE,9102
-deriva_ml/build/lib/schema_setup/create_schema.py,sha256=9LI2ivR0xS4kVBxkz3F1Ac4hoYGGOVkXYmuhllqWvKE,6528
-deriva_ml/build/lib/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
-deriva_ml/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deriva_ml/schema_setup/alter_annotation.py,sha256=HyGWe4fBLQY0wbcKnfrhEoVDs4SfnoOF0l_Rz5-tDlk,1794
-deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-XryVSVyso8,9546
-deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
-deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
-deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
-deriva_ml-1.8.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deriva_ml-1.8.11.dist-info/METADATA,sha256=RyttxTkz_MCnWX1hZK2g7ffPyd54txE6AAZ2GMSpQ54,670
-deriva_ml-1.8.11.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-deriva_ml-1.8.11.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
-deriva_ml-1.8.11.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
-deriva_ml-1.8.11.dist-info/RECORD,,