deriva-ml 1.10.1__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
deriva_ml/dataset.py CHANGED
@@ -805,7 +805,7 @@ class Dataset:
805
805
  dataset_elements = [
806
806
  snapshot_catalog._model.name_to_table(e)
807
807
  for e, m in snapshot_catalog.list_dataset_members(
808
- dataset_rid=dataset_rid, limit=1
808
+ dataset_rid=dataset_rid, # limit=1 Limit seems to make things run slow.
809
809
  ).items()
810
810
  if m
811
811
  ]
deriva_ml/dataset_bag.py CHANGED
@@ -168,7 +168,7 @@ class DatasetBag:
168
168
  yield dict(zip(col_names, row))
169
169
 
170
170
  @validate_call
171
- def list_dataset_members(self, recurse: bool = False) -> dict[str, list[tuple]]:
171
+ def list_dataset_members(self, recurse: bool = False) -> dict[str, dict[str, Any]]:
172
172
  """Return a list of entities associated with a specific _dataset_table.
173
173
 
174
174
  Args:
@@ -206,12 +206,19 @@ class DatasetBag:
206
206
  )
207
207
 
208
208
  with self.database as db:
209
+ col_names = [
210
+ c[1]
211
+ for c in db.execute(f'PRAGMA table_info("{sql_target}")').fetchall()
212
+ ]
213
+ select_cols = ",".join([f'"{sql_target}".{c}' for c in col_names])
209
214
  sql_cmd = (
210
- f'SELECT * FROM "{sql_member}" '
215
+ f'SELECT {select_cols} FROM "{sql_member}" '
211
216
  f'JOIN "{sql_target}" ON "{sql_member}".{member_link[0]} = "{sql_target}".{member_link[1]} '
212
217
  f'WHERE "{self.dataset_rid}" = "{sql_member}".Dataset;'
213
218
  )
214
- target_entities = db.execute(sql_cmd).fetchall()
219
+ target_entities = [
220
+ dict(zip(col_names, e)) for e in db.execute(sql_cmd).fetchall()
221
+ ]
215
222
  members[target_table.name].extend(target_entities)
216
223
 
217
224
  target_entities = [] # path.entities().fetch()
deriva_ml/demo_catalog.py CHANGED
@@ -2,9 +2,7 @@ import atexit
2
2
  from importlib.metadata import version
3
3
  from importlib.resources import files
4
4
  import logging
5
- from random import random, randint
6
- import tempfile
7
- from tempfile import TemporaryDirectory
5
+ from random import randint, random
8
6
  from typing import Optional
9
7
  import itertools
10
8
 
@@ -12,7 +10,6 @@ from deriva.config.acl_config import AclConfig
12
10
  from deriva.core import DerivaServer
13
11
  from deriva.core import ErmrestCatalog, get_credential
14
12
  from deriva.core.datapath import DataPathException
15
- from deriva.core.ermrest_model import Model
16
13
  from deriva.core.ermrest_model import builtin_types, Schema, Table, Column
17
14
  from requests import HTTPError
18
15
 
@@ -35,48 +32,51 @@ TEST_DATASET_SIZE = 4
35
32
  def reset_demo_catalog(deriva_ml: DerivaML, sname: str):
36
33
  model = deriva_ml.model
37
34
  for trial in range(3):
38
- for t in [
39
- v
40
- for v in model.schemas[sname].tables.values()
41
- if v.name not in {"Subject", "Image"}
42
- ]:
35
+ for t in [v for v in model.schemas[sname].tables.values()]:
43
36
  try:
44
37
  t.drop()
45
38
  except HTTPError:
46
39
  pass
47
-
40
+ model.schemas[sname].drop()
48
41
  # Empty out remaining tables.
49
42
  pb = deriva_ml.pathBuilder
50
43
  retry = True
51
44
  while retry:
52
- retry = False
53
- for s in [sname, "deriva-ml"]:
54
- for t in pb.schemas[s].tables.values():
55
- for e in t.entities().fetch():
56
- try:
57
- t.filter(t.RID == e["RID"]).delete()
58
- except DataPathException: # FK constraint.
59
- retry = True
60
-
45
+ for t in pb.schemas["deriva-ml"].tables.values():
46
+ for e in t.entities().fetch():
47
+ try:
48
+ t.filter(t.RID == e["RID"]).delete()
49
+ except DataPathException: # FK constraint.
50
+ retry = True
61
51
  initialize_ml_schema(model, "deriva-ml")
52
+ create_domain_schema(deriva_ml, sname)
62
53
 
63
54
 
64
55
  def populate_demo_catalog(deriva_ml: DerivaML, sname: str) -> None:
65
56
  # Delete any vocabularies and features.
66
- reset_demo_catalog(deriva_ml, sname)
67
57
  domain_schema = deriva_ml.catalog.getPathBuilder().schemas[sname]
68
58
  subject = domain_schema.tables["Subject"]
69
59
  ss = subject.insert([{"Name": f"Thing{t + 1}"} for t in range(TEST_DATASET_SIZE)])
70
-
71
- with TemporaryDirectory() as tmpdir:
72
- image_dir = deriva_ml.asset_dir("Image", prefix=tmpdir)
60
+ deriva_ml.add_term(
61
+ MLVocab.workflow_type,
62
+ "Demo Catalog Creation",
63
+ description="A workflow demonstrating how to create a demo catalog.",
64
+ )
65
+ execution = deriva_ml.create_execution(
66
+ ExecutionConfiguration(
67
+ workflow=deriva_ml.create_workflow(
68
+ name="Demo Catalog", workflow_type="Demo Catalog Creation"
69
+ )
70
+ )
71
+ )
72
+ with execution.execute() as e:
73
73
  for s in ss:
74
- image_file = image_dir.create_file(
75
- f"test_{s['RID']}.txt", {"Subject": s["RID"]}
74
+ image_file = e.asset_file_path(
75
+ "Image", f"test_{s['RID']}.txt", Subject=s["RID"]
76
76
  )
77
77
  with open(image_file, "w") as f:
78
78
  f.write(f"Hello there {random()}\n")
79
- deriva_ml.upload_assets(image_dir)
79
+ execution.upload_execution_outputs()
80
80
 
81
81
 
82
82
  def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RID]]:
@@ -84,6 +84,13 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RI
84
84
  ml_instance.add_dataset_element_type("Image")
85
85
 
86
86
  type_rid = ml_instance.add_term("Dataset_Type", "TestSet", description="A test")
87
+ training_rid = ml_instance.add_term(
88
+ "Dataset_Type", "Training", description="A traing set"
89
+ )
90
+ testing_rid = ml_instance.add_term(
91
+ "Dataset_Type", "Testing", description="A testing set"
92
+ )
93
+
87
94
  table_path = (
88
95
  ml_instance.catalog.getPathBuilder()
89
96
  .schemas[ml_instance.domain_schema]
@@ -94,7 +101,7 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RI
94
101
  dataset_rids = []
95
102
  for r in subject_rids[0:4]:
96
103
  d = ml_instance.create_dataset(
97
- type_rid.name,
104
+ type=[type_rid.name, "Testing"],
98
105
  description=f"Dataset {r}",
99
106
  version=DatasetVersion(1, 0, 0),
100
107
  )
@@ -104,7 +111,7 @@ def create_demo_datasets(ml_instance: DerivaML) -> tuple[RID, list[RID], list[RI
104
111
  nested_datasets = []
105
112
  for i in range(0, 4, 2):
106
113
  nested_dataset = ml_instance.create_dataset(
107
- type_rid.name,
114
+ type=[type_rid.name, "Training"],
108
115
  description=f"Nested Dataset {i}",
109
116
  version=DatasetVersion(1, 0, 0),
110
117
  )
@@ -132,13 +139,11 @@ def create_demo_features(ml_instance):
132
139
  "Well",
133
140
  description="The subject self reports that they feel well",
134
141
  )
135
-
136
142
  ml_instance.create_vocabulary(
137
143
  "ImageQuality", "Controlled vocabulary for image quality"
138
144
  )
139
145
  ml_instance.add_term("ImageQuality", "Good", description="The image is good")
140
146
  ml_instance.add_term("ImageQuality", "Bad", description="The image is bad")
141
-
142
147
  box_asset = ml_instance.create_asset(
143
148
  "BoundingBox", comment="A file that contains a cropped version of a image"
144
149
  )
@@ -150,7 +155,6 @@ def create_demo_features(ml_instance):
150
155
  metadata=[ColumnDefinition(name="Scale", type=BuiltinTypes.int2, nullok=True)],
151
156
  optional=["Scale"],
152
157
  )
153
-
154
158
  ml_instance.create_feature("Image", "BoundingBox", assets=[box_asset])
155
159
  ml_instance.create_feature("Image", "Quality", terms=["ImageQuality"])
156
160
 
@@ -158,78 +162,88 @@ def create_demo_features(ml_instance):
158
162
  ImageBoundingboxFeature = ml_instance.feature_record_class("Image", "BoundingBox")
159
163
  SubjectWellnessFeature = ml_instance.feature_record_class("Subject", "Health")
160
164
 
165
+ # Get the workflow for this notebook
166
+
161
167
  ml_instance.add_term(
162
168
  MLVocab.workflow_type,
163
- "API Workflow",
169
+ "Feature Notebook Workflow",
164
170
  description="A Workflow that uses Deriva ML API",
165
171
  )
166
172
  ml_instance.add_term(
167
- MLVocab.execution_asset_type,
168
- "API_Model",
169
- description="Model for our API workflow",
173
+ MLVocab.asset_type, "API_Model", description="Model for our Notebook workflow"
170
174
  )
171
-
172
- api_workflow = ml_instance.create_workflow(
173
- name="API Workflow",
174
- workflow_type="API Workflow",
175
+ notebook_workflow = ml_instance.create_workflow(
176
+ name="API Workflow", workflow_type="Feature Notebook Workflow"
175
177
  )
176
178
 
177
- api_execution = ml_instance.create_execution(
179
+ feature_execution = ml_instance.create_execution(
178
180
  ExecutionConfiguration(
179
- workflow=api_workflow, description="Our Sample Workflow instance"
181
+ workflow=notebook_workflow, description="Our Sample Workflow instance"
180
182
  )
181
183
  )
182
184
 
183
- with tempfile.TemporaryDirectory() as temp_dir:
184
- assetdir = ml_instance.asset_dir("BoundingBox", prefix=temp_dir)
185
- for i in range(10):
186
- with open(assetdir.path / f"box{i}.txt", "w") as fp:
187
- fp.write(f"Hi there {i}")
188
- bounding_box_assets = ml_instance.upload_assets(assetdir)
189
- bounding_box_rids = [a.result["RID"] for a in bounding_box_assets.values()]
190
-
191
- # Get the IDs of al of the things that we are going to want to attach features to.
192
185
  subject_rids = [
193
186
  i["RID"] for i in ml_instance.domain_path.tables["Subject"].entities().fetch()
194
187
  ]
195
188
  image_rids = [
196
189
  i["RID"] for i in ml_instance.domain_path.tables["Image"].entities().fetch()
197
190
  ]
198
-
199
191
  subject_feature_list = [
200
192
  SubjectWellnessFeature(
201
193
  Subject=subject_rid,
202
- Execution=api_execution.execution_rid,
194
+ Execution=feature_execution.execution_rid,
203
195
  SubjectHealth=["Well", "Sick"][randint(0, 1)],
204
196
  Scale=randint(1, 10),
205
197
  )
206
198
  for subject_rid in subject_rids
207
199
  ]
208
200
 
201
+ # Create a new set of images. For fun, lets wrap this in an execution so we get status updates
202
+ bounding_box_files = []
203
+ for i in range(10):
204
+ bounding_box_file = feature_execution.asset_file_path(
205
+ "BoundingBox", f"box{i}.txt"
206
+ )
207
+ with open(bounding_box_file, "w") as fp:
208
+ fp.write(f"Hi there {i}")
209
+ bounding_box_files.append(bounding_box_file)
210
+
211
+ image_bounding_box_feature_list = [
212
+ ImageBoundingboxFeature(
213
+ Image=image_rid,
214
+ BoundingBox=asset_name,
215
+ )
216
+ for image_rid, asset_name in zip(
217
+ image_rids, itertools.cycle(bounding_box_files)
218
+ )
219
+ ]
220
+
209
221
  image_quality_feature_list = [
210
222
  ImageQualityFeature(
211
223
  Image=image_rid,
212
- Execution=api_execution.execution_rid,
213
224
  ImageQuality=["Good", "Bad"][randint(0, 1)],
214
225
  )
215
226
  for image_rid in image_rids
216
227
  ]
217
228
 
218
- image_bounding_box_feature_list = [
219
- ImageBoundingboxFeature(
220
- Image=image_rid,
221
- Execution=api_execution.execution_rid,
222
- BoundingBox=asset_rid,
229
+ subject_feature_list = [
230
+ SubjectWellnessFeature(
231
+ Subject=subject_rid,
232
+ SubjectHealth=["Well", "Sick"][randint(0, 1)],
233
+ Scale=randint(1, 10),
223
234
  )
224
- for image_rid, asset_rid in zip(image_rids, itertools.cycle(bounding_box_rids))
235
+ for subject_rid in subject_rids
225
236
  ]
226
237
 
227
- ml_instance.add_features(subject_feature_list)
228
- ml_instance.add_features(image_quality_feature_list)
229
- ml_instance.add_features(image_bounding_box_feature_list)
238
+ with feature_execution.execute() as execution:
239
+ feature_execution.add_features(image_bounding_box_feature_list)
240
+ feature_execution.add_features(image_quality_feature_list)
241
+ feature_execution.add_features(subject_feature_list)
242
+
243
+ feature_execution.upload_execution_outputs()
230
244
 
231
245
 
232
- def create_domain_schema(model: Model, sname: str) -> None:
246
+ def create_domain_schema(ml_instance: DerivaML, sname: str) -> None:
233
247
  """
234
248
  Create a domain schema. Assumes that the ml-schema has already been created.
235
249
  :param model:
@@ -238,28 +252,19 @@ def create_domain_schema(model: Model, sname: str) -> None:
238
252
  """
239
253
 
240
254
  # Make sure that we have a ml schema
241
- _ = model.schemas["deriva-ml"]
255
+ _ = ml_instance.model.schemas["deriva-ml"]
242
256
 
243
- if model.schemas.get(sname):
257
+ if ml_instance.model.schemas.get(sname):
244
258
  # Clean out any old junk....
245
- model.schemas[sname].drop()
259
+ ml_instance.model.schemas[sname].drop()
246
260
 
247
- domain_schema = model.create_schema(
261
+ domain_schema = ml_instance.model.model.create_schema(
248
262
  Schema.define(sname, annotations={"name_style": {"underline_space": True}})
249
263
  )
250
264
  subject_table = domain_schema.create_table(
251
265
  Table.define("Subject", column_defs=[Column.define("Name", builtin_types.text)])
252
266
  )
253
-
254
- image_table = domain_schema.create_table(
255
- Table.define_asset(
256
- sname=sname,
257
- tname="Image",
258
- hatrac_template="/hatrac/image_asset/{{MD5}}.{{Filename}}",
259
- column_defs=[Column.define("Name", builtin_types.text)],
260
- )
261
- )
262
- image_table.create_reference(subject_table)
267
+ ml_instance.create_asset("Image", referenced_tables=[subject_table])
263
268
 
264
269
 
265
270
  def destroy_demo_catalog(catalog):
@@ -284,13 +289,14 @@ def create_demo_catalog(
284
289
 
285
290
  try:
286
291
  create_ml_schema(model, project_name=project_name)
287
- create_domain_schema(model, domain_schema)
288
292
  deriva_ml = DerivaML(
289
293
  hostname=hostname,
290
294
  catalog_id=test_catalog.catalog_id,
291
295
  project_name=project_name,
296
+ domain_schema=domain_schema,
292
297
  logging_level=logging.WARN,
293
298
  )
299
+ create_domain_schema(deriva_ml, domain_schema)
294
300
  working_dir = deriva_ml.working_dir
295
301
  dataset_table = deriva_ml.dataset_table
296
302
  dataset_table.annotations.update(
@@ -186,9 +186,9 @@ class MLVocab(StrEnum):
186
186
 
187
187
  dataset_type = "Dataset_Type"
188
188
  workflow_type = "Workflow_Type"
189
- execution_asset_type = "Execution_Asset_Type"
190
- execution_metadata_type = "Execution_Metadata_Type"
191
189
  file_type = "File_Type"
190
+ asset_type = "Asset_Type"
191
+ asset_role = "Asset_Role"
192
192
 
193
193
 
194
194
  class ExecMetadataVocab(StrEnum):
@@ -31,7 +31,6 @@ from deriva.core.datapath import DataPathException
31
31
  from deriva.core.deriva_server import DerivaServer
32
32
  from deriva.core.ermrest_catalog import ResolveRidResult
33
33
  from deriva.core.ermrest_model import Key, Table
34
- from deriva.core.hatrac_store import HatracStore
35
34
  from deriva.core.utils.globus_auth_utils import GlobusNativeLogin
36
35
  from pydantic import validate_call, ConfigDict
37
36
  from requests import RequestException
@@ -42,24 +41,17 @@ from .dataset import Dataset
42
41
  from .dataset_aux_classes import DatasetSpec
43
42
  from .dataset_bag import DatasetBag
44
43
  from .deriva_model import DerivaModel
45
- from .upload import (
46
- table_path,
47
- execution_rids,
48
- execution_metadata_dir,
49
- upload_directory,
50
- UploadAssetDirectory,
51
- )
44
+ from .upload import table_path, execution_rids, asset_file_path
52
45
  from .deriva_definitions import ColumnDefinition
53
- from .deriva_definitions import ExecMetadataVocab
54
46
  from .deriva_definitions import (
55
47
  RID,
56
48
  Status,
57
- FileUploadState,
58
49
  DerivaMLException,
59
50
  ML_SCHEMA,
60
51
  VocabularyTerm,
61
52
  MLVocab,
62
53
  FileSpec,
54
+ TableDefinition,
63
55
  )
64
56
 
65
57
  try:
@@ -346,30 +338,6 @@ class DerivaML(Dataset):
346
338
  table=self.model.name_to_table(table).name,
347
339
  )
348
340
 
349
- def asset_dir(
350
- self, table: str | Table, prefix: Optional[str | Path] = None
351
- ) -> UploadAssetDirectory:
352
- """Return a local file path in which to place a files for an asset table. T
353
-
354
- Args:
355
- table: Location of where to place files. Defaults to execution_assets_path.
356
- prefix: Root path to asset directory.
357
-
358
- Returns:
359
- Path to the directory in which asset files should be placed.
360
- """
361
- table = self.model.name_to_table(table)
362
- if not self.model.is_asset(table):
363
- raise DerivaMLException(f"The table {table} is not an asset table.")
364
-
365
- prefix = Path(prefix) if prefix else self.working_dir
366
- return UploadAssetDirectory(
367
- model=self.model,
368
- prefix=prefix,
369
- schema=table.schema.name,
370
- table=table.name,
371
- )
372
-
373
341
  def download_dir(self, cached: bool = False) -> Path:
374
342
  """Location where downloaded files are placed.
375
343
 
@@ -532,10 +500,17 @@ class DerivaML(Dataset):
532
500
  )
533
501
  )
534
502
 
503
+ def create_table(self, table: TableDefinition) -> Table:
504
+ """Create a table from a table definition."""
505
+ return self.model.schemas[self.domain_schema].create_table(table.model_dump())
506
+
507
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
535
508
  def create_asset(
536
509
  self,
537
510
  asset_name: str,
538
511
  column_defs: Optional[Iterable[ColumnDefinition]] = None,
512
+ fkey_defs: Optional[Iterable[ColumnDefinition]] = None,
513
+ referenced_tables: Optional[Iterable[Table]] = None,
539
514
  comment: str = "",
540
515
  schema: Optional[str] = None,
541
516
  ) -> Table:
@@ -544,6 +519,8 @@ class DerivaML(Dataset):
544
519
  Args:
545
520
  asset_name: Name of the asset table.
546
521
  column_defs: Iterable of ColumnDefinition objects to provide additional metadata for asset.
522
+ fkey_defs: Iterable of ForeignKeyDefinition objects to provide additional metadata for asset.
523
+ referenced_tables: Iterable of Table objects to which asset should provide foreign-key references to.
547
524
  comment: Description of the asset table. (Default value = '')
548
525
  schema: Schema in which to create the asset table. Defaults to domain_schema.
549
526
  asset_name: str:
@@ -553,17 +530,82 @@ class DerivaML(Dataset):
553
530
  Table object for the asset table.
554
531
  """
555
532
  column_defs = column_defs or []
533
+ fkey_defs = fkey_defs or []
534
+ referenced_tables = referenced_tables or []
556
535
  schema = schema or self.domain_schema
536
+
537
+ self.add_term(
538
+ MLVocab.asset_type, asset_name, description=f"A {asset_name} asset"
539
+ )
557
540
  asset_table = self.model.schemas[schema].create_table(
558
541
  Table.define_asset(
559
542
  schema,
560
543
  asset_name,
561
544
  column_defs=[c.model_dump() for c in column_defs],
545
+ fkey_defs=[fk.model_dump() for fk in fkey_defs],
562
546
  comment=comment,
563
547
  )
564
548
  )
549
+
550
+ self.model.schemas[self.domain_schema].create_table(
551
+ Table.define_association(
552
+ [
553
+ (asset_table.name, asset_table),
554
+ ("Asset_Type", self.model.name_to_table("Asset_Type")),
555
+ ]
556
+ )
557
+ )
558
+ for t in referenced_tables:
559
+ asset_table.create_reference(self.model.name_to_table(t))
560
+ # Create a table to track execution that creates the asset
561
+ atable = self.model.schemas[self.domain_schema].create_table(
562
+ Table.define_association(
563
+ [
564
+ (asset_name, asset_table),
565
+ (
566
+ "Execution",
567
+ self.model.schemas[self.ml_schema].tables["Execution"],
568
+ ),
569
+ ]
570
+ )
571
+ )
572
+ atable.create_reference(self.model.name_to_table("Asset_Role"))
565
573
  return asset_table
566
574
 
575
+ # @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
576
+ def list_assets(self, asset_table: Table | str):
577
+ """Return the contents of an asset table"""
578
+
579
+ if not self.model.is_asset(asset_table):
580
+ raise DerivaMLException(f"Table {asset_table.name} is not an asset")
581
+ asset_table = self.model.name_to_table(asset_table)
582
+ pb = self._model.catalog.getPathBuilder()
583
+ asset_path = pb.schemas[asset_table.schema.name].tables[asset_table.name]
584
+
585
+ asset_type_table = self._model.find_association(asset_table, MLVocab.asset_type)
586
+ type_path = pb.schemas[asset_type_table.schema.name].tables[
587
+ asset_type_table.name
588
+ ]
589
+
590
+ # Get a list of all the asset_type values associated with this dataset_table.
591
+ assets = []
592
+ for asset in asset_path.entities().fetch():
593
+ asset_types = (
594
+ type_path.filter(type_path.columns[asset_table.name] == asset["RID"])
595
+ .attributes(type_path.Asset_Type)
596
+ .fetch()
597
+ )
598
+ assets.append(
599
+ asset
600
+ | {
601
+ MLVocab.asset_type.value: [
602
+ asset_type[MLVocab.asset_type.value]
603
+ for asset_type in asset_types
604
+ ]
605
+ }
606
+ )
607
+ return assets
608
+
567
609
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
568
610
  def create_feature(
569
611
  self,
@@ -717,24 +759,6 @@ class DerivaML(Dataset):
717
759
  """
718
760
  return self.model.find_features(table)
719
761
 
720
- @validate_call
721
- def add_features(self, features: Iterable[FeatureRecord]) -> int:
722
- """Add a set of new feature values to the catalog.
723
-
724
- Args:
725
- features: Iterable[FeatureRecord]:
726
-
727
- Returns:
728
- Number of attributes added
729
- """
730
- features = list(features)
731
- feature_table = features[0].feature.feature_table
732
- feature_path = self.pathBuilder.schemas[feature_table.schema.name].tables[
733
- feature_table.name
734
- ]
735
- entries = feature_path.insert(f.model_dump() for f in features)
736
- return len(entries)
737
-
738
762
  # noinspection PyProtectedMember
739
763
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
740
764
  def list_feature_values(
@@ -838,7 +862,8 @@ class DerivaML(Dataset):
838
862
  raise DerivaMLException(f"The table {table} is not a controlled vocabulary")
839
863
  schema_name, table_name = vocab_table.schema.name, vocab_table.name
840
864
  schema_path = self.catalog.getPathBuilder().schemas[schema_name]
841
- for term in schema_path.tables[table_name].entities():
865
+
866
+ for term in schema_path.tables[table_name].entities().fetch():
842
867
  if term_name == term["Name"] or (
843
868
  term["Synonyms"] and term_name in term["Synonyms"]
844
869
  ):
@@ -891,65 +916,6 @@ class DerivaML(Dataset):
891
916
  snapshot_catalog=DerivaML(self.host_name, self._version_snapshot(dataset)),
892
917
  )
893
918
 
894
- @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
895
- def download_asset(self, asset_rid: RID, dest_dir: Path) -> Path:
896
- """Download an asset from a URL and place it in a local directory.
897
-
898
- Args:
899
- asset_rid: URL of the asset.
900
- dest_dir: Destination directory for the asset.
901
-
902
- Returns:
903
- A Path object to the downloaded asset.
904
- """
905
- table = self.resolve_rid(asset_rid).table
906
- if not self.model.is_asset(table):
907
- raise DerivaMLException(f"RID {asset_rid} is not for an asset table.")
908
-
909
- tpath = self.pathBuilder.schemas[table.schema.name].tables[table.name]
910
- asset_metadata = list(tpath.filter(tpath.RID == asset_rid).entities())[0]
911
- asset_url = asset_metadata["URL"]
912
- asset_filename = dest_dir / asset_metadata["Filename"]
913
-
914
- hs = HatracStore("https", self.host_name, self.credential)
915
- hs.get_obj(path=asset_url, destfilename=asset_filename.as_posix())
916
- return Path(asset_filename)
917
-
918
- @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
919
- def upload_assets(
920
- self,
921
- assets_dir: str | Path | UploadAssetDirectory,
922
- ) -> dict[Any, FileUploadState] | None:
923
- """Upload assets from a directory.
924
-
925
- This routine assumes that the current upload specification includes a configuration for the specified directory.
926
- Every asset in the specified directory is uploaded
927
-
928
- Args:
929
- assets_dir: Directory containing the assets to upload.
930
-
931
- Returns:
932
- Results of the upload operation.
933
-
934
- Raises:
935
- DerivaMLException: If there is an issue uploading the assets.
936
- """
937
-
938
- def path_to_asset(path: str) -> str:
939
- """Pull the asset name out of a path to that asset in the filesystem"""
940
- components = path.split("/")
941
- return components[
942
- components.index("asset") + 2
943
- ] # Look for asset in the path to find the name
944
-
945
- if isinstance(assets_dir, UploadAssetDirectory):
946
- assets_dir = assets_dir.path
947
-
948
- if not self.model.is_asset(Path(assets_dir).name):
949
- raise DerivaMLException("Directory does not have name of an asset table.")
950
- results = upload_directory(self.model, assets_dir)
951
- return {path_to_asset(p): r for p, r in results.items()}
952
-
953
919
  def _update_status(
954
920
  self, new_status: Status, status_detail: str, execution_rid: RID
955
921
  ):
@@ -1205,7 +1171,7 @@ class DerivaML(Dataset):
1205
1171
 
1206
1172
  """
1207
1173
 
1208
- # Get repo URL from local github repo.
1174
+ # Get repo URL from local gitHub repo.
1209
1175
  try:
1210
1176
  result = subprocess.run(
1211
1177
  ["git", "remote", "get-url", "origin"],
@@ -1261,7 +1227,7 @@ class DerivaML(Dataset):
1261
1227
 
1262
1228
  Args:
1263
1229
  configuration: ExecutionConfiguration:
1264
- dryrun: Do not create an execution record or upload results.
1230
+ dry_run: Do not create an execution record or upload results.
1265
1231
 
1266
1232
  Returns:
1267
1233
  An execution object.
@@ -1283,13 +1249,11 @@ class DerivaML(Dataset):
1283
1249
  raise DerivaMLException(f"Multiple execution RIDs were found {e_rids}.")
1284
1250
 
1285
1251
  execution_rid = e_rids[0]
1286
- cfile = (
1287
- execution_metadata_dir(
1288
- self.working_dir,
1289
- exec_rid=execution_rid,
1290
- metadata_type=ExecMetadataVocab.execution_config.value,
1291
- )
1292
- / "configuration.json"
1252
+ cfile = asset_file_path(
1253
+ prefix=self.working_dir,
1254
+ exec_rid=execution_rid,
1255
+ file_name="configuration.json",
1256
+ asset_table=self.model.name_to_table("Execution_Metadata"),
1293
1257
  )
1294
1258
  configuration = ExecutionConfiguration.load_configuration(cfile)
1295
1259
  return Execution(configuration, self, reload=execution_rid)