deriva-ml 1.13.3__py3-none-any.whl → 1.14.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. deriva_ml/__init__.py +25 -30
  2. deriva_ml/core/__init__.py +39 -0
  3. deriva_ml/core/base.py +1489 -0
  4. deriva_ml/core/constants.py +36 -0
  5. deriva_ml/core/definitions.py +74 -0
  6. deriva_ml/core/enums.py +222 -0
  7. deriva_ml/core/ermrest.py +288 -0
  8. deriva_ml/core/exceptions.py +28 -0
  9. deriva_ml/core/filespec.py +116 -0
  10. deriva_ml/dataset/__init__.py +4 -0
  11. deriva_ml/{dataset_aux_classes.py → dataset/aux_classes.py} +16 -12
  12. deriva_ml/{dataset.py → dataset/dataset.py} +408 -416
  13. deriva_ml/{dataset_bag.py → dataset/dataset_bag.py} +137 -97
  14. deriva_ml/{history.py → dataset/history.py} +52 -33
  15. deriva_ml/{upload.py → dataset/upload.py} +48 -70
  16. deriva_ml/demo_catalog.py +233 -183
  17. deriva_ml/execution/environment.py +290 -0
  18. deriva_ml/{execution.py → execution/execution.py} +365 -252
  19. deriva_ml/execution/execution_configuration.py +163 -0
  20. deriva_ml/{execution_configuration.py → execution/workflow.py} +206 -218
  21. deriva_ml/feature.py +83 -46
  22. deriva_ml/model/__init__.py +0 -0
  23. deriva_ml/{deriva_model.py → model/catalog.py} +113 -132
  24. deriva_ml/{database_model.py → model/database.py} +52 -74
  25. deriva_ml/model/sql_mapper.py +44 -0
  26. deriva_ml/run_notebook.py +19 -11
  27. deriva_ml/schema/__init__.py +3 -0
  28. deriva_ml/{schema_setup → schema}/annotations.py +31 -22
  29. deriva_ml/schema/check_schema.py +104 -0
  30. deriva_ml/{schema_setup → schema}/create_schema.py +151 -104
  31. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  32. deriva_ml/schema/table_comments_utils.py +57 -0
  33. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/METADATA +5 -4
  34. deriva_ml-1.14.26.dist-info/RECORD +40 -0
  35. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/entry_points.txt +1 -0
  36. deriva_ml/deriva_definitions.py +0 -372
  37. deriva_ml/deriva_ml_base.py +0 -1046
  38. deriva_ml/execution_environment.py +0 -139
  39. deriva_ml/schema_setup/table_comments_utils.py +0 -56
  40. deriva_ml/test-files/execution-parameters.json +0 -1
  41. deriva_ml/test-files/notebook-parameters.json +0 -5
  42. deriva_ml/test_functions.py +0 -141
  43. deriva_ml/test_notebook.ipynb +0 -197
  44. deriva_ml-1.13.3.dist-info/RECORD +0 -31
  45. /deriva_ml/{schema_setup → execution}/__init__.py +0 -0
  46. /deriva_ml/{schema_setup → schema}/policy.json +0 -0
  47. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/WHEEL +0 -0
  48. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/licenses/LICENSE +0 -0
  49. {deriva_ml-1.13.3.dist-info → deriva_ml-1.14.26.dist-info}/top_level.txt +0 -0
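
Most of the churn in this release is a package reorganization: the flat modules of 1.13.3 (deriva_ml_base.py, deriva_definitions.py, execution_configuration.py, upload.py, ...) move into core/, dataset/, execution/, model/, and schema/ subpackages. As a rough guide to what this means for callers, here is a minimal before/after import sketch based on the import block in the execution.py diff below; only the paths that appear in that diff are confirmed, and other symbols may have moved differently:

    # deriva-ml 1.13.3 (old flat layout)
    from deriva_ml.deriva_ml_base import DerivaML
    from deriva_ml.execution_configuration import ExecutionConfiguration, Workflow

    # deriva-ml 1.14.26 (new subpackage layout)
    from deriva_ml.core.base import DerivaML
    from deriva_ml.execution.execution_configuration import ExecutionConfiguration
    from deriva_ml.execution.workflow import Workflow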
@@ -1,52 +1,78 @@
-"""
-This module defined the Execution class which is used to interact with the state of an active execution.
+"""Execution management for DerivaML.
+
+This module provides functionality for managing and tracking executions in DerivaML. An execution
+represents a computational or manual process that operates on datasets and produces outputs.
+The module includes:
+
+- Execution class: Core class for managing execution state and context
+- Asset management: Track input and output files
+- Status tracking: Monitor and update execution progress
+- Dataset handling: Download and materialize required datasets
+- Provenance tracking: Record relationships between inputs, processes, and outputs
+
+The Execution class serves as the primary interface for managing the lifecycle of a computational
+or manual process within DerivaML.
+
+Typical usage example:
+    >>> config = ExecutionConfiguration(workflow="analysis_workflow", description="Data analysis")
+    >>> with ml.create_execution(config) as execution:
+    ...     execution.download_dataset_bag(dataset_spec)
+    ...     # Run analysis
+    ...     execution.upload_execution_outputs()
 """
 
 from __future__ import annotations
 
-from collections import defaultdict
-from datetime import datetime
 import json
 import logging
 import os
-from pathlib import Path
-
-from pydantic import validate_call, ConfigDict
-import sys
 import shutil
-from typing import Iterable, Any, Optional
+import sys
+from collections import defaultdict
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Iterable, List
 
 from deriva.core import format_exception
 from deriva.core.hatrac_store import HatracStore
-from .deriva_definitions import (
+from pydantic import ConfigDict, validate_call
+
+from deriva_ml.core.base import DerivaML
+from deriva_ml.core.definitions import (
+    DRY_RUN_RID,
     RID,
-    Status,
-    FileUploadState,
-    DerivaMLException,
-    MLVocab,
-    MLAsset,
-    ExecMetadataType,
     ExecAssetType,
+    ExecMetadataType,
     FileSpec,
-    DRY_RUN_RID,
+    FileUploadState,
+    MLAsset,
+    MLVocab,
+    Status,
 )
-from .deriva_ml_base import DerivaML, FeatureRecord
-from .dataset_aux_classes import DatasetSpec, DatasetVersion, VersionPart
-from .dataset_bag import DatasetBag
-from .execution_configuration import ExecutionConfiguration, Workflow
-from .execution_environment import get_execution_environment
-from .upload import (
+from deriva_ml.core.exceptions import DerivaMLException
+from deriva_ml.dataset.aux_classes import DatasetSpec, DatasetVersion, VersionPart
+from deriva_ml.dataset.dataset_bag import DatasetBag
+from deriva_ml.dataset.upload import (
+    asset_file_path,
+    asset_root,
+    asset_type_path,
     execution_root,
     feature_root,
-    asset_root,
     feature_value_path,
     is_feature_dir,
+    normalize_asset_dir,
     table_path,
     upload_directory,
-    normalize_asset_dir,
-    asset_file_path,
-    asset_type_path,
 )
+from deriva_ml.execution.environment import get_execution_environment
+from deriva_ml.execution.execution_configuration import ExecutionConfiguration
+from deriva_ml.execution.workflow import Workflow
+from deriva_ml.feature import FeatureRecord
+
+# Keep pycharm from complaining about undefined references in docstrings.
+execution: Execution
+ml: DerivaML
+dataset_spec: DatasetSpec
 
 try:
     from icecream import ic
@@ -55,7 +81,7 @@ except ImportError:  # Graceful fallback if IceCream isn't installed.
 
 
 try:
-    from IPython.display import display, Markdown
+    from IPython.display import Markdown, display
 except ImportError:
 
     def display(s):
@@ -69,16 +95,27 @@ except ImportError:
 if sys.version_info >= (3, 12):
 
     class AssetFilePath(Path):
-        """
-        Create a new Path object that has additional information related to the use of this path as an asset.
-
-        Args:
-            asset_path: Local path to the location of the asset.
-            asset_name: The name of the asset in the catalog (e.g. the asset table name).
-            file_name: Name of the local file that contains the contents of the asset.
-            asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
-            asset_types: A list of terms from the Asset_Type controlled vocabulary.
-            asset_rid: The RID of the asset if it has been uploaded into an asset table
+        """Extended Path class for managing asset files.
+
+        Represents a file path with additional metadata about its role as an asset in the catalog.
+        This class extends the standard Path class to include information about the asset's
+        catalog representation and type.
+
+        Attributes:
+            asset_name (str): Name of the asset in the catalog (e.g., asset table name).
+            file_name (str): Name of the local file containing the asset.
+            asset_metadata (dict[str, Any]): Additional columns beyond URL, Length, and checksum.
+            asset_types (list[str]): Terms from the Asset_Type controlled vocabulary.
+            asset_rid (RID | None): Resource Identifier if uploaded to an asset table.
+
+        Example:
+            >>> path = AssetFilePath(
+            ...     "/path/to/file.txt",
+            ...     asset_name="analysis_output",
+            ...     file_name="results.txt",
+            ...     asset_metadata={"version": "1.0"},
+            ...     asset_types=["text", "results"]
+            ... )
         """
 
         def __init__(
@@ -88,16 +125,23 @@ if sys.version_info >= (3, 12):
             file_name: str,
             asset_metadata: dict[str, Any],
             asset_types: list[str] | str,
-            asset_rid: Optional["RID"] = None,
+            asset_rid: RID | None = None,
         ):
+            """Initializes an AssetFilePath instance.
+
+            Args:
+                asset_path: Local path to the asset file.
+                asset_name: Name of the asset in the catalog.
+                file_name: Name of the local file.
+                asset_metadata: Additional metadata columns.
+                asset_types: One or more asset type terms.
+                asset_rid: Optional Resource Identifier if already in catalog.
+            """
             super().__init__(asset_path)
-            # These assignments happen after __new__ returns the instance
             self.asset_name = asset_name
             self.file_name = file_name
             self.asset_metadata = asset_metadata
-            self.asset_types = (
-                asset_types if isinstance(asset_types, list) else [asset_types]
-            )
+            self.asset_types = asset_types if isinstance(asset_types, list) else [asset_types]
             self.asset_rid = asset_rid
 else:
 
@@ -105,9 +149,9 @@ else:
         """
         Create a new Path object that has additional information related to the use of this path as an asset.
 
-        Args:
+        Attributes:
             asset_path: Local path to the location of the asset.
-            asset_name: The name of the asset in the catalog (e.g. the asset table name).
+            asset_name: The name of the asset in the catalog (e.g., the asset table name).
             file_name: Name of the local file that contains the contents of the asset.
             asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
             asset_types: A list of terms from the Asset_Type controlled vocabulary.
@@ -121,65 +165,76 @@ else:
             file_name: str,
             asset_metadata: dict[str, Any],
             asset_types: list[str] | str,
-            asset_rid: Optional["RID"] = None,
+            asset_rid: RID | None = None,
         ):
             # Only pass the path to the base Path class
             obj = super().__new__(cls, asset_path)
             obj.asset_name = asset_name
             obj.file_name = file_name
             obj.asset_metadata = asset_metadata
-            obj.asset_types = (
-                asset_types if isinstance(asset_types, list) else [asset_types]
-            )
+            obj.asset_types = asset_types if isinstance(asset_types, list) else [asset_types]
             obj.asset_rid = asset_rid
             return obj
 
 
 class Execution:
-    """The Execution class is used to capture the context of an activity within DerivaML. While these are primarily
-    computational, manual processes can be represented by an execution as well.
-
-    Within DerivaML, Executions are used to provide providence. Every dataset_table and data file that is generated is
-    associated with an execution, which records which program and input parameters were used to generate that data.
-
-    Execution objects are created from an ExecutionConfiguration, which provides information about what DerivaML
-    datasets will be used, what additional files (assets) are required, what code is being run (Workflow) and an
-    optional description of the Execution. Side effects of creating an execution object are:
-
-    1. An execution record is created in the catalog and the RID of that record recorded,
-    2. Any specified datasets are downloaded and materialized
-    3. Any additional required assets are downloaded.
+    """Manages the lifecycle and context of a DerivaML execution.
 
-    Once execution is complete, a method can be called to upload any data produced by the execution. In addition, the
-    Execution object provides methods for locating where to find downloaded datasets and assets, and also where to
-    place any data that may be uploaded.
+    An Execution represents a computational or manual process within DerivaML. It provides:
+    - Dataset materialization and access
+    - Asset management (inputs and outputs)
+    - Status tracking and updates
+    - Provenance recording
+    - Result upload and cataloging
 
-    Finally, the execution object can update its current state in the DerivaML catalog, allowing users to remotely
-    track the progress of their execution.
+    The class handles downloading required datasets and assets, tracking execution state,
+    and managing the upload of results. Every dataset and file generated is associated
+    with an execution record for provenance tracking.
 
     Attributes:
-        dataset_rids (list[RID]): The RIDs of the datasets to be downloaded and materialized as part of the execution.
-        datasets (list[DatasetBag]): List of datasetBag objects that referred the materialized datasets specified in.
-            `dataset_rids`.
-        configuration (ExecutionConfiguration): The configuration of the execution.
-        workflow_rid (RID): The RID of the workflow associated with the execution.
-        status (Status): The status of the execution.
+        dataset_rids (list[RID]): RIDs of datasets used in the execution.
+        datasets (list[DatasetBag]): Materialized dataset objects.
+        configuration (ExecutionConfiguration): Execution settings and parameters.
+        workflow_rid (RID): RID of the associated workflow.
+        status (Status): Current execution status.
+        asset_paths (list[AssetFilePath]): Paths to execution assets.
+        parameters (dict): Execution parameters.
+        start_time (datetime | None): When execution started.
+        stop_time (datetime | None): When execution completed.
+
+    Example:
+        >>> config = ExecutionConfiguration(
+        ...     workflow="analysis",
+        ...     description="Process samples",
+        ...     parameters={"threshold": 0.5}
+        ... )
+        >>> with ml.create_execution(config) as execution:
+        ...     execution.download_dataset_bag(dataset_spec)
+        ...     # Run analysis
+        ...     execution.upload_execution_outputs()
     """
 
     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def __init__(
         self,
         configuration: ExecutionConfiguration,
-        ml_object: "DerivaML",
-        reload: Optional[RID] = None,
+        ml_object: DerivaML,
+        reload: RID | None = None,
         dry_run: bool = False,
     ):
-        """
+        """Initializes an Execution instance.
+
+        Creates a new execution or reloads an existing one. Initializes the execution
+        environment, downloads required datasets, and sets up asset tracking.
 
         Args:
-            configuration: Execution configuration object that describes the execution.
-            ml_object: The DerivaML instance that created the execution.
-            reload: RID of a previously initialized execution object.
+            configuration: Settings and parameters for the execution.
+            ml_object: DerivaML instance managing the execution.
+            reload: Optional RID of existing execution to reload.
+            dry_run: If True, don't create catalog records or upload results.
+
+        Raises:
+            DerivaMLException: If initialization fails or configuration is invalid.
         """
         self.asset_paths: list[AssetFilePath] = []
         self.configuration = configuration
@@ -189,10 +244,10 @@ class Execution:
         self.start_time = None
         self.stop_time = None
         self.status = Status.created
-        self.uploaded_assets: Optional[dict[str, list[AssetFilePath]]] = None
+        self.uploaded_assets: dict[str, list[AssetFilePath]] | None = None
         self.configuration.argv = sys.argv
 
-        self.dataset_rids: list[RID] = []
+        self.dataset_rids: List[RID] = []
         self.datasets: list[DatasetBag] = []
         self.parameters = self.configuration.parameters
 
@@ -203,32 +258,21 @@ class Execution:
         # Make sure we have a good workflow.
         if isinstance(self.configuration.workflow, Workflow):
             self.workflow_rid = (
-                self._ml_object.add_workflow(self.configuration.workflow)
-                if not self._dry_run
-                else DRY_RUN_RID
+                self._ml_object.add_workflow(self.configuration.workflow) if not self._dry_run else DRY_RUN_RID
             )
         else:
             self.workflow_rid = self.configuration.workflow
-            if (
-                self._ml_object.resolve_rid(configuration.workflow).table.name
-                != "Workflow"
-            ):
-                raise DerivaMLException(
-                    "Workflow specified in execution configuration is not a Workflow"
-                )
+            if self._ml_object.resolve_rid(configuration.workflow).table.name != "Workflow":
+                raise DerivaMLException("Workflow specified in execution configuration is not a Workflow")
 
         # Validate the datasets and assets to be valid.
         for d in self.configuration.datasets:
             if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
-                raise DerivaMLException(
-                    "Dataset specified in execution configuration is not a dataset"
-                )
+                raise DerivaMLException("Dataset specified in execution configuration is not a dataset")
 
         for a in self.configuration.assets:
             if not self._model.is_asset(self._ml_object.resolve_rid(a).table.name):
-                raise DerivaMLException(
-                    "Asset specified in execution configuration is not a asset table"
-                )
+                raise DerivaMLException("Asset specified in execution configuration is not a asset table")
 
         schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
         if reload:
@@ -247,16 +291,11 @@
                 ]
             )[0]["RID"]
 
-        if (
-            isinstance(self.configuration.workflow, Workflow)
-            and self.configuration.workflow.is_notebook
-        ):
-            # Put execution_rid into cell output so we can find it later.
-            display(
-                Markdown(f"Execution RID: {self._ml_object.cite(self.execution_rid)}")
-            )
+        if isinstance(self.configuration.workflow, Workflow) and self.configuration.workflow.is_notebook:
+            # Put execution_rid into the cell output so we can find it later.
+            display(Markdown(f"Execution RID: {self._ml_object.cite(self.execution_rid)}"))
 
-        # Create a directory for execution rid so we can recover state in case of a crash.
+        # Create a directory for execution rid so we can recover the state in case of a crash.
         execution_root(prefix=self._ml_object.working_dir, exec_rid=self.execution_rid)
         self._initialize_execution(reload)
 
@@ -266,12 +305,12 @@
             f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
             ExecMetadataType.runtime_env.value,
         )
-        with open(runtime_env_path, "w") as fp:
+        with Path(runtime_env_path).open("w") as fp:
             json.dump(get_execution_environment(), fp)
 
-    def _initialize_execution(self, reload: Optional[RID] = None) -> None:
-        """Initialize the execution by a configuration in the Execution_Metadata table.
-        Setup working directory and download all the assets and data.
+    def _initialize_execution(self, reload: RID | None = None) -> None:
+        """Initialize the execution by a configuration in the Execution_Metadata table.
+        Set up a working directory and download all the assets and data.
 
         :raise DerivaMLException: If there is an issue initializing the execution.
 
@@ -283,9 +322,7 @@
         """
         # Materialize bdbag
         for dataset in self.configuration.datasets:
-            self.update_status(
-                Status.initializing, f"Materialize bag {dataset.rid}... "
-            )
+            self.update_status(Status.initializing, f"Materialize bag {dataset.rid}... ")
             self.datasets.append(self.download_dataset_bag(dataset))
             self.dataset_rids.append(dataset.rid)
 
@@ -293,10 +330,7 @@
         schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
         if self.dataset_rids and not (reload or self._dry_run):
             schema_path.Dataset_Execution.insert(
-                [
-                    {"Dataset": d, "Execution": self.execution_rid}
-                    for d in self.dataset_rids
-                ]
+                [{"Dataset": d, "Execution": self.execution_rid} for d in self.dataset_rids]
             )
 
         # Download assets....
@@ -305,9 +339,7 @@
         for asset_rid in self.configuration.assets:
             asset_table = self._ml_object.resolve_rid(asset_rid).table.name
             dest_dir = (
-                execution_root(self._ml_object.working_dir, self.execution_rid)
-                / "downloaded-assets"
-                / asset_table
+                execution_root(self._ml_object.working_dir, self.execution_rid) / "downloaded-assets" / asset_table
             )
             dest_dir.mkdir(parents=True, exist_ok=True)
             self.asset_paths.setdefault(asset_table, []).append(
325
357
  "configuration.json",
326
358
  ExecMetadataType.execution_config.value,
327
359
  )
328
- with open(cfile.as_posix(), "w", encoding="utf-8") as config_file:
360
+ with Path(cfile).open("w", encoding="utf-8") as config_file:
329
361
  json.dump(self.configuration.model_dump(), config_file)
330
362
 
331
363
  for parameter_file in self.configuration.parameters:
@@ -355,7 +387,7 @@ class Execution:
355
387
 
356
388
  @property
357
389
  def _feature_root(self) -> Path:
358
- """The root path to all execution specific files.
390
+ """The root path to all execution-specific files.
359
391
  :return:
360
392
 
361
393
  Args:
@@ -367,7 +399,7 @@ class Execution:
367
399
 
368
400
  @property
369
401
  def _asset_root(self) -> Path:
370
- """The root path to all execution specific files.
402
+ """The root path to all execution-specific files.
371
403
  :return:
372
404
 
373
405
  Args:
@@ -379,26 +411,47 @@ class Execution:
379
411
 
380
412
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
381
413
  def download_dataset_bag(self, dataset: DatasetSpec) -> DatasetBag:
382
- """Given a RID to a dataset_table, or a MINID to an existing bag, download the bag file, extract it and validate
383
- that all the metadata is correct
414
+ """Downloads and materializes a dataset for use in the execution.
415
+
416
+ Downloads the specified dataset as a BDBag and materializes it in the execution's
417
+ working directory. The dataset version is determined by the DatasetSpec.
384
418
 
385
419
  Args:
386
- dataset: A dataset specification of a dataset_table or a minid to an existing bag.
420
+ dataset: Specification of the dataset to download, including version and
421
+ materialization options.
387
422
 
388
423
  Returns:
389
- the location of the unpacked and validated dataset_table bag and the RID of the bag
424
+ DatasetBag: Object containing:
425
+ - path: Local filesystem path to downloaded dataset
426
+ - rid: Dataset's Resource Identifier
427
+ - minid: Dataset's Minimal Viable Identifier
428
+
429
+ Raises:
430
+ DerivaMLException: If download or materialization fails.
431
+
432
+ Example:
433
+ >>> spec = DatasetSpec(rid="1-abc123", version="1.2.0")
434
+ >>> bag = execution.download_dataset_bag(spec)
435
+ >>> print(f"Downloaded to {bag.path}")
390
436
  """
391
- return self._ml_object.download_dataset_bag(
392
- dataset, execution_rid=self.execution_rid
393
- )
437
+ return self._ml_object.download_dataset_bag(dataset, execution_rid=self.execution_rid)
394
438
 
395
439
  @validate_call
396
440
  def update_status(self, status: Status, msg: str) -> None:
397
- """Update the status information in the execution record in the DerivaML catalog.
441
+ """Updates the execution's status in the catalog.
442
+
443
+ Records a new status and associated message in the catalog, allowing remote
444
+ tracking of execution progress.
398
445
 
399
446
  Args:
400
- status: A value from the Status Enum
401
- msg: Additional information about the status
447
+ status: New status value (e.g., running, completed, failed).
448
+ msg: Description of the status change or current state.
449
+
450
+ Raises:
451
+ DerivaMLException: If status update fails.
452
+
453
+ Example:
454
+ >>> execution.update_status(Status.running, "Processing sample 1 of 10")
402
455
  """
403
456
  self.status = status
404
457
  self._logger.info(msg)
@@ -417,14 +470,36 @@ class Execution:
417
470
  )
418
471
 
419
472
  def execution_start(self) -> None:
420
- """Start an execution, uploading status to catalog"""
421
-
473
+ """Marks the execution as started.
474
+
475
+ Records the start time and updates the execution's status to 'running'.
476
+ This should be called before beginning the main execution work.
477
+
478
+ Example:
479
+ >>> execution.execution_start()
480
+ >>> try:
481
+ ... # Run analysis
482
+ ... execution.execution_stop()
483
+ ... except Exception:
484
+ ... execution.update_status(Status.failed, "Analysis error")
485
+ """
422
486
  self.start_time = datetime.now()
423
487
  self.uploaded_assets = None
424
488
  self.update_status(Status.initializing, "Start execution ...")
425
489
 
426
490
  def execution_stop(self) -> None:
427
- """Finish the execution and update the duration and status of execution."""
491
+ """Marks the execution as completed.
492
+
493
+ Records the stop time and updates the execution's status to 'completed'.
494
+ This should be called after all execution work is finished.
495
+
496
+ Example:
497
+ >>> try:
498
+ ... # Run analysis
499
+ ... execution.execution_stop()
500
+ ... except Exception:
501
+ ... execution.update_status(Status.failed, "Analysis error")
502
+ """
428
503
  self.stop_time = datetime.now()
429
504
  duration = self.stop_time - self.start_time
430
505
  hours, remainder = divmod(duration.total_seconds(), 3600)
@@ -433,22 +508,22 @@ class Execution:
433
508
 
434
509
  self.update_status(Status.completed, "Algorithm execution ended.")
435
510
  if not self._dry_run:
436
- self._ml_object.pathBuilder.schemas[
437
- self._ml_object.ml_schema
438
- ].Execution.update([{"RID": self.execution_rid, "Duration": duration}])
511
+ self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema].Execution.update(
512
+ [{"RID": self.execution_rid, "Duration": duration}]
513
+ )
439
514
 
440
515
  def _upload_execution_dirs(self) -> dict[str, list[AssetFilePath]]:
441
516
  """Upload execution assets at _working_dir/Execution_asset.
442
517
 
443
518
  This routine uploads the contents of the
444
- Execution_Asset directory, and then updates the execution_asset table in the ML schema to have references
519
+ Execution_Asset directory and then updates the execution_asset table in the ML schema to have references
445
520
  to these newly uploaded files.
446
521
 
447
522
  Returns:
448
523
  dict: Results of the upload operation.
449
524
 
450
525
  Raises:
451
- DerivaMLException: If there is an issue uploading the assets.
526
+ DerivaMLException: If there is an issue when uploading the assets.
452
527
  """
453
528
 
454
529
  try:
@@ -494,9 +569,7 @@ class Execution:
494
569
  return asset_map
495
570
 
496
571
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
497
- def download_asset(
498
- self, asset_rid: RID, dest_dir: Path, update_catalog=True
499
- ) -> AssetFilePath:
572
+ def download_asset(self, asset_rid: RID, dest_dir: Path, update_catalog=True) -> AssetFilePath:
500
573
  """Download an asset from a URL and place it in a local directory.
501
574
 
502
575
  Args:
@@ -513,25 +586,17 @@ class Execution:
513
586
  raise DerivaMLException(f"RID {asset_rid} is not for an asset table.")
514
587
 
515
588
  asset_record = self._ml_object.retrieve_rid(asset_rid)
516
- asset_metadata = {
517
- k: v
518
- for k, v in asset_record.items()
519
- if k in self._model.asset_metadata(asset_table)
520
- }
589
+ asset_metadata = {k: v for k, v in asset_record.items() if k in self._model.asset_metadata(asset_table)}
521
590
  asset_url = asset_record["URL"]
522
591
  asset_filename = dest_dir / asset_record["Filename"]
523
592
  hs = HatracStore("https", self._ml_object.host_name, self._ml_object.credential)
524
593
  hs.get_obj(path=asset_url, destfilename=asset_filename.as_posix())
525
594
 
526
- asset_type_table = self._model.find_association(asset_table, MLVocab.asset_type)
527
- type_path = self._ml_object.pathBuilder.schemas[
528
- asset_type_table.schema.name
529
- ].tables[asset_type_table.name]
595
+ asset_type_table, _col_l, _col_r = self._model.find_association(asset_table, MLVocab.asset_type)
596
+ type_path = self._ml_object.pathBuilder.schemas[asset_type_table.schema.name].tables[asset_type_table.name]
530
597
  asset_types = [
531
598
  asset_type[MLVocab.asset_type.value]
532
- for asset_type in type_path.filter(
533
- type_path.columns[asset_table.name] == asset_rid
534
- )
599
+ for asset_type in type_path.filter(type_path.columns[asset_table.name] == asset_rid)
535
600
  .attributes(type_path.Asset_Type)
536
601
  .fetch()
537
602
  ]
@@ -557,47 +622,58 @@ class Execution:
557
622
  self,
558
623
  assets_dir: str | Path,
559
624
  ) -> dict[Any, FileUploadState] | None:
560
- """Upload assets from a directory.
625
+ """Uploads assets from a directory to the catalog.
561
626
 
562
- This routine assumes that the current upload specification includes a configuration for the specified directory.
563
- Every asset in the specified directory is uploaded
627
+ Scans the specified directory for assets and uploads them to the catalog,
628
+ recording their metadata and types. Assets are organized by their types
629
+ and associated with the execution.
564
630
 
565
631
  Args:
566
- assets_dir: Directory containing the assets to upload.
632
+ assets_dir: Directory containing assets to upload.
567
633
 
568
634
  Returns:
569
- Results of the upload operation.
635
+ dict[Any, FileUploadState] | None: Mapping of assets to their upload states,
636
+ or None if no assets were found.
570
637
 
571
638
  Raises:
572
- DerivaMLException: If there is an issue uploading the assets.
639
+ DerivaMLException: If upload fails or assets are invalid.
640
+
641
+ Example:
642
+ >>> states = execution.upload_assets("output/results")
643
+ >>> for asset, state in states.items():
644
+ ... print(f"{asset}: {state}")
573
645
  """
574
646
 
575
647
  def path_to_asset(path: str) -> str:
576
648
  """Pull the asset name out of a path to that asset in the filesystem"""
577
649
  components = path.split("/")
578
- return components[
579
- components.index("asset") + 2
580
- ] # Look for asset in the path to find the name
650
+ return components[components.index("asset") + 2] # Look for asset in the path to find the name
581
651
 
582
652
  if not self._model.is_asset(Path(assets_dir).name):
583
653
  raise DerivaMLException("Directory does not have name of an asset table.")
584
654
  results = upload_directory(self._model, assets_dir)
585
655
  return {path_to_asset(p): r for p, r in results.items()}
586
656
 
587
- def upload_execution_outputs(
588
- self, clean_folder: bool = True
589
- ) -> dict[str, list[AssetFilePath]]:
590
- """Upload all the assets and metadata associated with the current execution.
657
+ def upload_execution_outputs(self, clean_folder: bool = True) -> dict[str, list[AssetFilePath]]:
658
+ """Uploads all outputs from the execution to the catalog.
591
659
 
592
- This will include any new assets, features, or table values.
660
+ Scans the execution's output directories for assets, features, and other results,
661
+ then uploads them to the catalog. Can optionally clean up the output folders
662
+ after successful upload.
593
663
 
594
664
  Args:
595
- clean_folder: bool: (Default value = True)
665
+ clean_folder: Whether to delete output folders after upload. Defaults to True.
596
666
 
597
667
  Returns:
598
- Results of the upload operation. Asset names are all relative to the execution upload directory.
599
- Uploaded assets with key as assets' suborder name, values as an
600
- ordered dictionary with RID and metadata in the Execution_Asset table.
668
+ dict[str, list[AssetFilePath]]: Mapping of asset types to their file paths.
669
+
670
+ Raises:
671
+ DerivaMLException: If upload fails or outputs are invalid.
672
+
673
+ Example:
674
+ >>> outputs = execution.upload_execution_outputs()
675
+ >>> for type_name, paths in outputs.items():
676
+ ... print(f"{type_name}: {len(paths)} files")
601
677
  """
602
678
  if self._dry_run:
603
679
  return {}
@@ -613,21 +689,40 @@ class Execution:
613
689
  raise e
614
690
 
615
691
  def _clean_folder_contents(self, folder_path: Path):
616
- """
692
+ """Clean up folder contents with Windows-compatible error handling.
617
693
 
618
694
  Args:
619
- folder_path: Path:
695
+ folder_path: Path to the folder to clean
620
696
  """
697
+ import time
698
+
699
+ MAX_RETRIES = 3
700
+ RETRY_DELAY = 1 # seconds
701
+
702
+ def remove_with_retry(path: Path, is_dir: bool = False) -> bool:
703
+ for attempt in range(MAX_RETRIES):
704
+ try:
705
+ if is_dir:
706
+ shutil.rmtree(path)
707
+ else:
708
+ Path(path).unlink()
709
+ return True
710
+ except (OSError, PermissionError) as e:
711
+ if attempt == MAX_RETRIES - 1:
712
+ self.update_status(Status.failed, format_exception(e))
713
+ return False
714
+ time.sleep(RETRY_DELAY)
715
+ return False
716
+
621
717
  try:
622
718
  with os.scandir(folder_path) as entries:
623
719
  for entry in entries:
624
720
  if entry.is_dir() and not entry.is_symlink():
625
- shutil.rmtree(entry.path)
721
+ remove_with_retry(Path(entry.path), is_dir=True)
626
722
  else:
627
- os.remove(entry.path)
723
+ remove_with_retry(Path(entry.path))
628
724
  except OSError as e:
629
- error = format_exception(e)
630
- self.update_status(Status.failed, error)
725
+ self.update_status(Status.failed, format_exception(e))
631
726
 
632
727
  def _update_feature_table(
633
728
  self,
@@ -642,28 +737,21 @@ class Execution:
642
737
  target_table: str:
643
738
  feature_name: str:
644
739
  feature_file: str | Path:
645
- uploaded_files: Dictionary whose key ia an asset name, file-name pair, and whose value is a filename, RID of that asset.
740
+ uploaded_files: Dictionary whose key is an asset name, file-name pair, and whose value is a filename,
741
+ RID of that asset.
646
742
  """
647
743
 
648
744
  # Get the column names of all the Feature columns that should be the RID of an asset
649
745
  asset_columns = [
650
- c.name
651
- for c in self._ml_object.feature_record_class(
652
- target_table, feature_name
653
- ).feature.asset_columns
746
+ c.name for c in self._ml_object.feature_record_class(target_table, feature_name).feature.asset_columns
654
747
  ]
655
748
 
656
749
  # Get the names of the columns in the feature that are assets.
657
750
  asset_columns = [
658
- c.name
659
- for c in self._ml_object.feature_record_class(
660
- target_table, feature_name
661
- ).feature.asset_columns
751
+ c.name for c in self._ml_object.feature_record_class(target_table, feature_name).feature.asset_columns
662
752
  ]
663
753
 
664
- feature_table = self._ml_object.feature_record_class(
665
- target_table, feature_name
666
- ).feature.feature_table.name
754
+ feature_table = self._ml_object.feature_record_class(target_table, feature_name).feature.feature_table.name
667
755
  asset_map = {
668
756
  (asset_table, asset.file_name): asset.asset_rid
669
757
  for asset_table, assets in uploaded_files.items()
@@ -677,41 +765,37 @@ class Execution:
677
765
  return e
678
766
 
679
767
  # Load the JSON file that has the set of records that contain the feature values.
680
- with open(feature_file, "r") as feature_values:
768
+ with Path(feature_file).open("r") as feature_values:
681
769
  entities = [json.loads(line.strip()) for line in feature_values]
682
770
  # Update the asset columns in the feature and add to the catalog.
683
- self._ml_object.domain_path.tables[feature_table].insert(
684
- [map_path(e) for e in entities], on_conflict_skip=True
685
- )
771
+ self._ml_object.domain_path.tables[feature_table].insert([map_path(e) for e in entities], on_conflict_skip=True)
686
772
 
687
773
  def _update_asset_execution_table(
688
774
  self,
689
775
  uploaded_assets: dict[str, list[AssetFilePath]],
690
776
  asset_role: str = "Output",
691
777
  ):
692
- """Add entry to association table connecting an asset to an execution RID
778
+ """Add entry to the association table connecting an asset to an execution RID
693
779
 
694
780
  Args:
695
- uploaded_assets: Dictionary whose key is the name of an asset table, and whose value is a list of RIDs for
781
+ uploaded_assets: Dictionary whose key is the name of an asset table and whose value is a list of RIDs for
696
782
  newly added assets to that table.
697
783
  asset_role: A term or list of terms from the Asset_Role vocabulary.
698
784
  """
699
- # Make sure the asset role is in the controlled vocabulary table.
785
+ # Make sure the asset role is in the controlled vocabulary table.
700
786
  self._ml_object.lookup_term(MLVocab.asset_role, asset_role)
701
787
 
702
788
  pb = self._ml_object.pathBuilder
703
789
  for asset_table, asset_list in uploaded_assets.items():
704
- asset_table_name = asset_table.split("/")[
705
- 1
706
- ] # Peel off the schema from the asset table
707
- asset_exe = self._model.find_association(asset_table_name, "Execution")
790
+ asset_table_name = asset_table.split("/")[1] # Peel off the schema from the asset table
791
+ asset_exe, asset_fk, execution_fk = self._model.find_association(asset_table_name, "Execution")
708
792
  asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]
709
793
 
710
794
  asset_exe_path.insert(
711
795
  [
712
796
  {
713
- asset_table_name: asset_path.asset_rid,
714
- "Execution": self.execution_rid,
797
+ asset_fk: asset_path.asset_rid,
798
+ execution_fk: self.execution_rid,
715
799
  "Asset_Role": asset_role,
716
800
  }
717
801
  for asset_path in asset_list
@@ -724,25 +808,20 @@ class Execution:
724
808
  if asset_role == "Input":
725
809
  return
726
810
  asset_type_map = {}
727
- with open(
811
+ with Path(
728
812
  asset_type_path(
729
813
  self._working_dir,
730
814
  self.execution_rid,
731
815
  self._model.name_to_table(asset_table_name),
732
- ),
733
- "r",
734
- ) as f:
735
- for line in f:
816
+ )
817
+ ).open("r") as asset_type_file:
818
+ for line in asset_type_file:
736
819
  asset_type_map.update(json.loads(line.strip()))
737
820
  for asset_path in asset_list:
738
821
  asset_path.asset_types = asset_type_map[asset_path.file_name]
739
822
 
740
- asset_asset_type = self._model.find_association(
741
- asset_table_name, "Asset_Type"
742
- )
743
- type_path = pb.schemas[asset_asset_type.schema.name].tables[
744
- asset_asset_type.name
745
- ]
823
+ asset_asset_type, _, _ = self._model.find_association(asset_table_name, "Asset_Type")
824
+ type_path = pb.schemas[asset_asset_type.schema.name].tables[asset_asset_type.name]
746
825
 
747
826
  type_path.insert(
748
827
  [
@@ -758,13 +837,13 @@ class Execution:
758
837
  self,
759
838
  asset_name: str,
760
839
  file_name: str | Path,
761
- asset_types: Optional[list[str] | str] = None,
840
+ asset_types: list[str] | str | None = None,
762
841
  copy_file=False,
763
842
  **kwargs,
764
843
  ) -> AssetFilePath:
765
844
  """Return a pathlib Path to the directory in which to place files for the specified execution_asset type.
766
845
 
767
- Given the name of an asset table, and a file name, register the file for upload, and return a path to that
846
+ Given the name of an asset table, and a file name, register the file for upload and return a path to that
768
847
  file in the upload directory. In addition to the filename, additional asset metadata and file asset types may
769
848
  be specified.
770
849
 
@@ -772,13 +851,13 @@
         to a new file with the specified name is returned. The caller can then open that file for writing.
 
         If the provided filename refers to an existing file and the copy_file argument is False (the default), then the
-        returned path contains a symbolic link to that file. If the copy_file argument is True then the contents of
+        returned path contains a symbolic link to that file. If the copy_file argument is True, then the contents of
         file_name are copied into the target directory.
 
         Args:
             asset_name: Type of asset to be uploaded. Must be a term in Asset_Type controlled vocabulary.
             file_name: Name of file to be uploaded.
-            asset_types: Type of asset to be uploaded. Defaults to name of the asset.
+            asset_types: Type of asset to be uploaded. Defaults to the name of the asset.
             **kwargs: Any additional metadata values that may be part of the asset table.
 
         Returns:
810
889
  if copy_file:
811
890
  asset_path.write_bytes(file_name.read_bytes())
812
891
  else:
813
- asset_path.symlink_to(file_name)
892
+ try:
893
+ asset_path.symlink_to(file_name)
894
+ except (OSError, PermissionError):
895
+ # Fallback to copy if symlink fails (common on Windows)
896
+ asset_path.write_bytes(file_name.read_bytes())
814
897
 
815
898
  # Persist the asset types into a file
816
- with open(
817
- asset_type_path(self._working_dir, self.execution_rid, asset_table),
818
- "a",
819
- encoding="utf-8",
820
- ) as f:
821
- f.write(json.dumps({file_name.name: asset_types}) + "\n")
899
+ with Path(
900
+ asset_type_path(self._working_dir, self.execution_rid, asset_table)
901
+ ).open("a") as asset_type_file:
902
+ asset_type_file.write(json.dumps({file_name.name: asset_types}) + "\n")
822
903
 
823
904
  return AssetFilePath(
824
905
  asset_path=asset_path,
@@ -838,26 +919,33 @@ class Execution:
838
919
  Pathlib path to the file in which to place table values.
839
920
  """
840
921
  if table not in self._model.schemas[self._ml_object.domain_schema].tables:
841
- raise DerivaMLException(
842
- "Table '{}' not found in domain schema".format(table)
843
- )
922
+ raise DerivaMLException("Table '{}' not found in domain schema".format(table))
844
923
 
845
- return table_path(
846
- self._working_dir, schema=self._ml_object.domain_schema, table=table
847
- )
924
+ return table_path(self._working_dir, schema=self._ml_object.domain_schema, table=table)
848
925
 
849
926
  def execute(self) -> Execution:
850
- """Initiate an execution with provided configuration. Can be used in a context manager."""
927
+ """Initiate an execution with the provided configuration. Can be used in a context manager."""
851
928
  self.execution_start()
852
929
  return self
853
930
 
854
931
  @validate_call
855
932
  def add_features(self, features: Iterable[FeatureRecord]) -> None:
856
- """Given a collection of Feature records, write out a CSV file in the appropriate assets directory so that this
857
- feature gets uploaded when the execution is complete.
933
+ """Adds feature records to the catalog.
934
+
935
+ Associates feature records with this execution and uploads them to the catalog.
936
+ Features represent measurable properties or characteristics of records.
937
+
938
+ NOTE: The catalog is not updated until upload_execution_outputs() is called.
858
939
 
859
940
  Args:
860
- features: Iterable of Feature records to write.
941
+ features: Feature records to add, each containing a value and metadata.
942
+
943
+ Raises:
944
+ DerivaMLException: If feature addition fails or features are invalid.
945
+
946
+ Example:
947
+ >>> feature = FeatureRecord(value="high", confidence=0.95)
948
+ >>> execution.add_features([feature])
861
949
  """
862
950
 
863
951
  # Make sure feature list is homogeneous:
@@ -878,7 +966,7 @@ class Execution:
878
966
  feature_name=feature.feature_name,
879
967
  exec_rid=self.execution_rid,
880
968
  )
881
- with open(json_path, "a", encoding="utf-8") as file:
969
+ with Path(json_path).open("a", encoding="utf-8") as file:
882
970
  for feature in features:
883
971
  feature.Execution = self.execution_rid
884
972
  file.write(json.dumps(feature.model_dump(mode="json")) + "\n")
@@ -888,7 +976,7 @@
         self,
         dataset_types: str | list[str],
         description: str,
-        version: Optional[DatasetVersion] = None,
+        version: DatasetVersion | None = None,
     ) -> RID:
         """Create a new dataset with specified types.
 
@@ -900,14 +988,12 @@
         Returns:
             RID of the newly created dataset.
         """
-        return self._ml_object.create_dataset(
-            dataset_types, description, self.execution_rid, version=version
-        )
+        return self._ml_object.create_dataset(dataset_types, description, self.execution_rid, version=version)
 
     def add_dataset_members(
         self,
         dataset_rid: RID,
-        members: list[RID],
+        members: list[RID] | dict[str, list[RID]],
         validate: bool = True,
         description: str = "",
     ) -> None:
@@ -920,7 +1006,7 @@
         been configured to be a dataset element type.
 
         Args:
-            dataset_rid: RID of dataset_table to extend or None if new dataset_table is to be created.
+            dataset_rid: RID of dataset_table to extend or None if a new dataset_table is to be created.
             members: List of RIDs of members to add to the dataset_table. RID must be to a table type that is a
                 dataset element type (see DerivaML.add_dataset_element_type).
             validate: Check rid_list to make sure elements are not already in the dataset_table.
@@ -943,7 +1029,7 @@
             dataset_rid: RID to a dataset_table
             component: Which version of the dataset_table to increment.
             dataset_rid: RID of the dataset whose version is to be incremented.
-            component: Major, Minor or Patch
+            component: Major, Minor, or Patch
             description: Description of the version update of the dataset_table.
 
         Returns:
@@ -963,13 +1049,42 @@
     def add_files(
         self,
         files: Iterable[FileSpec],
-        file_types: str | list[str],
-    ) -> Iterable[RID]:
-        """Add files to the file table"""
+        dataset_types: str | list[str] | None = None,
+        description: str = "",
+    ) -> RID:
+        """Adds files to the catalog with their metadata.
+
+        Registers files in the catalog along with their metadata (MD5, length, URL) and associates them with
+        specified file types.
+
+        Args:
+            files: File specifications containing MD5 checksum, length, and URL.
+            dataset_types: One or more dataset type terms from File_Type vocabulary.
+            description: Description of the files.
+
+        Returns:
+            RID: Dataset RID that identifies newly added files. Will be nested to mirror the original
+                directory structure of the files.
+
+        Raises:
+            DerivaMLInvalidTerm: If dataset_types are invalid or execution_rid is not an execution record.
+
+        Examples:
+            Add a single dataset type:
+            >>> files = [FileSpec(url="path/to/file.txt", md5="abc123", length=1000)]
+            >>> rid = exe.add_files(files, dataset_types="text")
+
+            Add multiple dataset types:
+            >>> rid = exe.add_files(
+            ...     files=[FileSpec(url="image.png", md5="def456", length=2000)],
+            ...     dataset_types=["image", "png"],
+            ... )
+        """
         return self._ml_object.add_files(
             files=files,
-            file_types=file_types,
+            dataset_types=dataset_types,
             execution_rid=self.execution_rid,
+            description=description,
         )
 
     def __str__(self):
@@ -1015,7 +1130,5 @@
                 Status.failed,
                 f"Exception type: {exc_type}, Exception value: {exc_value}",
             )
-            logging.error(
-                f"Exception type: {exc_type}, Exception value: {exc_value}, Exception traceback: {exc_tb}"
-            )
+            logging.error(f"Exception type: {exc_type}, Exception value: {exc_value}, Exception traceback: {exc_tb}")
         return False
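
Taken together, the new docstrings describe the intended lifecycle: build an ExecutionConfiguration, enter the execution as a context manager, download dataset bags, do the work, and upload outputs. A minimal end-to-end sketch assembled from the docstring examples in this diff; the workflow name, dataset RID, version, and the DerivaML constructor arguments are illustrative placeholders, not values confirmed by this diff:

    from deriva_ml.core.base import DerivaML
    from deriva_ml.dataset.aux_classes import DatasetSpec
    from deriva_ml.execution.execution_configuration import ExecutionConfiguration

    ml = DerivaML(...)  # hypothetical: connection arguments omitted
    config = ExecutionConfiguration(workflow="analysis_workflow", description="Data analysis")
    spec = DatasetSpec(rid="1-abc123", version="1.2.0")

    with ml.create_execution(config) as execution:
        bag = execution.download_dataset_bag(spec)
        # Run the analysis against bag.path here.
        execution.upload_execution_outputs()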