deriva-ml 1.17.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. deriva_ml/.DS_Store +0 -0
  2. deriva_ml/__init__.py +79 -0
  3. deriva_ml/bump_version.py +142 -0
  4. deriva_ml/core/__init__.py +39 -0
  5. deriva_ml/core/base.py +1527 -0
  6. deriva_ml/core/config.py +69 -0
  7. deriva_ml/core/constants.py +36 -0
  8. deriva_ml/core/definitions.py +74 -0
  9. deriva_ml/core/enums.py +222 -0
  10. deriva_ml/core/ermrest.py +288 -0
  11. deriva_ml/core/exceptions.py +28 -0
  12. deriva_ml/core/filespec.py +116 -0
  13. deriva_ml/dataset/__init__.py +12 -0
  14. deriva_ml/dataset/aux_classes.py +225 -0
  15. deriva_ml/dataset/dataset.py +1519 -0
  16. deriva_ml/dataset/dataset_bag.py +450 -0
  17. deriva_ml/dataset/history.py +109 -0
  18. deriva_ml/dataset/upload.py +439 -0
  19. deriva_ml/demo_catalog.py +495 -0
  20. deriva_ml/execution/__init__.py +26 -0
  21. deriva_ml/execution/environment.py +290 -0
  22. deriva_ml/execution/execution.py +1180 -0
  23. deriva_ml/execution/execution_configuration.py +147 -0
  24. deriva_ml/execution/workflow.py +413 -0
  25. deriva_ml/feature.py +228 -0
  26. deriva_ml/install_kernel.py +71 -0
  27. deriva_ml/model/__init__.py +0 -0
  28. deriva_ml/model/catalog.py +485 -0
  29. deriva_ml/model/database.py +719 -0
  30. deriva_ml/protocols/dataset.py +19 -0
  31. deriva_ml/run_notebook.py +228 -0
  32. deriva_ml/schema/__init__.py +3 -0
  33. deriva_ml/schema/annotations.py +473 -0
  34. deriva_ml/schema/check_schema.py +104 -0
  35. deriva_ml/schema/create_schema.py +393 -0
  36. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  37. deriva_ml/schema/policy.json +81 -0
  38. deriva_ml/schema/table_comments_utils.py +57 -0
  39. deriva_ml/test.py +94 -0
  40. deriva_ml-1.17.10.dist-info/METADATA +38 -0
  41. deriva_ml-1.17.10.dist-info/RECORD +45 -0
  42. deriva_ml-1.17.10.dist-info/WHEEL +5 -0
  43. deriva_ml-1.17.10.dist-info/entry_points.txt +9 -0
  44. deriva_ml-1.17.10.dist-info/licenses/LICENSE +201 -0
  45. deriva_ml-1.17.10.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1180 @@
1
+ """Execution management for DerivaML.
2
+
3
+ This module provides functionality for managing and tracking executions in DerivaML. An execution
4
+ represents a computational or manual process that operates on datasets and produces outputs.
5
+ The module includes:
6
+
7
+ - Execution class: Core class for managing execution state and context
8
+ - Asset management: Track input and output files
9
+ - Status tracking: Monitor and update execution progress
10
+ - Dataset handling: Download and materialize required datasets
11
+ - Provenance tracking: Record relationships between inputs, processes, and outputs
12
+
13
+ The Execution class serves as the primary interface for managing the lifecycle of a computational
14
+ or manual process within DerivaML.
15
+
16
+ Typical usage example:
17
+ >>> config = ExecutionConfiguration(workflow="analysis_workflow", description="Data analysis")
18
+ >>> with ml.create_execution(config) as execution:
19
+ ... execution.download_dataset_bag(dataset_spec)
20
+ ... # Run analysis
21
+ ... execution.upload_execution_outputs()
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ import logging
28
+ import os
29
+ import shutil
30
+ import sys
31
+ from collections import defaultdict
32
+ from datetime import datetime
33
+ from pathlib import Path
34
+ from typing import Any, Iterable, List
35
+
36
+ from deriva.core import format_exception
37
+ from deriva.core.hatrac_store import HatracStore
38
+ from pydantic import ConfigDict, validate_call
39
+
40
+ from deriva_ml.core.base import DerivaML
41
+ from deriva_ml.core.definitions import (
42
+ DRY_RUN_RID,
43
+ RID,
44
+ ExecMetadataType,
45
+ FileSpec,
46
+ FileUploadState,
47
+ MLAsset,
48
+ MLVocab,
49
+ Status,
50
+ )
51
+ from deriva_ml.core.exceptions import DerivaMLException
52
+ from deriva_ml.dataset.aux_classes import DatasetSpec, DatasetVersion, VersionPart
53
+ from deriva_ml.dataset.dataset_bag import DatasetBag
54
+ from deriva_ml.dataset.upload import (
55
+ asset_file_path,
56
+ asset_root,
57
+ asset_type_path,
58
+ execution_root,
59
+ feature_root,
60
+ feature_value_path,
61
+ is_feature_dir,
62
+ normalize_asset_dir,
63
+ table_path,
64
+ upload_directory,
65
+ )
66
+ from deriva_ml.execution.environment import get_execution_environment
67
+ from deriva_ml.execution.execution_configuration import ExecutionConfiguration
68
+ from deriva_ml.execution.workflow import Workflow
69
+ from deriva_ml.feature import FeatureRecord
70
+
71
+ # Keep PyCharm from complaining about undefined references in docstrings.
72
+ execution: Execution
73
+ ml: DerivaML
74
+ dataset_spec: DatasetSpec
75
+
76
+ try:
77
+ from icecream import ic
78
+ except ImportError: # Graceful fallback if IceCream isn't installed.
79
+ ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
80
+
81
+
82
+ try:
83
+ from IPython.display import Markdown, display
84
+ except ImportError:
85
+
86
+ def display(s):
87
+ print(s)
88
+
89
+ def Markdown(s):
90
+ return s
91
+
92
+
93
+ # Platform-specific base class
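+ # (pathlib.Path supports direct subclassing only from Python 3.12 onward; earlier
+ # versions must subclass type(Path()) to pick up the platform-specific flavour.)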
94
+ if sys.version_info >= (3, 12):
95
+
96
+ class AssetFilePath(Path):
97
+ """Extended Path class for managing asset files.
98
+
99
+ Represents a file path with additional metadata about its role as an asset in the catalog.
100
+ This class extends the standard Path class to include information about the asset's
101
+ catalog representation and type.
102
+
103
+ Attributes:
104
+ asset_name (str): Name of the asset in the catalog (e.g., asset table name).
105
+ file_name (str): Name of the local file containing the asset.
106
+ asset_metadata (dict[str, Any]): Additional columns beyond URL, Length, and checksum.
107
+ asset_types (list[str]): Terms from the Asset_Type controlled vocabulary.
108
+ asset_rid (RID | None): Resource Identifier if uploaded to an asset table.
109
+
110
+ Example:
111
+ >>> path = AssetFilePath(
112
+ ... "/path/to/file.txt",
113
+ ... asset_name="analysis_output",
114
+ ... file_name="results.txt",
115
+ ... asset_metadata={"version": "1.0"},
116
+ ... asset_types=["text", "results"]
117
+ ... )
118
+ """
119
+
120
+ def __init__(
121
+ self,
122
+ asset_path: str | Path,
123
+ asset_name: str,
124
+ file_name: str,
125
+ asset_metadata: dict[str, Any],
126
+ asset_types: list[str] | str,
127
+ asset_rid: RID | None = None,
128
+ ):
129
+ """Initializes an AssetFilePath instance.
130
+
131
+ Args:
132
+ asset_path: Local path to the asset file.
133
+ asset_name: Name of the asset in the catalog.
134
+ file_name: Name of the local file.
135
+ asset_metadata: Additional metadata columns.
136
+ asset_types: One or more asset type terms.
137
+ asset_rid: Optional Resource Identifier if already in catalog.
138
+ """
139
+ super().__init__(asset_path)
140
+ self.asset_name = asset_name
141
+ self.file_name = file_name
142
+ self.asset_metadata = asset_metadata
143
+ self.asset_types = asset_types if isinstance(asset_types, list) else [asset_types]
144
+ self.asset_rid = asset_rid
145
+ else:
146
+
147
+ class AssetFilePath(type(Path())):
148
+ """
149
+ Create a new Path object that has additional information related to the use of this path as an asset.
150
+
151
+ Attributes:
152
+ asset_path: Local path to the location of the asset.
153
+ asset_name: The name of the asset in the catalog (e.g., the asset table name).
154
+ file_name: Name of the local file that contains the contents of the asset.
155
+ asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
156
+ asset_types: A list of terms from the Asset_Type controlled vocabulary.
157
+ asset_rid: The RID of the asset if it has been uploaded into an asset table
158
+ """
159
+
160
+ def __new__(
161
+ cls,
162
+ asset_path: str | Path,
163
+ asset_name: str,
164
+ file_name: str,
165
+ asset_metadata: dict[str, Any],
166
+ asset_types: list[str] | str,
167
+ asset_rid: RID | None = None,
168
+ ):
169
+ # Only pass the path to the base Path class
170
+ obj = super().__new__(cls, asset_path)
171
+ obj.asset_name = asset_name
172
+ obj.file_name = file_name
173
+ obj.asset_metadata = asset_metadata
174
+ obj.asset_types = asset_types if isinstance(asset_types, list) else [asset_types]
175
+ obj.asset_rid = asset_rid
176
+ return obj
177
+
178
+
179
+ class Execution:
180
+ """Manages the lifecycle and context of a DerivaML execution.
181
+
182
+ An Execution represents a computational or manual process within DerivaML. It provides:
183
+ - Dataset materialization and access
184
+ - Asset management (inputs and outputs)
185
+ - Status tracking and updates
186
+ - Provenance recording
187
+ - Result upload and cataloging
188
+
189
+ The class handles downloading required datasets and assets, tracking execution state,
190
+ and managing the upload of results. Every dataset and file generated is associated
191
+ with an execution record for provenance tracking.
192
+
193
+ Attributes:
194
+ dataset_rids (list[RID]): RIDs of datasets used in the execution.
195
+ datasets (list[DatasetBag]): Materialized dataset objects.
196
+ configuration (ExecutionConfiguration): Execution settings and parameters.
197
+ workflow_rid (RID): RID of the associated workflow.
198
+ status (Status): Current execution status.
199
+ asset_paths (dict[str, list[AssetFilePath]]): Execution asset paths, keyed by asset table name.
200
+ start_time (datetime | None): When execution started.
201
+ stop_time (datetime | None): When execution completed.
202
+
203
+ Example:
204
+ >>> config = ExecutionConfiguration(
205
+ ... workflow="analysis",
206
+ ... description="Process samples",
207
+ ... )
208
+ >>> with ml.create_execution(config) as execution:
209
+ ... execution.download_dataset_bag(dataset_spec)
210
+ ... # Run analysis
211
+ ... execution.upload_execution_outputs()
212
+ """
213
+
214
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
215
+ def __init__(
216
+ self,
217
+ configuration: ExecutionConfiguration,
218
+ ml_object: DerivaML,
219
+ workflow: Workflow | RID | None = None,
220
+ reload: RID | None = None,
221
+ dry_run: bool = False,
222
+ ):
223
+ """Initializes an Execution instance.
224
+
225
+ Creates a new execution or reloads an existing one. Initializes the execution
226
+ environment, downloads required datasets, and sets up asset tracking.
227
+
228
+ Args:
229
+ configuration: Settings and parameters for the execution.
230
+ ml_object: DerivaML instance managing the execution.
231
+ workflow: Optional workflow RID or Workflow object. If not specified, the workflow RID is taken from
232
+ the ExecutionConfiguration object.
233
+ reload: Optional RID of existing execution to reload.
234
+ dry_run: If True, don't create catalog records or upload results.
235
+
236
+ Raises:
237
+ DerivaMLException: If initialization fails or configuration is invalid.
238
+ """
239
+
240
+ self.asset_paths: dict[str, list[AssetFilePath]] = {}
241
+ self.configuration = configuration
242
+ self._ml_object = ml_object
243
+ self._model = ml_object.model
244
+ self._logger = ml_object._logger
245
+ self.start_time = None
246
+ self.stop_time = None
247
+ self.status = Status.created
248
+ self.uploaded_assets: dict[str, list[AssetFilePath]] | None = None
249
+ self.configuration.argv = sys.argv
250
+
251
+ self.dataset_rids: List[RID] = []
252
+ self.datasets: list[DatasetBag] = []
253
+
254
+ self._working_dir = self._ml_object.working_dir
255
+ self._cache_dir = self._ml_object.cache_dir
256
+ self._dry_run = dry_run
257
+
258
+ # Make sure we have a good workflow.
259
+ if workflow:
260
+ self.configuration.workflow = workflow
261
+ if isinstance(self.configuration.workflow, Workflow):
262
+ self._ml_object.lookup_term(MLVocab.workflow_type, configuration.workflow.workflow_type)
263
+ self.workflow_rid = (
264
+ self._ml_object.add_workflow(self.configuration.workflow) if not self._dry_run else DRY_RUN_RID
265
+ )
266
+ else:
267
+ self.workflow_rid = self.configuration.workflow
268
+ if self._ml_object.resolve_rid(configuration.workflow).table.name != "Workflow":
269
+ raise DerivaMLException("Workflow specified in execution configuration is not a Workflow")
270
+
271
+ # Validate the datasets and assets to be valid.
272
+ for d in self.configuration.datasets:
273
+ if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
274
+ raise DerivaMLException("Dataset specified in execution configuration is not a dataset")
275
+
276
+ for a in self.configuration.assets:
277
+ if not self._model.is_asset(self._ml_object.resolve_rid(a).table.name):
278
+ raise DerivaMLException("Asset specified in execution configuration is not a asset table")
279
+
280
+ schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
281
+ if reload:
282
+ self.execution_rid = reload
283
+ if self.execution_rid == DRY_RUN_RID:
284
+ self._dry_run = True
285
+ elif self._dry_run:
286
+ self.execution_rid = DRY_RUN_RID
287
+ else:
288
+ self.execution_rid = schema_path.Execution.insert(
289
+ [
290
+ {
291
+ "Description": self.configuration.description,
292
+ "Workflow": self.workflow_rid,
293
+ }
294
+ ]
295
+ )[0]["RID"]
296
+
297
+ if rid_path := os.environ.get("DERIVA_ML_SAVE_EXECUTION_RID", None):
298
+ # Put execution_rid into the provided file path so we can find it later.
299
+ with Path(rid_path).open("w") as f:
300
+ json.dump(
301
+ {
302
+ "hostname": self._ml_object.host_name,
303
+ "catalog_id": self._ml_object.catalog_id,
304
+ "workflow_rid": self.workflow_rid,
305
+ "execution_rid": self.execution_rid,
306
+ },
307
+ f,
308
+ )
309
+
310
+ # Create a directory for execution rid so we can recover the state in case of a crash.
311
+ execution_root(prefix=self._ml_object.working_dir, exec_rid=self.execution_rid)
312
+ self._initialize_execution(reload)
313
+
314
+ def _save_runtime_environment(self):
315
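+ """Snapshot the current runtime environment and register it as an Execution_Metadata asset."""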
+ runtime_env_path = self.asset_file_path(
316
+ asset_name="Execution_Metadata",
317
+ file_name=f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
318
+ asset_types=ExecMetadataType.runtime_env.value,
319
+ )
320
+ with Path(runtime_env_path).open("w") as fp:
321
+ json.dump(get_execution_environment(), fp)
322
+
323
+ def _upload_hydra_config_assets(self):
324
+ """Upload hydra assets to the catalog."""
325
+ hydra_runtime_output_dir = self._ml_object.hydra_runtime_output_dir
326
+ if hydra_runtime_output_dir:
327
+ timestamp = hydra_runtime_output_dir.parts[-1]
328
+ for hydra_asset in hydra_runtime_output_dir.rglob("*"):
329
+ if hydra_asset.is_dir():
330
+ continue
331
+ _ = self.asset_file_path(
332
+ asset_name=MLAsset.execution_metadata,
333
+ file_name=hydra_asset,  # rglob() already yields paths rooted at the output directory
334
+ rename_file=f"hydra-{timestamp}-{hydra_asset.name}",
335
+ asset_types=ExecMetadataType.execution_config.value,
336
+ )
337
+
338
+ def _initialize_execution(self, reload: RID | None = None) -> None:
339
+ """Initialize the execution by a configuration in the Execution_Metadata table.
340
+ Set up a working directory and download all the assets and data.
341
+
342
+ :raise DerivaMLException: If there is an issue initializing the execution.
343
+
344
+ Args:
345
+ reload: RID of previously initialized execution.
346
+
347
+ Returns:
348
+
349
+ """
350
+ # Materialize bdbag
351
+ for dataset in self.configuration.datasets:
352
+ self.update_status(Status.initializing, f"Materialize bag {dataset.rid}... ")
353
+ self.datasets.append(self.download_dataset_bag(dataset))
354
+ self.dataset_rids.append(dataset.rid)
355
+
356
+ # Update execution info
357
+ schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
358
+ if self.dataset_rids and not (reload or self._dry_run):
359
+ schema_path.Dataset_Execution.insert(
360
+ [{"Dataset": d, "Execution": self.execution_rid} for d in self.dataset_rids]
361
+ )
362
+
363
+ # Download assets....
364
+ self.update_status(Status.running, "Downloading assets ...")
365
+ self.asset_paths = {}
366
+ for asset_rid in self.configuration.assets:
367
+ asset_table = self._ml_object.resolve_rid(asset_rid).table.name
368
+ dest_dir = (
369
+ execution_root(self._ml_object.working_dir, self.execution_rid) / "downloaded-assets" / asset_table
370
+ )
371
+ dest_dir.mkdir(parents=True, exist_ok=True)
372
+ self.asset_paths.setdefault(asset_table, []).append(
373
+ self.download_asset(
374
+ asset_rid=asset_rid,
375
+ dest_dir=dest_dir,
376
+ update_catalog=not (reload or self._dry_run),
377
+ )
378
+ )
379
+
380
+ # Save configuration details for later upload
381
+ if not reload:
382
+ cfile = self.asset_file_path(
383
+ asset_name=MLAsset.execution_metadata,
384
+ file_name="configuration.json",
385
+ asset_types=ExecMetadataType.execution_config.value,
386
+ )
387
+
388
+ with Path(cfile).open("w", encoding="utf-8") as config_file:
389
+ json.dump(self.configuration.model_dump(mode="json"), config_file)
390
+ lock_file = Path(self.configuration.workflow.git_root) / "uv.lock"
391
+ if lock_file.exists():
392
+ _ = self.asset_file_path(
393
+ asset_name=MLAsset.execution_metadata,
394
+ file_name=lock_file,
395
+ asset_types=ExecMetadataType.execution_config.value,
396
+ )
397
+
398
+ self._upload_hydra_config_assets()
399
+
400
+ # save runtime env
401
+ self._save_runtime_environment()
402
+
403
+ # Now upload the files so we have the info in case the execution fails.
404
+ self.uploaded_assets = self._upload_execution_dirs()
405
+ self.start_time = datetime.now()
406
+ self.update_status(Status.pending, "Initialization finished.")
407
+
408
+ @property
409
+ def working_dir(self) -> Path:
410
+ """Return the working directory for the execution."""
411
+ return self._execution_root
412
+
413
+ @property
414
+ def _execution_root(self) -> Path:
415
+ """
416
+
417
+ Args:
418
+
419
+ Returns:
420
+ :return:
421
+
422
+ """
423
+ return execution_root(self._working_dir, self.execution_rid)
424
+
425
+ @property
426
+ def _feature_root(self) -> Path:
427
+ """The root path to all execution-specific files.
428
+ :return:
429
+
430
+ Args:
431
+
432
+ Returns:
433
+
434
+ """
435
+ return feature_root(self._working_dir, self.execution_rid)
436
+
437
+ @property
438
+ def _asset_root(self) -> Path:
439
+ """The root path to all execution-specific files.
440
+ :return:
441
+
442
+ Args:
443
+
444
+ Returns:
445
+
446
+ """
447
+ return asset_root(self._working_dir, self.execution_rid)
448
+
449
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
450
+ def download_dataset_bag(self, dataset: DatasetSpec) -> DatasetBag:
451
+ """Downloads and materializes a dataset for use in the execution.
452
+
453
+ Downloads the specified dataset as a BDBag and materializes it in the execution's
454
+ working directory. The dataset version is determined by the DatasetSpec.
455
+
456
+ Args:
457
+ dataset: Specification of the dataset to download, including version and
458
+ materialization options.
459
+
460
+ Returns:
461
+ DatasetBag: Object containing:
462
+ - path: Local filesystem path to downloaded dataset
463
+ - rid: Dataset's Resource Identifier
464
+ - minid: Dataset's Minimal Viable Identifier
465
+
466
+ Raises:
467
+ DerivaMLException: If download or materialization fails.
468
+
469
+ Example:
470
+ >>> spec = DatasetSpec(rid="1-abc123", version="1.2.0")
471
+ >>> bag = execution.download_dataset_bag(spec)
472
+ >>> print(f"Downloaded to {bag.path}")
473
+ """
474
+ return self._ml_object.download_dataset_bag(dataset, execution_rid=self.execution_rid)
475
+
476
+ @validate_call
477
+ def update_status(self, status: Status, msg: str) -> None:
478
+ """Updates the execution's status in the catalog.
479
+
480
+ Records a new status and associated message in the catalog, allowing remote
481
+ tracking of execution progress.
482
+
483
+ Args:
484
+ status: New status value (e.g., running, completed, failed).
485
+ msg: Description of the status change or current state.
486
+
487
+ Raises:
488
+ DerivaMLException: If status update fails.
489
+
490
+ Example:
491
+ >>> execution.update_status(Status.running, "Processing sample 1 of 10")
492
+ """
493
+ self.status = status
494
+ self._logger.info(msg)
495
+
496
+ if self._dry_run:
497
+ return
498
+
499
+ self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema].Execution.update(
500
+ [
501
+ {
502
+ "RID": self.execution_rid,
503
+ "Status": self.status.value,
504
+ "Status_Detail": msg,
505
+ }
506
+ ]
507
+ )
508
+
509
+ def execution_start(self) -> None:
510
+ """Marks the execution as started.
511
+
512
+ Records the start time and updates the execution's status to 'initializing'.
513
+ This should be called before beginning the main execution work.
514
+
515
+ Example:
516
+ >>> execution.execution_start()
517
+ >>> try:
518
+ ... # Run analysis
519
+ ... execution.execution_stop()
520
+ ... except Exception:
521
+ ... execution.update_status(Status.failed, "Analysis error")
522
+ """
523
+ self.start_time = datetime.now()
524
+ self.uploaded_assets = None
525
+ self.update_status(Status.initializing, "Start execution ...")
526
+
527
+ def execution_stop(self) -> None:
528
+ """Marks the execution as completed.
529
+
530
+ Records the stop time and updates the execution's status to 'completed'.
531
+ This should be called after all execution work is finished.
532
+
533
+ Example:
534
+ >>> try:
535
+ ... # Run analysis
536
+ ... execution.execution_stop()
537
+ ... except Exception:
538
+ ... execution.update_status(Status.failed, "Analysis error")
539
+ """
540
+ self.stop_time = datetime.now()
541
+ duration = self.stop_time - self.start_time
542
+ hours, remainder = divmod(duration.total_seconds(), 3600)
543
+ minutes, seconds = divmod(remainder, 60)
544
+ duration = f"{round(hours, 0)}H {round(minutes, 0)}min {round(seconds, 4)}sec"
545
+
546
+ self.update_status(Status.completed, "Algorithm execution ended.")
547
+ if not self._dry_run:
548
+ self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema].Execution.update(
549
+ [{"RID": self.execution_rid, "Duration": duration}]
550
+ )
551
+
552
+ def _upload_execution_dirs(self) -> dict[str, list[AssetFilePath]]:
553
+ """Upload execution assets at _working_dir/Execution_asset.
554
+
555
+ This routine uploads the contents of the
556
+ Execution_Asset directory and then updates the execution_asset table in the ML schema to have references
557
+ to these newly uploaded files.
558
+
559
+ Returns:
560
+ dict: Results of the upload operation.
561
+
562
+ Raises:
563
+ DerivaMLException: If there is an issue when uploading the assets.
564
+ """
565
+
566
+ try:
567
+ self.update_status(Status.running, "Uploading execution files...")
568
+ results = upload_directory(self._model, self._asset_root)
569
+ except RuntimeError as e:
570
+ error = format_exception(e)
571
+ self.update_status(Status.failed, error)
572
+ raise DerivaMLException(f"Fail to upload execution_assets. Error: {error}")
573
+
574
+ asset_map = {}
575
+ for path, status in results.items():
576
+ asset_table, file_name = normalize_asset_dir(path)
577
+
578
+ asset_map.setdefault(asset_table, []).append(
579
+ AssetFilePath(
580
+ asset_path=path,
581
+ asset_name=asset_table,
582
+ file_name=file_name,
583
+ asset_metadata={
584
+ k: v
585
+ for k, v in status.result.items()
586
+ if k in self._model.asset_metadata(asset_table.split("/")[1])
587
+ },
588
+ asset_types=[],
589
+ asset_rid=status.result["RID"],
590
+ )
591
+ )
592
+ self._update_asset_execution_table(asset_map)
593
+ self.update_status(Status.running, "Updating features...")
594
+
595
+ for p in self._feature_root.glob("**/*.jsonl"):
596
+ m = is_feature_dir(p.parent)
597
+ self._update_feature_table(
598
+ target_table=m["target_table"],
599
+ feature_name=m["feature_name"],
600
+ feature_file=p,
601
+ uploaded_files=asset_map,
602
+ )
603
+
604
+ self.update_status(Status.running, "Upload assets complete")
605
+ return asset_map
606
+
607
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
608
+ def download_asset(self, asset_rid: RID, dest_dir: Path, update_catalog=True) -> AssetFilePath:
609
+ """Download an asset from a URL and place it in a local directory.
610
+
611
+ Args:
612
+ asset_rid: RID of the asset.
613
+ dest_dir: Destination directory for the asset.
614
+ update_catalog: Whether to update the catalog execution information after downloading.
615
+
616
+ Returns:
617
+ An AssetFilePath for the downloaded asset, including its catalog metadata and types.
618
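+
+ Example (an illustrative sketch; the RID shown is hypothetical):
+ >>> dest = execution.working_dir / "inputs"
+ >>> dest.mkdir(parents=True, exist_ok=True)
+ >>> asset = execution.download_asset(asset_rid="1-abc1", dest_dir=dest)
+ >>> print(asset.asset_name, asset.asset_types)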
+ """
619
+
620
+ asset_table = self._ml_object.resolve_rid(asset_rid).table
621
+ if not self._model.is_asset(asset_table):
622
+ raise DerivaMLException(f"RID {asset_rid} is not for an asset table.")
623
+
624
+ asset_record = self._ml_object.retrieve_rid(asset_rid)
625
+ asset_metadata = {k: v for k, v in asset_record.items() if k in self._model.asset_metadata(asset_table)}
626
+ asset_url = asset_record["URL"]
627
+ asset_filename = dest_dir / asset_record["Filename"]
628
+ hs = HatracStore("https", self._ml_object.host_name, self._ml_object.credential)
629
+ hs.get_obj(path=asset_url, destfilename=asset_filename.as_posix())
630
+
631
+ asset_type_table, _col_l, _col_r = self._model.find_association(asset_table, MLVocab.asset_type)
632
+ type_path = self._ml_object.pathBuilder.schemas[asset_type_table.schema.name].tables[asset_type_table.name]
633
+ asset_types = [
634
+ asset_type[MLVocab.asset_type.value]
635
+ for asset_type in type_path.filter(type_path.columns[asset_table.name] == asset_rid)
636
+ .attributes(type_path.Asset_Type)
637
+ .fetch()
638
+ ]
639
+
640
+ asset_path = AssetFilePath(
641
+ file_name=asset_filename,
642
+ asset_rid=asset_rid,
643
+ asset_path=asset_filename,
644
+ asset_metadata=asset_metadata,
645
+ asset_name=asset_table.name,
646
+ asset_types=asset_types,
647
+ )
648
+
649
+ if update_catalog:
650
+ self._update_asset_execution_table(
651
+ {f"{asset_table.schema.name}/{asset_table.name}": [asset_path]},
652
+ asset_role="Input",
653
+ )
654
+ return asset_path
655
+
656
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
657
+ def upload_assets(
658
+ self,
659
+ assets_dir: str | Path,
660
+ ) -> dict[Any, FileUploadState] | None:
661
+ """Uploads assets from a directory to the catalog.
662
+
663
+ Scans the specified directory for assets and uploads them to the catalog,
664
+ recording their metadata and types. Assets are organized by their types
665
+ and associated with the execution.
666
+
667
+ Args:
668
+ assets_dir: Directory containing assets to upload.
669
+
670
+ Returns:
671
+ dict[Any, FileUploadState] | None: Mapping of assets to their upload states,
672
+ or None if no assets were found.
673
+
674
+ Raises:
675
+ DerivaMLException: If upload fails or assets are invalid.
676
+
677
+ Example:
678
+ >>> states = execution.upload_assets("output/results")
679
+ >>> for asset, state in states.items():
680
+ ... print(f"{asset}: {state}")
681
+ """
682
+
683
+ def path_to_asset(path: str) -> str:
684
+ """Pull the asset name out of a path to that asset in the filesystem"""
685
+ components = path.split("/")
686
+ return components[components.index("asset") + 2] # Look for asset in the path to find the name
687
+
688
+ if not self._model.is_asset(Path(assets_dir).name):
689
+ raise DerivaMLException("Directory does not have name of an asset table.")
690
+ results = upload_directory(self._model, assets_dir)
691
+ return {path_to_asset(p): r for p, r in results.items()}
692
+
693
+ def upload_execution_outputs(self, clean_folder: bool = True) -> dict[str, list[AssetFilePath]]:
694
+ """Uploads all outputs from the execution to the catalog.
695
+
696
+ Scans the execution's output directories for assets, features, and other results,
697
+ then uploads them to the catalog. Can optionally clean up the output folders
698
+ after successful upload.
699
+
700
+ Args:
701
+ clean_folder: Whether to delete output folders after upload. Defaults to True.
702
+
703
+ Returns:
704
+ dict[str, list[AssetFilePath]]: Mapping of asset types to their file paths.
705
+
706
+ Raises:
707
+ DerivaMLException: If upload fails or outputs are invalid.
708
+
709
+ Example:
710
+ >>> outputs = execution.upload_execution_outputs()
711
+ >>> for type_name, paths in outputs.items():
712
+ ... print(f"{type_name}: {len(paths)} files")
713
+ """
714
+ if self._dry_run:
715
+ return {}
716
+ try:
717
+ self.uploaded_assets = self._upload_execution_dirs()
718
+ self.update_status(Status.completed, "Successfully ended the execution.")
719
+ if clean_folder:
720
+ self._clean_folder_contents(self._execution_root)
721
+ return self.uploaded_assets
722
+ except Exception as e:
723
+ error = format_exception(e)
724
+ self.update_status(Status.failed, error)
725
+ raise e
726
+
727
+ def _clean_folder_contents(self, folder_path: Path):
728
+ """Clean up folder contents with Windows-compatible error handling.
729
+
730
+ Args:
731
+ folder_path: Path to the folder to clean
732
+ """
733
+ import time
734
+
735
+ MAX_RETRIES = 3
736
+ RETRY_DELAY = 1 # seconds
737
+
738
+ def remove_with_retry(path: Path, is_dir: bool = False) -> bool:
739
+ for attempt in range(MAX_RETRIES):
740
+ try:
741
+ if is_dir:
742
+ shutil.rmtree(path)
743
+ else:
744
+ Path(path).unlink()
745
+ return True
746
+ except (OSError, PermissionError) as e:
747
+ if attempt == MAX_RETRIES - 1:
748
+ self.update_status(Status.failed, format_exception(e))
749
+ return False
750
+ time.sleep(RETRY_DELAY)
751
+ return False
752
+
753
+ try:
754
+ with os.scandir(folder_path) as entries:
755
+ for entry in entries:
756
+ if entry.is_dir() and not entry.is_symlink():
757
+ remove_with_retry(Path(entry.path), is_dir=True)
758
+ else:
759
+ remove_with_retry(Path(entry.path))
760
+ except OSError as e:
761
+ self.update_status(Status.failed, format_exception(e))
762
+
763
+ def _update_feature_table(
764
+ self,
765
+ target_table: str,
766
+ feature_name: str,
767
+ feature_file: str | Path,
768
+ uploaded_files: dict[str, list[AssetFilePath]],
769
+ ) -> None:
770
+ """
771
+
772
+ Args:
773
+ target_table: str:
774
+ feature_name: str:
775
+ feature_file: str | Path:
776
+ uploaded_files: Dictionary whose key is an asset name, file-name pair, and whose value is a filename,
777
+ RID of that asset.
778
+ """
779
+
780
+ # Get the names of the feature columns whose values should be the RID of an asset.
781
+ asset_columns = [
782
+ c.name for c in self._ml_object.feature_record_class(target_table, feature_name).feature.asset_columns
783
+ ]
789
+
790
+ feature_table = self._ml_object.feature_record_class(target_table, feature_name).feature.feature_table.name
791
+ asset_map = {
792
+ (asset_table, asset.file_name): asset.asset_rid
793
+ for asset_table, assets in uploaded_files.items()
794
+ for asset in assets
795
+ }
796
+
797
+ def map_path(e):
798
+ """Go through the asset columns and replace the file name with the RID for the uploaded file."""
799
+ for c in asset_columns:
800
+ e[c] = asset_map[normalize_asset_dir(e[c])]
801
+ return e
802
+
803
+ # Load the JSON file that has the set of records that contain the feature values.
804
+ with Path(feature_file).open("r") as feature_values:
805
+ entities = [json.loads(line.strip()) for line in feature_values]
806
+ # Update the asset columns in the feature and add to the catalog.
807
+ self._ml_object.domain_path.tables[feature_table].insert([map_path(e) for e in entities], on_conflict_skip=True)
808
+
809
+ def _update_asset_execution_table(
810
+ self,
811
+ uploaded_assets: dict[str, list[AssetFilePath]],
812
+ asset_role: str = "Output",
813
+ ) -> None:
814
+ """Add entry to the association table connecting an asset to an execution RID
815
+
816
+ Args:
817
+ uploaded_assets: Dictionary whose key is the name of an asset table and whose value is a list of
818
+ AssetFilePath objects for assets newly added to that table.
819
+ asset_role: A term or list of terms from the Asset_Role vocabulary.
820
+ """
821
+ # Make sure the asset role is in the controlled vocabulary table.
822
+ if self._dry_run:
823
+ # Don't do any updates if we are doing a dry run.
824
+ return
825
+ self._ml_object.lookup_term(MLVocab.asset_role, asset_role)
826
+
827
+ pb = self._ml_object.pathBuilder
828
+ for asset_table, asset_list in uploaded_assets.items():
829
+ asset_table_name = asset_table.split("/")[1] # Peel off the schema from the asset table
830
+ asset_exe, asset_fk, execution_fk = self._model.find_association(asset_table_name, "Execution")
831
+ asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]
832
+
833
+ asset_exe_path.insert(
834
+ [
835
+ {
836
+ asset_fk: asset_path.asset_rid,
837
+ execution_fk: self.execution_rid,
838
+ "Asset_Role": asset_role,
839
+ }
840
+ for asset_path in asset_list
841
+ ],
842
+ on_conflict_skip=True,
843
+ )
844
+
845
+ # Now add in the type names via the asset_asset_type association table.
846
+ # Get the list of types for each file in the asset.
847
+ if asset_role == "Input":
848
+ return
849
+ asset_type_map = {}
850
+ with Path(
851
+ asset_type_path(
852
+ self._working_dir,
853
+ self.execution_rid,
854
+ self._model.name_to_table(asset_table_name),
855
+ )
856
+ ).open("r") as asset_type_file:
857
+ for line in asset_type_file:
858
+ asset_type_map.update(json.loads(line.strip()))
859
+ for asset_path in asset_list:
860
+ asset_path.asset_types = asset_type_map[asset_path.file_name]
861
+
862
+ asset_asset_type, _, _ = self._model.find_association(asset_table_name, "Asset_Type")
863
+ type_path = pb.schemas[asset_asset_type.schema.name].tables[asset_asset_type.name]
864
+
865
+ type_path.insert(
866
+ [
867
+ {asset_table_name: asset.asset_rid, "Asset_Type": t}
868
+ for asset in asset_list
869
+ for t in asset_type_map[asset.file_name]
870
+ ],
871
+ on_conflict_skip=True,
872
+ )
873
+
874
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
875
+ def asset_file_path(
876
+ self,
877
+ asset_name: str,
878
+ file_name: str | Path,
879
+ asset_types: list[str] | str | None = None,
880
+ copy_file=False,
881
+ rename_file: str | None = None,
882
+ **kwargs,
883
+ ) -> AssetFilePath:
884
+ """Return a pathlib Path to the directory in which to place files for the specified execution_asset type.
885
+
886
+ Given the name of an asset table, and a file name, register the file for upload and return a path to that
887
+ file in the upload directory. In addition to the filename, additional asset metadata and file asset types may
888
+ be specified.
889
+
890
+ This routine has three modes, depending on if file_name refers to an existing file. If it doesn't, a path
891
+ to a new file with the specified name is returned. The caller can then open that file for writing.
892
+
893
+ If the provided filename refers to an existing file and the copy_file argument is False (the default), then the
894
+ returned path contains a symbolic link to that file. If the copy_file argument is True, then the contents of
895
+ file_name are copied into the target directory.
896
+
897
+ Args:
898
+ asset_name: Name of the asset table in which the file will be placed.
899
+ file_name: Name of the file to be uploaded.
900
+ asset_types: One or more terms from the Asset_Type controlled vocabulary. Defaults to the asset name.
901
+ copy_file: Whether to copy the file rather than creating a symbolic link.
902
+ rename_file: If provided and file_name refers to an existing file, store the asset under this name.
903
+ **kwargs: Any additional metadata values that may be part of the asset table.
904
+
905
+ Returns:
906
+ Path in which to place asset files.
907
+
908
+ Raises:
909
+ DerivaMLException: If the asset type is not defined.
910
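+
+ Example (a minimal sketch; the asset table and type terms are hypothetical):
+ >>> out = execution.asset_file_path(
+ ... asset_name="Execution_Asset",
+ ... file_name="metrics.json",
+ ... asset_types="model_metrics",
+ ... )
+ >>> with out.open("w") as fp:
+ ... json.dump({"accuracy": 0.9}, fp)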
+ """
911
+ if not self._model.is_asset(asset_name):
912
+ DerivaMLException(f"Table {asset_name} is not an asset")
913
+
914
+ asset_table = self._model.name_to_table(asset_name)
915
+
916
+ asset_types = asset_types or kwargs.get("Asset_Type", None) or asset_name
917
+ asset_types = [asset_types] if isinstance(asset_types, str) else asset_types
918
+ for t in asset_types:
919
+ self._ml_object.lookup_term(MLVocab.asset_type, t)
920
+
921
+ # Determine if we will need to rename an existing file as the asset.
922
+ file_name = Path(file_name)
923
+ if file_name.name == "_implementations.log":
924
+ # There is a funny bug with S3 hatrac if we have the leading _ in the filename.
925
+ file_name = file_name.with_name("-implementations.log")
926
+
927
+ target_name = Path(rename_file) if file_name.exists() and rename_file else file_name
928
+ asset_path = asset_file_path(
929
+ prefix=self._working_dir,
930
+ exec_rid=self.execution_rid,
931
+ asset_table=self._model.name_to_table(asset_name),
932
+ file_name=target_name.name,
933
+ metadata=kwargs,
934
+ )
935
+
936
+ if file_name.exists():
937
+ if copy_file:
938
+ asset_path.write_bytes(file_name.read_bytes())
939
+ else:
940
+ try:
941
+ asset_path.symlink_to(file_name)
942
+ except (OSError, PermissionError):
943
+ # Fallback to copy if symlink fails (common on Windows)
944
+ asset_path.write_bytes(file_name.read_bytes())
945
+
946
+ # Persist the asset types into a file
947
+ with Path(asset_type_path(self._working_dir, self.execution_rid, asset_table)).open("a") as asset_type_file:
948
+ asset_type_file.write(json.dumps({target_name.name: asset_types}) + "\n")
949
+
950
+ return AssetFilePath(
951
+ asset_path=asset_path,
952
+ asset_name=asset_name,
953
+ file_name=target_name.name,
954
+ asset_metadata=kwargs,
955
+ asset_types=asset_types,
956
+ )
957
+
958
+ def table_path(self, table: str) -> Path:
959
+ """Return a local file path to a CSV to add values to a table on upload.
960
+
961
+ Args:
962
+ table: Name of table to be uploaded.
963
+
964
+ Returns:
965
+ Pathlib path to the file in which to place table values.
966
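+
+ Example (a minimal sketch; the table name is hypothetical):
+ >>> csv_path = execution.table_path("Subject")
+ >>> with csv_path.open("a") as f:
+ ... _ = f.write("alice,10\n")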
+ """
967
+ if table not in self._model.schemas[self._ml_object.domain_schema].tables:
968
+ raise DerivaMLException("Table '{}' not found in domain schema".format(table))
969
+
970
+ return table_path(self._working_dir, schema=self._ml_object.domain_schema, table=table)
971
+
972
+ def execute(self) -> Execution:
973
+ """Initiate an execution with the provided configuration. Can be used in a context manager."""
974
+ self.execution_start()
975
+ return self
976
+
977
+ @validate_call
978
+ def add_features(self, features: Iterable[FeatureRecord]) -> None:
979
+ """Adds feature records to the catalog.
980
+
981
+ Associates feature records with this execution and uploads them to the catalog.
982
+ Features represent measurable properties or characteristics of records.
983
+
984
+ NOTE: The catalog is not updated until upload_execution_outputs() is called.
985
+
986
+ Args:
987
+ features: Feature records to add, each containing a value and metadata.
988
+
989
+ Raises:
990
+ DerivaMLException: If feature addition fails or features are invalid.
991
+
992
+ Example:
993
+ >>> feature = FeatureRecord(value="high", confidence=0.95)
994
+ >>> execution.add_features([feature])
995
+ """
996
+
997
+ # Make sure feature list is homogeneous:
998
+ sorted_features = defaultdict(list)
999
+ for f in features:
1000
+ sorted_features[type(f)].append(f)
1001
+ for fs in sorted_features.values():
1002
+ self._add_features(fs)
1003
+
1004
+ def _add_features(self, features: list[FeatureRecord]) -> None:
1005
+ # Update feature records to include current execution_rid
1006
+ first_row = features[0]
1007
+ feature = first_row.feature
1008
+ json_path = feature_value_path(
1009
+ self._working_dir,
1010
+ schema=self._ml_object.domain_schema,
1011
+ target_table=feature.target_table.name,
1012
+ feature_name=feature.feature_name,
1013
+ exec_rid=self.execution_rid,
1014
+ )
1015
+ with Path(json_path).open("a", encoding="utf-8") as file:
1016
+ for feature in features:
1017
+ feature.Execution = self.execution_rid
1018
+ file.write(json.dumps(feature.model_dump(mode="json")) + "\n")
1019
+
1020
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
1021
+ def create_dataset(
1022
+ self,
1023
+ dataset_types: str | list[str],
1024
+ description: str,
1025
+ version: DatasetVersion | None = None,
1026
+ ) -> RID:
1027
+ """Create a new dataset with specified types.
1028
+
1029
+ Args:
1030
+ dataset_types: One or more dataset type terms from the Dataset_Type controlled vocabulary.
1031
+ description: Markdown description of the dataset being created.
1032
+ version: Version to assign to the dataset. Defaults to 0.1.0
1033
+
1034
+ Returns:
1035
+ RID of the newly created dataset.
1036
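+
+ Example (a minimal sketch; the dataset type term is hypothetical):
+ >>> rid = execution.create_dataset(
+ ... dataset_types="training",
+ ... description="Training split for the demo model",
+ ... )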
+ """
1037
+ return self._ml_object.create_dataset(dataset_types, description, self.execution_rid, version=version)
1038
+
1039
+ def add_dataset_members(
1040
+ self,
1041
+ dataset_rid: RID,
1042
+ members: list[RID] | dict[str, list[RID]],
1043
+ validate: bool = True,
1044
+ description: str = "",
1045
+ ) -> None:
1046
+ """Add additional elements to an existing dataset_table.
1047
+
1048
+ Add new elements to an existing dataset. In addition to adding new members, the minor version number of the
1049
+ dataset is incremented and the description, if provided, is applied to that new version.
1050
+
1051
+ The RIDs in the list do not all have to be from the same table, but they must be from a table that has
1052
+ been configured to be a dataset element type.
1053
+
1054
+ Args:
1055
+ dataset_rid: RID of the dataset_table to extend.
1056
+ members: List of RIDs of members to add to the dataset_table. RID must be to a table type that is a
1057
+ dataset element type (see DerivaML.add_dataset_element_type).
1058
+ validate: Check the members list to make sure elements are not already in the dataset_table.
1059
+ description: Markdown description of the updated dataset.
1060
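+
+ Example (a minimal sketch; the RIDs shown are hypothetical):
+ >>> execution.add_dataset_members(
+ ... dataset_rid="1-abc1",
+ ... members=["1-def2", "1-def3"],
+ ... description="Added two new subjects",
+ ... )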
+ """
1061
+ return self._ml_object.add_dataset_members(
1062
+ dataset_rid=dataset_rid,
1063
+ members=members,
1064
+ validate=validate,
1065
+ description=description,
1066
+ execution_rid=self.execution_rid,
1067
+ )
1068
+
1069
+ def increment_dataset_version(
1070
+ self, dataset_rid: RID, component: VersionPart, description: str = ""
1071
+ ) -> DatasetVersion:
1072
+ """Increment the version of the specified dataset_table.
1073
+
1074
+ Args:
1075
+ dataset_rid: RID of the dataset whose version is to be incremented.
1076
+ component: Which part of the semantic version to increment: major, minor, or patch.
1079
+ description: Description of the version update of the dataset_table.
1080
+
1081
+ Returns:
1082
+ New semantic version of the dataset as a DatasetVersion.
1083
+
1084
+ Raises:
1085
+ DerivaMLException: if provided RID is not to a dataset_table.
1086
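+
+ Example (a minimal sketch; the RID is hypothetical and VersionPart.minor is assumed
+ to name the minor component):
+ >>> new_version = execution.increment_dataset_version(
+ ... dataset_rid="1-abc1",
+ ... component=VersionPart.minor,
+ ... description="Added new members",
+ ... )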
+ """
1087
+ return self._ml_object.increment_dataset_version(
1088
+ dataset_rid=dataset_rid,
1089
+ component=component,
1090
+ description=description,
1091
+ execution_rid=self.execution_rid,
1092
+ )
1093
+
1094
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
1095
+ def add_files(
1096
+ self,
1097
+ files: Iterable[FileSpec],
1098
+ dataset_types: str | list[str] | None = None,
1099
+ description: str = "",
1100
+ ) -> RID:
1101
+ """Adds files to the catalog with their metadata.
1102
+
1103
+ Registers files in the catalog along with their metadata (MD5, length, URL) and associates them with
1104
+ specified file types.
1105
+
1106
+ Args:
1107
+ files: File specifications containing MD5 checksum, length, and URL.
1108
+ dataset_types: One or more dataset type terms from File_Type vocabulary.
1109
+ description: Description of the files.
1110
+
1111
+ Returns:
1112
+ RID: RID of the dataset that identifies the newly added files. The dataset is nested to mirror the
1113
+ original directory structure of the files.
1114
+
1115
+ Raises:
1116
+ DerivaMLInvalidTerm: If dataset_types are invalid or execution_rid is not an execution record.
1117
+
1118
+ Examples:
1119
+ Add a single dataset type:
1120
+ >>> files = [FileSpec(url="path/to/file.txt", md5="abc123", length=1000)]
1121
+ >>> rid = exe.add_files(files, dataset_types="text")
1122
+
1123
+ Add multiple dataset types:
1124
+ >>> rid = exe.add_files(
1125
+ ... files=[FileSpec(url="image.png", md5="def456", length=2000)],
1126
+ ... dataset_types=["image", "png"],
1127
+ ... )
1128
+ """
1129
+ return self._ml_object.add_files(
1130
+ files=files,
1131
+ dataset_types=dataset_types,
1132
+ execution_rid=self.execution_rid,
1133
+ description=description,
1134
+ )
1135
+
1136
+ def __str__(self):
1137
+ items = [
1138
+ f"caching_dir: {self._cache_dir}",
1139
+ f"_working_dir: {self._working_dir}",
1140
+ f"execution_rid: {self.execution_rid}",
1141
+ f"workflow_rid: {self.workflow_rid}",
1142
+ f"asset_paths: {self.asset_paths}",
1143
+ f"configuration: {self.configuration}",
1144
+ ]
1145
+ return "\n".join(items)
1146
+
1147
+ def __enter__(self):
1148
+ """
1149
+ Method invoked when entering the context.
1150
+
1151
+ Returns:
1152
+ self: The instance itself.
1153
+
1154
+ """
1155
+ self.execution_start()
1156
+ return self
1157
+
1158
+ def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> bool:
1159
+ """
1160
+ Method invoked when exiting the context.
1161
+
1162
+ Args:
1163
+ exc_type: Exception type.
1164
+ exc_value: Exception value.
1165
+ exc_tb: Exception traceback.
1166
+
1167
+ Returns:
1168
+ bool: True if execution completed successfully, False otherwise.
1169
+ """
1170
+ if not exc_type:
1171
+ self.update_status(Status.running, "Successfully run Ml.")
1172
+ self.execution_stop()
1173
+ return True
1174
+ else:
1175
+ self.update_status(
1176
+ Status.failed,
1177
+ f"Exception type: {exc_type}, Exception value: {exc_value}",
1178
+ )
1179
+ logging.error(f"Exception type: {exc_type}, Exception value: {exc_value}, Exception traceback: {exc_tb}")
1180
+ return False