deriva-ml 1.11.0__tar.gz → 1.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {deriva_ml-1.11.0/src/deriva_ml.egg-info → deriva_ml-1.12.0}/PKG-INFO +2 -1
  2. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/README.md +1 -0
  3. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/Notebooks/DerivaML Execution.ipynb +2 -2
  4. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/user-guide/datasets.md +0 -2
  5. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/database_model.py +3 -2
  6. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/dataset.py +6 -15
  7. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/dataset_bag.py +1 -1
  8. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/deriva_ml_base.py +20 -11
  9. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/deriva_model.py +8 -2
  10. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/execution.py +43 -13
  11. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/execution_configuration.py +4 -0
  12. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/upload.py +5 -5
  13. {deriva_ml-1.11.0 → deriva_ml-1.12.0/src/deriva_ml.egg-info}/PKG-INFO +2 -1
  14. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/.github/workflows/publish-docs.yml +0 -0
  15. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/.gitignore +0 -0
  16. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/LICENSE +0 -0
  17. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/.DS_Store +0 -0
  18. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  19. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  20. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  21. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  22. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/assets/ERD.png +0 -0
  23. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/assets/Launcher.png +0 -0
  24. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/assets/copy_minid.png +0 -0
  25. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/assets/deriva-logo.png +0 -0
  26. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/assets/deriva-ml.pdf +0 -0
  27. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/assets/sharing-at-home.pdf +0 -0
  28. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/dataset.md +0 -0
  29. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/dataset_aux_classes.md +0 -0
  30. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/dataset_bag.md +0 -0
  31. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/deriva_ml_base.md +0 -0
  32. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/deriva_model.md +0 -0
  33. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/execution.md +0 -0
  34. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/execution_configuration.md +0 -0
  35. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/feature.md +0 -0
  36. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/code-docs/upload.md +0 -0
  37. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/deriva_ml_structure.md +0 -0
  38. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/index.md +0 -0
  39. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/release-notes.md +0 -0
  40. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/user-guide/execution-configuration.md +0 -0
  41. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/user-guide/identifiers.md +0 -0
  42. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/user-guide/install.md +0 -0
  43. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/docs/user-guide/ml_workflow_instruction.md +0 -0
  44. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/mkdocs.yml +0 -0
  45. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/pyproject.toml +0 -0
  46. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/release.sh +0 -0
  47. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/setup.cfg +0 -0
  48. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/__init__.py +0 -0
  49. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/dataset_aux_classes.py +0 -0
  50. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/demo_catalog.py +0 -0
  51. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/deriva_definitions.py +0 -0
  52. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/execution_environment.py +0 -0
  53. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/feature.py +0 -0
  54. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/history.py +0 -0
  55. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/schema_setup/__init__.py +0 -0
  56. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/schema_setup/annotations.py +0 -0
  57. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/schema_setup/create_schema.py +0 -0
  58. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/schema_setup/policy.json +0 -0
  59. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/schema_setup/table_comments_utils.py +0 -0
  60. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml/test_functions.py +0 -0
  61. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  62. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  63. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  64. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml.egg-info/requires.txt +0 -0
  65. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/src/deriva_ml.egg-info/top_level.txt +0 -0
  66. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/__init__.py +0 -0
  67. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/derivaml_test.py +0 -0
  68. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/runner.py +0 -0
  69. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/test_basic_tables.py +0 -0
  70. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/test_dataset.py +0 -0
  71. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/test_download.py +0 -0
  72. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/test_execution.py +0 -0
  73. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/test_features.py +0 -0
  74. {deriva_ml-1.11.0 → deriva_ml-1.12.0}/tests/test_upload.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.11.0
3
+ Version: 1.12.0
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -28,3 +28,4 @@ The script release.sh will create a new release tag in GitHub. This script requ
28
28
  GitHUB CLI be installed.
29
29
 
30
30
  See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
31
+
@@ -9,3 +9,4 @@ The script release.sh will create a new release tag in GitHub. This script requ
9
9
  GitHUB CLI be installed.
10
10
 
11
11
  See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
12
+
@@ -721,12 +721,12 @@
721
721
  " pass\n",
722
722
  "\n",
723
723
  " # Write a new model\n",
724
- " model_file = manual_execution.asset_path('API_Model') / 'modelfile.txt'\n",
724
+ " model_file = manual_execution.asset_path('API_Model', 'modelfile.txt')\n",
725
725
  " with open(model_file, 'w') as f:\n",
726
726
  " f.write(\"Hello there a new model;\\n\")\n",
727
727
  "\n",
728
728
  " # Create some new feature values.\n",
729
- " bb_csv_path, bb_asset_paths = ml_execution.feature_paths('Image', 'BoundingBox')\n",
729
+ " bb_csv_path, bb_asset_paths = ml_execution.asset_path('Image', 'BoundingBox')\n",
730
730
  " bounding_box_files = [bb_asset_paths['BoundingBox'] / f\"box{i}.txt\" for i in range(10)]\n",
731
731
  " for i in range(10):\n",
732
732
  " bounding_box_files.append(fn := bb_asset_paths['BoundingBox'] / f\"box{i}.txt\")\n",
@@ -17,7 +17,6 @@ Dataset types are assigned from a controlled vocabulary called `MLVocab.dataset_
17
17
  as you need:
18
18
  ```
19
19
  from deriva_ml import MLVocab
20
- ...
21
20
  ml_instance.add_term(MLVocab.dataset_type, "DemoSet", description="A test dataset_table")
22
21
  ```
23
22
  When you create a dataset, you can provide as many dataset types as required to streamline organizing and discovering
@@ -30,7 +29,6 @@ Its important to know how a dataset was created, so the most common way to creat
30
29
  # Now lets create model configuration for our program.
31
30
  api_workflow = Workflow(
32
31
  name="API Workflow",
33
- url="https://github.com/informatics-isi-edu/deriva-ml/blob/main/docs/Notebooks/DerivaML%20Dataset.ipynb",
34
32
  workflow_type="Create Dataset Notebook"
35
33
  )
36
34
 
@@ -1,4 +1,4 @@
1
- """Ths module contains the definition of the DatabaseModel class. The role of this class is to provide an nterface between the BDBag representation
1
+ """This module contains the definition of the DatabaseModel class. The role of this class is to provide an interface between the BDBag representation
2
2
  of a dataset and a SQLite database in which the contents of the bag are stored.
3
3
  """
4
4
 
@@ -51,7 +51,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
51
51
  appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
52
52
  into DatabaseModels, is kept in the class variable `_rid_map`.
53
53
 
54
- Because you can load diffent versions of a dataset simultaniously, the dataset RID and version number are tracked, and a new
54
+ Because you can load different versions of a dataset simultaneously, the dataset RID and version number are tracked, and a new
55
55
  sqllite instance is created for every new dataset version present.
56
56
 
57
57
  Attributes:
@@ -290,6 +290,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
290
290
  return DatasetBag(self, dataset_rid or self.dataset_rid)
291
291
 
292
292
  def dataset_version(self, dataset_rid: Optional[RID] = None) -> DatasetVersion:
293
+ """Return the version of the specified dataset."""
293
294
  if dataset_rid and dataset_rid not in self.bag_rids:
294
295
  DerivaMLException(f"Dataset RID {dataset_rid} is not in model.")
295
296
  return self.bag_rids[dataset_rid]
@@ -232,12 +232,10 @@ class Dataset:
232
232
  """Increment the version of the specified dataset_table.
233
233
 
234
234
  Args:
235
- dataset_rid: RID to a dataset_table
236
- component: Which version of the dataset_table to increment.
237
- dataset_rid: RID of the dataset whose version is to be incremented.
238
- component: Major, Minor or Patch
239
- description: Description of the version update of the dataset_table.
240
- execution_rid: Which execution is performing increment.
235
+ dataset_rid: RID of the dataset whose version is to be incremented.
236
+ component: Which component of the dataset_table version to increment: Major, Minor or Patch.
237
+ description: Description of the version update of the dataset_table.
238
+ execution_rid: Which execution is performing increment.
241
239
 
242
240
  Returns:
243
241
  new semantic version of the dataset_table as a 3-tuple
@@ -275,9 +273,6 @@ class Dataset:
275
273
  description: Description of the dataset_table.
276
274
  execution_rid: Execution under which the dataset_table will be created.
277
275
  version: Version of the dataset_table.
278
- type: str | list[str]:
279
- description: str:
280
-
281
276
 
282
277
  Returns:
283
278
  New dataset_table RID.
@@ -349,7 +344,6 @@ class Dataset:
349
344
  Args:
350
345
  dataset_rid: RID of the dataset_table to delete.
351
346
  recurse: If True, delete the dataset_table along with any nested datasets. (Default value = False)
352
- dataset_rid: RID:
353
347
  """
354
348
  # Get association table entries for this dataset_table
355
349
  # Delete association table entries
@@ -397,7 +391,7 @@ class Dataset:
397
391
  filtered_path = dataset_path
398
392
  else:
399
393
  filtered_path = dataset_path.filter(
400
- (dataset_path.Deleted == False) | (dataset_path.Deleted == None)
394
+ (dataset_path.Deleted == False) | (dataset_path.Deleted == None) # noqa: E712
401
395
  )
402
396
 
403
397
  # Get a list of all the dataset_type values associated with this dataset_table.
@@ -439,8 +433,7 @@ class Dataset:
439
433
  routine makes it possible to add objects from the specified table to a dataset_table.
440
434
 
441
435
  Args:
442
- element: Name or the table or table object that is to be added to the dataset_table.
443
- element: str | Table:
436
+ element: Name of the table or table object that is to be added to the dataset_table.
444
437
 
445
438
  Returns:
446
439
  The table object that was added to the dataset_table.
@@ -464,7 +457,6 @@ class Dataset:
464
457
 
465
458
  Args:
466
459
  dataset_rid: param recurse: If this is a nested dataset_table, list the members of the contained datasets
467
- dataset_rid: RID:
468
460
  recurse: (Default value = False)
469
461
  limit: If provided, the maximum number of members to return for each element type.
470
462
 
@@ -677,7 +669,6 @@ class Dataset:
677
669
 
678
670
  Args:
679
671
  dataset_rid: return: RID of the parent dataset_table.
680
- dataset_rid: RID:
681
672
 
682
673
  Returns:
683
674
  RID of the parent dataset_table.
@@ -168,7 +168,7 @@ class DatasetBag:
168
168
  yield dict(zip(col_names, row))
169
169
 
170
170
  @validate_call
171
- def list_dataset_members(self, recurse: bool = False) -> dict[str, dict[str, Any]]:
171
+ def list_dataset_members(self, recurse: bool = False) -> dict[str, dict[str, list]]:
172
172
  """Return a list of entities associated with a specific _dataset_table.
173
173
 
174
174
  Args:
@@ -265,10 +265,13 @@ class DerivaML(Dataset):
265
265
  is_notebook = True
266
266
  else:
267
267
  stack = inspect.stack()
268
+ # Get the caller's filename, which is two up the stack from here.
268
269
  if len(stack) > 1:
269
- filename = Path(
270
- stack[2].filename
271
- ) # Get the caller's filename, which is two up the stack from here.
270
+ filename = Path(stack[2].filename)
271
+ if not filename.exists():
272
+ # Being called from the command-line interpreter, so the caller has no source file on disk.
273
+ filename = "REPL"
274
+ # Get the caller's filename, which is two up the stack from here.
272
275
  else:
273
276
  raise DerivaMLException(
274
277
  "Looking for caller failed"
@@ -326,7 +329,6 @@ class DerivaML(Dataset):
326
329
  """Return a local file path in which to place a CSV to add values to a table on upload.
327
330
 
328
331
  Args:
329
- table: return:
330
332
  table: str | Table:
331
333
 
332
334
  Returns:
@@ -1143,13 +1145,17 @@ class DerivaML(Dataset):
1143
1145
  if self._is_notebook
1144
1146
  else f"git hash-object {self.executable_path}"
1145
1147
  )
1146
- checksum = subprocess.run(
1147
- cmd,
1148
- capture_output=True,
1149
- text=True,
1150
- check=True,
1151
- shell=True,
1152
- ).stdout.strip()
1148
+ checksum = (
1149
+ subprocess.run(
1150
+ cmd,
1151
+ capture_output=True,
1152
+ text=True,
1153
+ check=False,
1154
+ shell=True,
1155
+ ).stdout.strip()
1156
+ if self.executable_path != "REPL"
1157
+ else "1"
1158
+ )
1153
1159
 
1154
1160
  return Workflow(
1155
1161
  name=name,
@@ -1172,6 +1178,8 @@ class DerivaML(Dataset):
1172
1178
  """
1173
1179
 
1174
1180
  # Get repo URL from local gitHub repo.
1181
+ if self.executable_path == "REPL":
1182
+ return "REPL", True
1175
1183
  try:
1176
1184
  result = subprocess.run(
1177
1185
  ["git", "remote", "get-url", "origin"],
@@ -1240,6 +1248,7 @@ class DerivaML(Dataset):
1240
1248
  # @validate_call
1241
1249
  def restore_execution(self, execution_rid: Optional[RID] = None) -> "Execution":
1242
1250
  """Return an Execution object for a previously started execution with the specified RID."""
1251
+
1243
1252
  from .execution import Execution
1244
1253
 
1245
1254
  # Find path to execution
@@ -27,6 +27,8 @@ from typing import Iterable, Optional
27
27
  class DerivaModel:
28
28
  """Augmented interface to deriva model class.
29
29
 
30
+ This class provides a number of DerivaML specific methods that augment the interface in the deriva model class.
31
+
30
32
  Attributes:
31
33
  domain_schema: Schema name for domain specific tables and relationships.
32
34
  model: ERMRest model for the catalog.
@@ -71,6 +73,10 @@ class DerivaModel:
71
73
  # No domain schema defined.
72
74
  self.domain_schema = domain_schema
73
75
 
76
+ def __getattr__(self, name):
77
+ # Called only if `name` is not found by normal attribute lookup on this object. Delegate the attribute to the model class.
78
+ return getattr(self.model, name)
79
+
74
80
  def name_to_table(self, table: str | Table) -> Table:
75
81
  """Return the table object corresponding to the given table name.
76
82
 
@@ -129,7 +135,7 @@ class DerivaModel:
129
135
  def find_association(self, table1: Table | str, table2: Table | str) -> Table:
130
136
  """Given two tables, return an association table that connects the two.
131
137
 
132
- Raises"
138
+ Raises:
133
139
  DerivaML exception if there is either not an association table or more than one association table.
134
140
  """
135
141
  table1 = self.name_to_table(table1)
@@ -138,7 +144,7 @@ class DerivaModel:
138
144
  tables = [
139
145
  a.table
140
146
  for a in table1.find_associations(pure=False)
141
- if (t := a.other_fkeys.pop().pk_table) == table2
147
+ if a.other_fkeys.pop().pk_table == table2
142
148
  ]
143
149
  if len(tables) == 1:
144
150
  return tables[0]
@@ -66,7 +66,6 @@ class AssetFilePath(type(Path())):
66
66
  asset_rid: The RID of the asset if it has been uploaded into an asset table
67
67
  """
68
68
 
69
-
70
69
  def __new__(
71
70
  cls,
72
71
  asset_path,
@@ -76,6 +75,17 @@ class AssetFilePath(type(Path())):
76
75
  asset_types: list[str] | str,
77
76
  asset_rid: Optional[RID] = None,
78
77
  ):
78
+ """
79
+ Create a new Path object that has additional information related to the use of this path as an asset.
80
+
81
+ Args:
82
+ asset_path: Local path to the location of the asset.
83
+ asset_name: The name of the asset in the catalog (e.g. the asset table name).
84
+ file_name: Name of the local file that contains the contents of the asset.
85
+ asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
86
+ asset_types: A list of terms from the Asset_Type controlled vocabulary.
87
+ asset_rid: The RID of the asset if it has been uploaded into an asset table
88
+ """
79
89
  obj = super().__new__(cls, asset_path)
80
90
  obj.asset_types = (
81
91
  asset_types if isinstance(asset_types, list) else [asset_types]
@@ -133,7 +143,7 @@ class Execution:
133
143
  ml_object: The DerivaML instance that created the execution.
134
144
  reload: RID of previously initialized execution object.
135
145
  """
136
- self.asset_paths: list[Path] = []
146
+ self.asset_paths: list[AssetFilePath] = []
137
147
  self.configuration = configuration
138
148
  self._ml_object = ml_object
139
149
  self._model = ml_object.model
@@ -141,7 +151,7 @@ class Execution:
141
151
  self.start_time = None
142
152
  self.stop_time = None
143
153
  self.status = Status.created
144
- self.uploaded_assets: list[Path] = []
154
+ self.uploaded_assets: Optional[dict[str, list[AssetFilePath]]] = None
145
155
  self.configuration.argv = sys.argv
146
156
 
147
157
  self.dataset_rids: list[RID] = []
@@ -152,6 +162,7 @@ class Execution:
152
162
  self._cache_dir = self._ml_object.cache_dir
153
163
  self._dry_run = dry_run
154
164
 
165
+ # Make sure we have a good workflow.
155
166
  if isinstance(self.configuration.workflow, Workflow):
156
167
  self.workflow_rid = (
157
168
  self._ml_object.add_workflow(self.configuration.workflow)
@@ -168,6 +179,7 @@ class Execution:
168
179
  "Workflow specified in execution configuration is not a Workflow"
169
180
  )
170
181
 
182
+ # Check that the datasets and assets named in the configuration are valid.
171
183
  for d in self.configuration.datasets:
172
184
  if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
173
185
  raise DerivaMLException(
@@ -265,7 +277,7 @@ class Execution:
265
277
  file_name="configuration.json",
266
278
  asset_types=ExecMetadataVocab.execution_config.value,
267
279
  )
268
- with open(cfile, "w", encoding="utf-8") as config_file:
280
+ with open(cfile.as_posix(), "w", encoding="utf-8") as config_file:
269
281
  json.dump(self.configuration.model_dump(), config_file)
270
282
 
271
283
  # save runtime env
@@ -387,7 +399,7 @@ class Execution:
387
399
  try:
388
400
  self.update_status(Status.running, "Uploading execution files...")
389
401
  results = upload_directory(self._model, self._asset_root)
390
- except Exception as e:
402
+ except RuntimeError as e:
391
403
  error = format_exception(e)
392
404
  self.update_status(Status.failed, error)
393
405
  raise DerivaMLException(f"Fail to upload execution_assets. Error: {error}")
@@ -519,7 +531,7 @@ class Execution:
519
531
 
520
532
  def upload_execution_outputs(
521
533
  self, clean_folder: bool = True
522
- ) -> dict[str, AssetFilePath]:
534
+ ) -> dict[str, list[AssetFilePath]]:
523
535
  """Upload all the assets and metadata associated with the current execution.
524
536
 
525
537
  This will include any new assets, features, or table values.
@@ -535,11 +547,11 @@ class Execution:
535
547
  if self._dry_run:
536
548
  return {}
537
549
  try:
538
- uploaded_assets = self._upload_execution_dirs()
550
+ self.uploaded_assets = self._upload_execution_dirs()
539
551
  self.update_status(Status.completed, "Successfully end the execution.")
540
552
  if clean_folder:
541
553
  self._clean_folder_contents(self._execution_root)
542
- return uploaded_assets
554
+ return self.uploaded_assets
543
555
  except Exception as e:
544
556
  error = format_exception(e)
545
557
  self.update_status(Status.failed, error)
@@ -688,16 +700,26 @@ class Execution:
688
700
  asset_name: str,
689
701
  file_name: str,
690
702
  asset_types: Optional[list[str] | str] = None,
703
+ copy_file=False,
691
704
  **kwargs,
692
705
  ) -> AssetFilePath:
693
706
  """Return a pathlib Path to the directory in which to place files for the specified execution_asset type.
694
707
 
695
- These files are uploaded as part of the upload_execution method in DerivaML class.
708
+ Given the name of an asset table, and a file name, register the file for upload, and return a path to that
709
+ file in the upload directory. In addition to the filename, additional asset metadata and file asset types may
710
+ be specified.
711
+
712
+ This routine has three modes, depending on whether file_name refers to an existing file. If it doesn't, a path
713
+ to a new file with the specified name is returned. The caller can then open that file for writing.
714
+
715
+ If the provided filename refers to an existing file and the copy_file argument is False (the default), then the
716
+ returned path contains a symbolic link to that file. If the copy_file argument is True then the contents of
717
+ file_name are copied into the target directory.
696
718
 
697
719
  Args:
698
720
  asset_name: Type of asset to be uploaded. Must be a term in Asset_Type controlled vocabulary.
699
- asset_types: Type of asset to be uploaded. Defaults to name of the asset.
700
721
  file_name: Name of file to be uploaded.
722
+ asset_types: Type of asset to be uploaded. Defaults to name of the asset.
701
723
  **kwargs: Any additional metadata values that may be part of the asset table.
702
724
 
703
725
  Returns:
@@ -716,26 +738,33 @@ class Execution:
716
738
  for t in asset_types:
717
739
  self._ml_object.lookup_term(MLVocab.asset_type, t)
718
740
 
741
+ file_name = Path(file_name)
719
742
  asset_path = asset_file_path(
720
743
  self._working_dir,
721
744
  self.execution_rid,
722
745
  self._model.name_to_table(asset_name),
723
- file_name,
746
+ file_name.name,
724
747
  metadata=kwargs,
725
748
  )
726
749
 
750
+ if file_name.exists():
751
+ if copy_file:
752
+ asset_path.write_bytes(file_name.read_bytes())
753
+ else:
754
+ asset_path.symlink_to(file_name)
755
+
727
756
  # Persist the asset types into a file
728
757
  with open(
729
758
  asset_type_path(self._working_dir, self.execution_rid, asset_table),
730
759
  "a",
731
760
  encoding="utf-8",
732
761
  ) as f:
733
- f.write(json.dumps({file_name: asset_types}) + "\n")
762
+ f.write(json.dumps({file_name.name: asset_types}) + "\n")
734
763
 
735
764
  return AssetFilePath(
736
765
  asset_path=asset_path,
737
766
  asset_name=asset_name,
738
- file_name=file_name,
767
+ file_name=file_name.name,
739
768
  asset_metadata=kwargs,
740
769
  asset_types=asset_types,
741
770
  )
@@ -760,6 +789,7 @@ class Execution:
760
789
 
761
790
  def execute(self) -> Execution:
762
791
  """Initiate an execution with provided configuration. Can be used in a context manager."""
792
+ self.execution_start()
763
793
  return self
764
794
 
765
795
  @validate_call
@@ -1,3 +1,7 @@
1
+ """
2
+ Classes that are used to define an execution configuration.
3
+ """
4
+
1
5
  from __future__ import annotations
2
6
 
3
7
  import json
@@ -216,10 +216,10 @@ def asset_table_upload_spec(model: DerivaModel, asset_table: str | Table):
216
216
  "checksum_types": ["sha256", "md5"],
217
217
  "hatrac_options": {"versioned_urls": True},
218
218
  "hatrac_templates": {
219
- "hatrac_uri": f"/hatrac/{asset_table.name}/{{md5}}.{{file_name}}",
219
+ "hatrac_uri": f"/hatrac/{asset_table.name}/{{md5}}.{{file_name}}.{{file_ext}}",
220
220
  "content-disposition": "filename*=UTF-8''{file_name}.{file_ext}",
221
221
  },
222
- "record_query_template": "/entity/{target_table}/MD5={{md5}}&Filename={{file_name}}",
222
+ "record_query_template": "/entity/{target_table}/MD5={{md5}}&Filename={file_name}.{file_ext}",
223
223
  }
224
224
 
225
225
 
@@ -252,10 +252,10 @@ def bulk_upload_configuration(model: DerivaModel) -> dict[str, Any]:
252
252
  "checksum_types": ["sha256", "md5"],
253
253
  "hatrac_options": {"versioned_urls": True},
254
254
  "hatrac_templates": {
255
- "hatrac_uri": "/hatrac/{asset_table}/{md5}.{file_name}",
255
+ "hatrac_uri": "/hatrac/{asset_table}/{md5}.{file_name}.{file_ext}",
256
256
  "content-disposition": "filename*=UTF-8''{file_name}.{file_ext}",
257
257
  },
258
- "record_query_template": "/entity/{target_table}/MD5={{md5}}&Filename={{file_name}}",
258
+ "record_query_template": "/entity/{target_table}/MD5={md5}&Filename={file_name}.{file_ext}",
259
259
  },
260
260
  # {
261
261
  # Upload the records into a table
@@ -448,7 +448,7 @@ def asset_type_path(prefix: Path | str, exec_rid: RID, asset_table: Table) -> Pa
448
448
  asset_table: Table in which to place assets.
449
449
 
450
450
  Returns:
451
- Path to the file in which to place asset_type values for the named asset..
451
+ Path to the file in which to place asset_type values for the named asset.
452
452
  """
453
453
  path = (
454
454
  execution_root(prefix, exec_rid=exec_rid)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.11.0
3
+ Version: 1.12.0
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -28,3 +28,4 @@ The script release.sh will create a new release tag in GitHub. This script requ
28
28
  GitHUB CLI be installed.
29
29
 
30
30
  See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
31
+
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes