deriva-ml 1.13.1__py3-none-any.whl → 1.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/dataset.py CHANGED
@@ -964,7 +964,8 @@ class Dataset:
964
964
  for the dataset.
965
965
  """
966
966
  if (
967
- execution_rid != DRY_RUN_RID
967
+ execution_rid
968
+ and execution_rid != DRY_RUN_RID
968
969
  and self._model.catalog.resolve_rid(execution_rid).table.name != "Execution"
969
970
  ):
970
971
  raise DerivaMLException(f"RID {execution_rid} is not an execution")
@@ -1120,17 +1121,18 @@ class Dataset:
1120
1121
 
1121
1122
  def update_status(status: Status, msg: str) -> None:
1122
1123
  """Update the current status for this execution in the catalog"""
1123
- self._model.catalog.getPathBuilder().schemas[
1124
- self._ml_schema
1125
- ].Execution.update(
1126
- [
1127
- {
1128
- "RID": execution_rid,
1129
- "Status": status.value,
1130
- "Status_Detail": msg,
1131
- }
1132
- ]
1133
- )
1124
+ if execution_rid and execution_rid != DRY_RUN_RID:
1125
+ self._model.catalog.getPathBuilder().schemas[
1126
+ self._ml_schema
1127
+ ].Execution.update(
1128
+ [
1129
+ {
1130
+ "RID": execution_rid,
1131
+ "Status": status.value,
1132
+ "Status_Detail": msg,
1133
+ }
1134
+ ]
1135
+ )
1134
1136
  self._logger.info(msg)
1135
1137
 
1136
1138
  def fetch_progress_callback(current, total):
@@ -974,7 +974,7 @@ class DerivaML(Dataset):
974
974
  ) -> Workflow:
975
975
  """Identify the currently executing program and return a workflow for it
976
976
 
977
- Determine the notebook or script that is currently being executed. Assume that this is
977
+ Determine the notebook or script that is currently being executed. Assume that this is
978
978
  being executed from a cloned GitHub repository. Determine the remote repository name for
979
979
  this object. Then either retrieve an existing workflow for this executable or create
980
980
  a new one.
@@ -983,6 +983,9 @@ class DerivaML(Dataset):
983
983
  name: The name of the workflow.
984
984
  workflow_type: The type of the workflow.
985
985
  description: The description of the workflow.
986
+
987
+ Returns:
988
+ A workflow object.
986
989
  """
987
990
  # Make sure type is correct.
988
991
  self.lookup_term(MLVocab.workflow_type, workflow_type)
@@ -1001,6 +1004,9 @@ class DerivaML(Dataset):
1001
1004
  1. The datasets specified in the configuration are downloaded and placed in the cache-dir. If a version is
1002
1005
  not specified in the configuration, then a new minor version number is created for the dataset and downloaded.
1003
1006
 
1007
+ 2. If any execution assets are provided in the configuration, they are downloaded and placed in the working directory.
1008
+
1009
+
1004
1010
  Args:
1005
1011
  configuration: ExecutionConfiguration:
1006
1012
  dry_run: Do not create an execution record or upload results.
deriva_ml/execution.py CHANGED
@@ -12,13 +12,11 @@ import os
12
12
  from pathlib import Path
13
13
 
14
14
  from pydantic import validate_call, ConfigDict
15
- import regex as re
16
15
  import sys
17
16
  import shutil
18
17
  from typing import Iterable, Any, Optional
19
18
 
20
19
  from deriva.core import format_exception
21
- from deriva.core.datapath import DataPathException
22
20
  from deriva.core.hatrac_store import HatracStore
23
21
  from .deriva_definitions import (
24
22
  RID,
@@ -66,29 +64,43 @@ except ImportError:
66
64
  return s
67
65
 
68
66
 
69
- class AssetFilePath(type(Path())):
70
- """Derived class of Path that also includes information about a downloaded.
67
+ # Platform-specific base class
68
+ if sys.version_info >= (3, 12):
71
69
 
72
- An AssetFilePath has all the methods associated with a pathlib.Path object. In addition, it defines additional
73
- attributes associated with a DerviaML asset.
70
+ class AssetFilePath(Path):
71
+ """
72
+ Create a new Path object that has additional information related to the use of this path as an asset.
74
73
 
75
- Attributes:
76
- asset_types: A list of the types associated with this asset. From the Asset_Type controlled vocabulary.
77
- asset_metadata: A dictionary of names and values of any additional columns associated with this asset.
78
- asset_name: The name of the asset table
79
- file_name: The name of the file in the local file system that has the asset contents
80
- asset_rid: The RID of the asset if it has been uploaded into an asset table
81
- """
74
+ Args:
75
+ asset_path: Local path to the location of the asset.
76
+ asset_name: The name of the asset in the catalog (e.g. the asset table name).
77
+ file_name: Name of the local file that contains the contents of the asset.
78
+ asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
79
+ asset_types: A list of terms from the Asset_Type controlled vocabulary.
80
+ asset_rid: The RID of the asset if it has been uploaded into an asset table
81
+ """
82
82
 
83
- def __new__(
84
- cls,
85
- asset_path,
86
- asset_name: str,
87
- file_name: str,
88
- asset_metadata: dict[str, Any],
89
- asset_types: list[str] | str,
90
- asset_rid: Optional[RID] = None,
91
- ):
83
+ def __init__(
84
+ self,
85
+ asset_path: str | Path,
86
+ asset_name: str,
87
+ file_name: str,
88
+ asset_metadata: dict[str, Any],
89
+ asset_types: list[str] | str,
90
+ asset_rid: Optional["RID"] = None,
91
+ ):
92
+ super().__init__(asset_path)
93
+ # These assignments happen after __new__ returns the instance
94
+ self.asset_name = asset_name
95
+ self.file_name = file_name
96
+ self.asset_metadata = asset_metadata
97
+ self.asset_types = (
98
+ asset_types if isinstance(asset_types, list) else [asset_types]
99
+ )
100
+ self.asset_rid = asset_rid
101
+ else:
102
+
103
+ class AssetFilePath(type(Path())):
92
104
  """
93
105
  Create a new Path object that has additional information related to the use of this path as an asset.
94
106
 
@@ -100,15 +112,26 @@ class AssetFilePath(type(Path())):
100
112
  asset_types: A list of terms from the Asset_Type controlled vocabulary.
101
113
  asset_rid: The RID of the asset if it has been uploaded into an asset table
102
114
  """
103
- obj = super().__new__(cls, asset_path)
104
- obj.asset_types = (
105
- asset_types if isinstance(asset_types, list) else [asset_types]
106
- )
107
- obj.asset_metadata = asset_metadata
108
- obj.asset_name = asset_name
109
- obj.file_name = file_name
110
- obj.asset_rid = asset_rid
111
- return obj
115
+
116
+ def __new__(
117
+ cls,
118
+ asset_path: str | Path,
119
+ asset_name: str,
120
+ file_name: str,
121
+ asset_metadata: dict[str, Any],
122
+ asset_types: list[str] | str,
123
+ asset_rid: Optional["RID"] = None,
124
+ ):
125
+ # Only pass the path to the base Path class
126
+ obj = super().__new__(cls, asset_path)
127
+ obj.asset_name = asset_name
128
+ obj.file_name = file_name
129
+ obj.asset_metadata = asset_metadata
130
+ obj.asset_types = (
131
+ asset_types if isinstance(asset_types, list) else [asset_types]
132
+ )
133
+ obj.asset_rid = asset_rid
134
+ return obj
112
135
 
113
136
 
114
137
  class Execution:
@@ -155,7 +178,7 @@ class Execution:
155
178
  Args:
156
179
  configuration: Execution configuration object that describes the execution.
157
180
  ml_object: The DerivaML instance that created the execution.
158
- reload: RID of previously initialized execution object.
181
+ reload: RID of a previously initialized execution object.
159
182
  """
160
183
  self.asset_paths: list[AssetFilePath] = []
161
184
  self.configuration = configuration
@@ -476,7 +499,7 @@ class Execution:
476
499
  """Download an asset from a URL and place it in a local directory.
477
500
 
478
501
  Args:
479
- asset_rid: URL of the asset.
502
+ asset_rid: RID of the asset.
480
503
  dest_dir: Destination directory for the asset.
481
504
  update_catalog: Whether to update the catalog execution information after downloading.
482
505
 
@@ -656,20 +679,9 @@ class Execution:
656
679
  with open(feature_file, "r") as feature_values:
657
680
  entities = [json.loads(line.strip()) for line in feature_values]
658
681
  # Update the asset columns in the feature and add to the catalog.
659
- try:
660
- self._ml_object.domain_path.tables[feature_table].insert(
661
- [map_path(e) for e in entities]
662
- )
663
- except DataPathException as e:
664
- if re.match(
665
- rf'DETAIL: +Key +\("Execution", +"{target_table}", +"Feature_Name"\)=\(.*\) already exists',
666
- e.message,
667
- ):
668
- self._logger.info(
669
- f"Skipping reload of feature values for {feature_table}"
670
- )
671
- else:
672
- raise e
682
+ self._ml_object.domain_path.tables[feature_table].insert(
683
+ [map_path(e) for e in entities], on_conflict_skip=True
684
+ )
673
685
 
674
686
  def _update_asset_execution_table(
675
687
  self,
@@ -694,27 +706,17 @@ class Execution:
694
706
  asset_exe = self._model.find_association(asset_table_name, "Execution")
695
707
  asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]
696
708
 
697
- try:
698
- asset_exe_path.insert(
699
- [
700
- {
701
- asset_table_name: asset_path.asset_rid,
702
- "Execution": self.execution_rid,
703
- "Asset_Role": asset_role,
704
- }
705
- for asset_path in asset_list
706
- ]
707
- )
708
- except DataPathException as e:
709
- if re.match(
710
- rf'DETAIL: +Key +\("{asset_table_name}", +"Execution"\)=\(.*\) already exists',
711
- e.message,
712
- ):
713
- self._logger.info(
714
- f"Skipping reload of execution assocations for {asset_table_name}"
715
- )
716
- else:
717
- raise e
709
+ asset_exe_path.insert(
710
+ [
711
+ {
712
+ asset_table_name: asset_path.asset_rid,
713
+ "Execution": self.execution_rid,
714
+ "Asset_Role": asset_role,
715
+ }
716
+ for asset_path in asset_list
717
+ ],
718
+ on_conflict_skip=True,
719
+ )
718
720
 
719
721
  # Now add in the type names via the asset_asset_type association table.
720
722
  # Get the list of types for each file in the asset.
@@ -740,24 +742,15 @@ class Execution:
740
742
  type_path = pb.schemas[asset_asset_type.schema.name].tables[
741
743
  asset_asset_type.name
742
744
  ]
743
- try:
744
- type_path.insert(
745
- [
746
- {asset_table_name: asset.asset_rid, "Asset_Type": t}
747
- for asset in asset_list
748
- for t in asset_type_map[asset.file_name]
749
- ]
750
- )
751
- except DataPathException as e:
752
- if re.match(
753
- rf'DETAIL: +Key +\("{asset_table_name}", +"Asset_Type"\)=\(.*\) already exists',
754
- e.message,
755
- ):
756
- self._logger.info(
757
- f"Skipping reload of execution asset types for {asset_table_name}"
758
- )
759
- else:
760
- raise e
745
+
746
+ type_path.insert(
747
+ [
748
+ {asset_table_name: asset.asset_rid, "Asset_Type": t}
749
+ for asset in asset_list
750
+ for t in asset_type_map[asset.file_name]
751
+ ],
752
+ on_conflict_skip=True,
753
+ )
761
754
 
762
755
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
763
756
  def asset_file_path(
@@ -325,7 +325,8 @@ class ExecutionConfiguration(BaseModel):
325
325
  should be materialized.
326
326
  assets: List of assets to be downloaded prior to execution. The values must be RIDs in an asset table
327
327
  parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
328
- workflow: A RID for a workflow instance. Must have a name, URI to the workflow instance, and a type.
328
+ workflow: Either a Workflow object, or a RID for a workflow instance.
329
+ parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
329
330
  description: A description of the execution. Can use Markdown format.
330
331
  """
331
332
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.13.1
3
+ Version: 1.13.2
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -1,14 +1,14 @@
1
1
  deriva_ml/__init__.py,sha256=GfneBq7xDphMqUQY96sW9ixRj74M3UTUCmD4KMIRSaM,1101
2
2
  deriva_ml/database_model.py,sha256=lMbAEqn4n0m7h_JstMX_LX9gbvBIEydG3sRilPn3eLU,14885
3
- deriva_ml/dataset.py,sha256=OyWUKWnYeP0ctimSBQ4em-uJrzCNOohx4GPT2uIl6R4,60649
3
+ deriva_ml/dataset.py,sha256=W1TSHgkdXNw2v5hC0UBrivCKadMK1LaFd6YIjHE9jZA,60786
4
4
  deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
5
5
  deriva_ml/dataset_bag.py,sha256=yS8oYVshfFtRDyhGPRqtbvxjyd3ZFF29lrB783OP4vM,11849
6
6
  deriva_ml/demo_catalog.py,sha256=9Qo3JD4bUIwnL3ngPctc2QBeWApvMR_5UyaK9ockTrY,11536
7
7
  deriva_ml/deriva_definitions.py,sha256=avdOgxtB60yb8XsWm-AYtCdvg2QkQbyfkZuA9xx9t2U,9221
8
- deriva_ml/deriva_ml_base.py,sha256=JYTG_a8SURhrPQBTz6OaGMk0D0sSPWpXqCnoVnSNViI,38501
8
+ deriva_ml/deriva_ml_base.py,sha256=FYSTQl4mNePC8IxC70rS5D0VmLNPccfFkkiVneDxJpY,38678
9
9
  deriva_ml/deriva_model.py,sha256=wytGCAHutiUaRfnRKr80Ks_P6ci0_wXRU3vq3lthfYU,13260
10
- deriva_ml/execution.py,sha256=Oyja5wonSBUDUIVSC01w3AojGEkWyw_8_kBMv3MTZBM,38126
11
- deriva_ml/execution_configuration.py,sha256=KKg2HhvOiOmYc3jJ9iJeeHYyRu05Bb8JpojmPn1gYW0,14072
10
+ deriva_ml/execution.py,sha256=otMkdjF15SEWg99mvWrTpnKz7-BWp9b8XbFf6iwfmtg,37697
11
+ deriva_ml/execution_configuration.py,sha256=7fiIbtzz9nmkxA9-GTiN6Ln2twfaOLivwJwGZb8gAL0,14163
12
12
  deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
13
13
  deriva_ml/feature.py,sha256=07g0uSrhumdopJluWuWSRMrzagaikAOihqB09bzXBP4,5475
14
14
  deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
@@ -23,9 +23,9 @@ deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjt
23
23
  deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
24
24
  deriva_ml/test-files/execution-parameters.json,sha256=1vBqXlaMa0cysonE20TweVDfTGRdSi9CUuAkW1xiYNo,36
25
25
  deriva_ml/test-files/notebook-parameters.json,sha256=7uEE2sLQSrSc9cEGQ_RKE7t5dwkEYv0qLo5mRbzo8Og,108
26
- deriva_ml-1.13.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
- deriva_ml-1.13.1.dist-info/METADATA,sha256=OKuCDvSR63ii7fO1W6tw-7-6RtYaKMHR59AbiURo_tI,999
28
- deriva_ml-1.13.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
29
- deriva_ml-1.13.1.dist-info/entry_points.txt,sha256=cJnALMa6pjdk6RQCt4HFbKHqALpVa0k6wPeQDPedLJI,295
30
- deriva_ml-1.13.1.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
31
- deriva_ml-1.13.1.dist-info/RECORD,,
26
+ deriva_ml-1.13.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ deriva_ml-1.13.2.dist-info/METADATA,sha256=uuvCztFgxOwWM34egjr65pW8-2pYGCtV_xofT5TmcLg,999
28
+ deriva_ml-1.13.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
29
+ deriva_ml-1.13.2.dist-info/entry_points.txt,sha256=cJnALMa6pjdk6RQCt4HFbKHqALpVa0k6wPeQDPedLJI,295
30
+ deriva_ml-1.13.2.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
31
+ deriva_ml-1.13.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (79.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5