deriva-ml 1.13.1__py3-none-any.whl → 1.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/dataset.py +14 -12
- deriva_ml/deriva_ml_base.py +7 -1
- deriva_ml/execution.py +79 -86
- deriva_ml/execution_configuration.py +2 -1
- {deriva_ml-1.13.1.dist-info → deriva_ml-1.13.2.dist-info}/METADATA +1 -1
- {deriva_ml-1.13.1.dist-info → deriva_ml-1.13.2.dist-info}/RECORD +10 -10
- {deriva_ml-1.13.1.dist-info → deriva_ml-1.13.2.dist-info}/WHEEL +1 -1
- {deriva_ml-1.13.1.dist-info → deriva_ml-1.13.2.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.13.1.dist-info → deriva_ml-1.13.2.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.13.1.dist-info → deriva_ml-1.13.2.dist-info}/top_level.txt +0 -0
deriva_ml/dataset.py
CHANGED
|
@@ -964,7 +964,8 @@ class Dataset:
|
|
|
964
964
|
for the dataset.
|
|
965
965
|
"""
|
|
966
966
|
if (
|
|
967
|
-
execution_rid
|
|
967
|
+
execution_rid
|
|
968
|
+
and execution_rid != DRY_RUN_RID
|
|
968
969
|
and self._model.catalog.resolve_rid(execution_rid).table.name != "Execution"
|
|
969
970
|
):
|
|
970
971
|
raise DerivaMLException(f"RID {execution_rid} is not an execution")
|
|
@@ -1120,17 +1121,18 @@ class Dataset:
|
|
|
1120
1121
|
|
|
1121
1122
|
def update_status(status: Status, msg: str) -> None:
|
|
1122
1123
|
"""Update the current status for this execution in the catalog"""
|
|
1123
|
-
|
|
1124
|
-
self.
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1124
|
+
if execution_rid and execution_rid != DRY_RUN_RID:
|
|
1125
|
+
self._model.catalog.getPathBuilder().schemas[
|
|
1126
|
+
self._ml_schema
|
|
1127
|
+
].Execution.update(
|
|
1128
|
+
[
|
|
1129
|
+
{
|
|
1130
|
+
"RID": execution_rid,
|
|
1131
|
+
"Status": status.value,
|
|
1132
|
+
"Status_Detail": msg,
|
|
1133
|
+
}
|
|
1134
|
+
]
|
|
1135
|
+
)
|
|
1134
1136
|
self._logger.info(msg)
|
|
1135
1137
|
|
|
1136
1138
|
def fetch_progress_callback(current, total):
|
deriva_ml/deriva_ml_base.py
CHANGED
|
@@ -974,7 +974,7 @@ class DerivaML(Dataset):
|
|
|
974
974
|
) -> Workflow:
|
|
975
975
|
"""Identify current executing program and return a workflow RID for it
|
|
976
976
|
|
|
977
|
-
Determine the notebook or script that is currently being executed. Assume that
|
|
977
|
+
Determine the notebook or script that is currently being executed. Assume that this is
|
|
978
978
|
being executed from a cloned GitHub repository. Determine the remote repository name for
|
|
979
979
|
this object. Then either retrieve an existing workflow for this executable or create
|
|
980
980
|
a new one.
|
|
@@ -983,6 +983,9 @@ class DerivaML(Dataset):
|
|
|
983
983
|
name: The name of the workflow.
|
|
984
984
|
workflow_type: The type of the workflow.
|
|
985
985
|
description: The description of the workflow.
|
|
986
|
+
|
|
987
|
+
Returns:
|
|
988
|
+
A workflow object.
|
|
986
989
|
"""
|
|
987
990
|
# Make sure type is correct.
|
|
988
991
|
self.lookup_term(MLVocab.workflow_type, workflow_type)
|
|
@@ -1001,6 +1004,9 @@ class DerivaML(Dataset):
|
|
|
1001
1004
|
1. The datasets specified in the configuration are downloaded and placed in the cache-dir. If a version is
|
|
1002
1005
|
not specified in the configuration, then a new minor version number is created for the dataset and downloaded.
|
|
1003
1006
|
|
|
1007
|
+
2. If any execution assets are provided in the configuration, they are downloaded and placed in the working directory.
|
|
1008
|
+
|
|
1009
|
+
|
|
1004
1010
|
Args:
|
|
1005
1011
|
configuration: ExecutionConfiguration:
|
|
1006
1012
|
dry_run: Do not create an execution record or upload results.
|
deriva_ml/execution.py
CHANGED
|
@@ -12,13 +12,11 @@ import os
|
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
|
|
14
14
|
from pydantic import validate_call, ConfigDict
|
|
15
|
-
import regex as re
|
|
16
15
|
import sys
|
|
17
16
|
import shutil
|
|
18
17
|
from typing import Iterable, Any, Optional
|
|
19
18
|
|
|
20
19
|
from deriva.core import format_exception
|
|
21
|
-
from deriva.core.datapath import DataPathException
|
|
22
20
|
from deriva.core.hatrac_store import HatracStore
|
|
23
21
|
from .deriva_definitions import (
|
|
24
22
|
RID,
|
|
@@ -66,29 +64,43 @@ except ImportError:
|
|
|
66
64
|
return s
|
|
67
65
|
|
|
68
66
|
|
|
69
|
-
class
|
|
70
|
-
|
|
67
|
+
# Platform-specific base class
|
|
68
|
+
if sys.version_info >= (3, 12):
|
|
71
69
|
|
|
72
|
-
|
|
73
|
-
|
|
70
|
+
class AssetFilePath(Path):
|
|
71
|
+
"""
|
|
72
|
+
Create a new Path object that has additional information related to the use of this path as an asset.
|
|
74
73
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
74
|
+
Args:
|
|
75
|
+
asset_path: Local path to the location of the asset.
|
|
76
|
+
asset_name: The name of the asset in the catalog (e.g. the asset table name).
|
|
77
|
+
file_name: Name of the local file that contains the contents of the asset.
|
|
78
|
+
asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
|
|
79
|
+
asset_types: A list of terms from the Asset_Type controlled vocabulary.
|
|
80
|
+
asset_rid: The RID of the asset if it has been uploaded into an asset table
|
|
81
|
+
"""
|
|
82
82
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
83
|
+
def __init__(
|
|
84
|
+
self,
|
|
85
|
+
asset_path: str | Path,
|
|
86
|
+
asset_name: str,
|
|
87
|
+
file_name: str,
|
|
88
|
+
asset_metadata: dict[str, Any],
|
|
89
|
+
asset_types: list[str] | str,
|
|
90
|
+
asset_rid: Optional["RID"] = None,
|
|
91
|
+
):
|
|
92
|
+
super().__init__(asset_path)
|
|
93
|
+
# These assignments happen after __new__ returns the instance
|
|
94
|
+
self.asset_name = asset_name
|
|
95
|
+
self.file_name = file_name
|
|
96
|
+
self.asset_metadata = asset_metadata
|
|
97
|
+
self.asset_types = (
|
|
98
|
+
asset_types if isinstance(asset_types, list) else [asset_types]
|
|
99
|
+
)
|
|
100
|
+
self.asset_rid = asset_rid
|
|
101
|
+
else:
|
|
102
|
+
|
|
103
|
+
class AssetFilePath(type(Path())):
|
|
92
104
|
"""
|
|
93
105
|
Create a new Path object that has additional information related to the use of this path as an asset.
|
|
94
106
|
|
|
@@ -100,15 +112,26 @@ class AssetFilePath(type(Path())):
|
|
|
100
112
|
asset_types: A list of terms from the Asset_Type controlled vocabulary.
|
|
101
113
|
asset_rid: The RID of the asset if it has been uploaded into an asset table
|
|
102
114
|
"""
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
115
|
+
|
|
116
|
+
def __new__(
|
|
117
|
+
cls,
|
|
118
|
+
asset_path: str | Path,
|
|
119
|
+
asset_name: str,
|
|
120
|
+
file_name: str,
|
|
121
|
+
asset_metadata: dict[str, Any],
|
|
122
|
+
asset_types: list[str] | str,
|
|
123
|
+
asset_rid: Optional["RID"] = None,
|
|
124
|
+
):
|
|
125
|
+
# Only pass the path to the base Path class
|
|
126
|
+
obj = super().__new__(cls, asset_path)
|
|
127
|
+
obj.asset_name = asset_name
|
|
128
|
+
obj.file_name = file_name
|
|
129
|
+
obj.asset_metadata = asset_metadata
|
|
130
|
+
obj.asset_types = (
|
|
131
|
+
asset_types if isinstance(asset_types, list) else [asset_types]
|
|
132
|
+
)
|
|
133
|
+
obj.asset_rid = asset_rid
|
|
134
|
+
return obj
|
|
112
135
|
|
|
113
136
|
|
|
114
137
|
class Execution:
|
|
@@ -155,7 +178,7 @@ class Execution:
|
|
|
155
178
|
Args:
|
|
156
179
|
configuration: Execution configuration object that describes the execution.
|
|
157
180
|
ml_object: The DerivaML instance that created the execution.
|
|
158
|
-
reload: RID of previously initialized execution object.
|
|
181
|
+
reload: RID of a previously initialized execution object.
|
|
159
182
|
"""
|
|
160
183
|
self.asset_paths: list[AssetFilePath] = []
|
|
161
184
|
self.configuration = configuration
|
|
@@ -476,7 +499,7 @@ class Execution:
|
|
|
476
499
|
"""Download an asset from a URL and place it in a local directory.
|
|
477
500
|
|
|
478
501
|
Args:
|
|
479
|
-
asset_rid:
|
|
502
|
+
asset_rid: RID of the asset.
|
|
480
503
|
dest_dir: Destination directory for the asset.
|
|
481
504
|
update_catalog: Whether to update the catalog execution information after downloading.
|
|
482
505
|
|
|
@@ -656,20 +679,9 @@ class Execution:
|
|
|
656
679
|
with open(feature_file, "r") as feature_values:
|
|
657
680
|
entities = [json.loads(line.strip()) for line in feature_values]
|
|
658
681
|
# Update the asset columns in the feature and add to the catalog.
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
)
|
|
663
|
-
except DataPathException as e:
|
|
664
|
-
if re.match(
|
|
665
|
-
rf'DETAIL: +Key +\("Execution", +"{target_table}", +"Feature_Name"\)=\(.*\) already exists',
|
|
666
|
-
e.message,
|
|
667
|
-
):
|
|
668
|
-
self._logger.info(
|
|
669
|
-
f"Skipping reload of feature values for {feature_table}"
|
|
670
|
-
)
|
|
671
|
-
else:
|
|
672
|
-
raise e
|
|
682
|
+
self._ml_object.domain_path.tables[feature_table].insert(
|
|
683
|
+
[map_path(e) for e in entities], on_conflict_skip=True
|
|
684
|
+
)
|
|
673
685
|
|
|
674
686
|
def _update_asset_execution_table(
|
|
675
687
|
self,
|
|
@@ -694,27 +706,17 @@ class Execution:
|
|
|
694
706
|
asset_exe = self._model.find_association(asset_table_name, "Execution")
|
|
695
707
|
asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]
|
|
696
708
|
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
except DataPathException as e:
|
|
709
|
-
if re.match(
|
|
710
|
-
rf'DETAIL: +Key +\("{asset_table_name}", +"Execution"\)=\(.*\) already exists',
|
|
711
|
-
e.message,
|
|
712
|
-
):
|
|
713
|
-
self._logger.info(
|
|
714
|
-
f"Skipping reload of execution assocations for {asset_table_name}"
|
|
715
|
-
)
|
|
716
|
-
else:
|
|
717
|
-
raise e
|
|
709
|
+
asset_exe_path.insert(
|
|
710
|
+
[
|
|
711
|
+
{
|
|
712
|
+
asset_table_name: asset_path.asset_rid,
|
|
713
|
+
"Execution": self.execution_rid,
|
|
714
|
+
"Asset_Role": asset_role,
|
|
715
|
+
}
|
|
716
|
+
for asset_path in asset_list
|
|
717
|
+
],
|
|
718
|
+
on_conflict_skip=True,
|
|
719
|
+
)
|
|
718
720
|
|
|
719
721
|
# Now add in the type names via the asset_asset_type association table.
|
|
720
722
|
# Get the list of types for each file in the asset.
|
|
@@ -740,24 +742,15 @@ class Execution:
|
|
|
740
742
|
type_path = pb.schemas[asset_asset_type.schema.name].tables[
|
|
741
743
|
asset_asset_type.name
|
|
742
744
|
]
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
if re.match(
|
|
753
|
-
rf'DETAIL: +Key +\("{asset_table_name}", +"Asset_Type"\)=\(.*\) already exists',
|
|
754
|
-
e.message,
|
|
755
|
-
):
|
|
756
|
-
self._logger.info(
|
|
757
|
-
f"Skipping reload of execution asset types for {asset_table_name}"
|
|
758
|
-
)
|
|
759
|
-
else:
|
|
760
|
-
raise e
|
|
745
|
+
|
|
746
|
+
type_path.insert(
|
|
747
|
+
[
|
|
748
|
+
{asset_table_name: asset.asset_rid, "Asset_Type": t}
|
|
749
|
+
for asset in asset_list
|
|
750
|
+
for t in asset_type_map[asset.file_name]
|
|
751
|
+
],
|
|
752
|
+
on_conflict_skip=True,
|
|
753
|
+
)
|
|
761
754
|
|
|
762
755
|
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
|
|
763
756
|
def asset_file_path(
|
|
@@ -325,7 +325,8 @@ class ExecutionConfiguration(BaseModel):
|
|
|
325
325
|
should be materialized.
|
|
326
326
|
assets: List of assets to be downloaded prior to execution. The values must be RIDs in an asset table
|
|
327
327
|
parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
|
|
328
|
-
workflow:
|
|
328
|
+
workflow: Either a Workflow object, or a RID for a workflow instance.
|
|
329
|
+
parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
|
|
329
330
|
description: A description of the execution. Can use Markdown format.
|
|
330
331
|
"""
|
|
331
332
|
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
deriva_ml/__init__.py,sha256=GfneBq7xDphMqUQY96sW9ixRj74M3UTUCmD4KMIRSaM,1101
|
|
2
2
|
deriva_ml/database_model.py,sha256=lMbAEqn4n0m7h_JstMX_LX9gbvBIEydG3sRilPn3eLU,14885
|
|
3
|
-
deriva_ml/dataset.py,sha256=
|
|
3
|
+
deriva_ml/dataset.py,sha256=W1TSHgkdXNw2v5hC0UBrivCKadMK1LaFd6YIjHE9jZA,60786
|
|
4
4
|
deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
|
|
5
5
|
deriva_ml/dataset_bag.py,sha256=yS8oYVshfFtRDyhGPRqtbvxjyd3ZFF29lrB783OP4vM,11849
|
|
6
6
|
deriva_ml/demo_catalog.py,sha256=9Qo3JD4bUIwnL3ngPctc2QBeWApvMR_5UyaK9ockTrY,11536
|
|
7
7
|
deriva_ml/deriva_definitions.py,sha256=avdOgxtB60yb8XsWm-AYtCdvg2QkQbyfkZuA9xx9t2U,9221
|
|
8
|
-
deriva_ml/deriva_ml_base.py,sha256=
|
|
8
|
+
deriva_ml/deriva_ml_base.py,sha256=FYSTQl4mNePC8IxC70rS5D0VmLNPccfFkkiVneDxJpY,38678
|
|
9
9
|
deriva_ml/deriva_model.py,sha256=wytGCAHutiUaRfnRKr80Ks_P6ci0_wXRU3vq3lthfYU,13260
|
|
10
|
-
deriva_ml/execution.py,sha256=
|
|
11
|
-
deriva_ml/execution_configuration.py,sha256=
|
|
10
|
+
deriva_ml/execution.py,sha256=otMkdjF15SEWg99mvWrTpnKz7-BWp9b8XbFf6iwfmtg,37697
|
|
11
|
+
deriva_ml/execution_configuration.py,sha256=7fiIbtzz9nmkxA9-GTiN6Ln2twfaOLivwJwGZb8gAL0,14163
|
|
12
12
|
deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
|
|
13
13
|
deriva_ml/feature.py,sha256=07g0uSrhumdopJluWuWSRMrzagaikAOihqB09bzXBP4,5475
|
|
14
14
|
deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
|
|
@@ -23,9 +23,9 @@ deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjt
|
|
|
23
23
|
deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
|
|
24
24
|
deriva_ml/test-files/execution-parameters.json,sha256=1vBqXlaMa0cysonE20TweVDfTGRdSi9CUuAkW1xiYNo,36
|
|
25
25
|
deriva_ml/test-files/notebook-parameters.json,sha256=7uEE2sLQSrSc9cEGQ_RKE7t5dwkEYv0qLo5mRbzo8Og,108
|
|
26
|
-
deriva_ml-1.13.
|
|
27
|
-
deriva_ml-1.13.
|
|
28
|
-
deriva_ml-1.13.
|
|
29
|
-
deriva_ml-1.13.
|
|
30
|
-
deriva_ml-1.13.
|
|
31
|
-
deriva_ml-1.13.
|
|
26
|
+
deriva_ml-1.13.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
27
|
+
deriva_ml-1.13.2.dist-info/METADATA,sha256=uuvCztFgxOwWM34egjr65pW8-2pYGCtV_xofT5TmcLg,999
|
|
28
|
+
deriva_ml-1.13.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
|
29
|
+
deriva_ml-1.13.2.dist-info/entry_points.txt,sha256=cJnALMa6pjdk6RQCt4HFbKHqALpVa0k6wPeQDPedLJI,295
|
|
30
|
+
deriva_ml-1.13.2.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
|
|
31
|
+
deriva_ml-1.13.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|