mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +39 -121
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +39 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +73 -46
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +73 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +46 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +44 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +11 -1
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +31 -4
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +28 -1
- mlrun/common/schemas/auth.py +13 -2
- mlrun/common/schemas/client_spec.py +2 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +58 -28
- mlrun/common/schemas/frontend_spec.py +8 -0
- mlrun/common/schemas/function.py +11 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +21 -4
- mlrun/common/schemas/model_monitoring/constants.py +136 -42
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
- mlrun/common/schemas/notification.py +69 -12
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +7 -0
- mlrun/common/schemas/project.py +67 -16
- mlrun/common/schemas/runs.py +17 -0
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +14 -1
- mlrun/config.py +233 -58
- mlrun/data_types/data_types.py +11 -1
- mlrun/data_types/spark.py +5 -4
- mlrun/data_types/to_pandas.py +75 -34
- mlrun/datastore/__init__.py +8 -10
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +131 -43
- mlrun/datastore/base.py +107 -47
- mlrun/datastore/datastore.py +17 -7
- mlrun/datastore/datastore_profile.py +91 -7
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +92 -32
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +3 -2
- mlrun/datastore/s3.py +30 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +274 -59
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +387 -119
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +28 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +245 -20
- mlrun/db/factory.py +1 -4
- mlrun/db/httpdb.py +909 -231
- mlrun/db/nopdb.py +279 -14
- mlrun/errors.py +35 -5
- mlrun/execution.py +111 -38
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +46 -53
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +13 -2
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +38 -19
- mlrun/features.py +6 -14
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +4 -4
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +57 -12
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +15 -5
- mlrun/launcher/remote.py +10 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +297 -48
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +152 -357
- mlrun/model_monitoring/applications/__init__.py +10 -0
- mlrun/model_monitoring/applications/_application_steps.py +190 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +130 -303
- mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +177 -39
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +165 -398
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +67 -228
- mlrun/projects/__init__.py +6 -1
- mlrun/projects/operations.py +47 -20
- mlrun/projects/pipelines.py +396 -249
- mlrun/projects/project.py +1176 -406
- mlrun/render.py +28 -22
- mlrun/run.py +208 -181
- mlrun/runtimes/__init__.py +76 -11
- mlrun/runtimes/base.py +54 -24
- mlrun/runtimes/daskjob.py +9 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +39 -10
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +188 -68
- mlrun/runtimes/nuclio/serving.py +57 -60
- mlrun/runtimes/pod.py +191 -58
- mlrun/runtimes/remotesparkjob.py +11 -8
- mlrun/runtimes/sparkjob/spark3job.py +17 -18
- mlrun/runtimes/utils.py +40 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +89 -64
- mlrun/serving/server.py +54 -26
- mlrun/serving/states.py +187 -56
- mlrun/serving/utils.py +19 -11
- mlrun/serving/v2_serving.py +136 -63
- mlrun/track/tracker.py +2 -1
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +26 -6
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +375 -105
- mlrun/utils/http.py +2 -2
- mlrun/utils/logger.py +75 -9
- mlrun/utils/notifications/notification/__init__.py +14 -10
- mlrun/utils/notifications/notification/base.py +48 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +63 -2
- mlrun/utils/notifications/notification_pusher.py +146 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +2 -3
- mlrun/utils/version/version.json +2 -2
- mlrun-1.7.2.dist-info/METADATA +390 -0
- mlrun-1.7.2.dist-info/RECORD +351 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/prometheus.py +0 -216
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/METADATA +0 -269
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/artifacts/manager.py
CHANGED
|
@@ -12,11 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import pathlib
|
|
15
|
+
import re
|
|
15
16
|
import typing
|
|
16
17
|
from os.path import exists, isdir
|
|
17
18
|
from urllib.parse import urlparse
|
|
18
19
|
|
|
20
|
+
import mlrun.common.schemas.artifact
|
|
19
21
|
import mlrun.config
|
|
22
|
+
import mlrun.utils.regex
|
|
20
23
|
from mlrun.utils.helpers import (
|
|
21
24
|
get_local_file_schema,
|
|
22
25
|
template_artifact_path,
|
|
@@ -24,7 +27,6 @@ from mlrun.utils.helpers import (
|
|
|
24
27
|
)
|
|
25
28
|
|
|
26
29
|
from ..utils import (
|
|
27
|
-
is_legacy_artifact,
|
|
28
30
|
is_relative_path,
|
|
29
31
|
logger,
|
|
30
32
|
validate_artifact_key_name,
|
|
@@ -33,56 +35,28 @@ from ..utils import (
|
|
|
33
35
|
from .base import (
|
|
34
36
|
Artifact,
|
|
35
37
|
DirArtifact,
|
|
36
|
-
LegacyArtifact,
|
|
37
|
-
LegacyDirArtifact,
|
|
38
|
-
LegacyLinkArtifact,
|
|
39
38
|
LinkArtifact,
|
|
40
39
|
)
|
|
41
40
|
from .dataset import (
|
|
42
41
|
DatasetArtifact,
|
|
43
|
-
LegacyDatasetArtifact,
|
|
44
|
-
LegacyTableArtifact,
|
|
45
42
|
TableArtifact,
|
|
46
43
|
)
|
|
47
|
-
from .model import LegacyModelArtifact, ModelArtifact
|
|
44
|
+
from .model import ModelArtifact
|
|
48
45
|
from .plots import (
|
|
49
|
-
BokehArtifact,
|
|
50
|
-
ChartArtifact,
|
|
51
|
-
LegacyBokehArtifact,
|
|
52
|
-
LegacyChartArtifact,
|
|
53
|
-
LegacyPlotArtifact,
|
|
54
|
-
LegacyPlotlyArtifact,
|
|
55
46
|
PlotArtifact,
|
|
56
47
|
PlotlyArtifact,
|
|
57
48
|
)
|
|
58
49
|
|
|
59
|
-
# TODO - Remove deprecated types when deleted in 1.7.0
|
|
60
50
|
artifact_types = {
|
|
61
51
|
"": Artifact,
|
|
62
52
|
"artifact": Artifact,
|
|
63
53
|
"dir": DirArtifact,
|
|
64
54
|
"link": LinkArtifact,
|
|
65
55
|
"plot": PlotArtifact,
|
|
66
|
-
"chart": ChartArtifact,
|
|
67
56
|
"table": TableArtifact,
|
|
68
57
|
"model": ModelArtifact,
|
|
69
58
|
"dataset": DatasetArtifact,
|
|
70
59
|
"plotly": PlotlyArtifact,
|
|
71
|
-
"bokeh": BokehArtifact,
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
# TODO - Remove this when legacy types are deleted in 1.7.0
|
|
75
|
-
legacy_artifact_types = {
|
|
76
|
-
"": LegacyArtifact,
|
|
77
|
-
"dir": LegacyDirArtifact,
|
|
78
|
-
"link": LegacyLinkArtifact,
|
|
79
|
-
"plot": LegacyPlotArtifact,
|
|
80
|
-
"chart": LegacyChartArtifact,
|
|
81
|
-
"table": LegacyTableArtifact,
|
|
82
|
-
"model": LegacyModelArtifact,
|
|
83
|
-
"dataset": LegacyDatasetArtifact,
|
|
84
|
-
"plotly": LegacyPlotlyArtifact,
|
|
85
|
-
"bokeh": LegacyBokehArtifact,
|
|
86
60
|
}
|
|
87
61
|
|
|
88
62
|
|
|
@@ -98,19 +72,45 @@ class ArtifactProducer:
|
|
|
98
72
|
self.inputs = {}
|
|
99
73
|
|
|
100
74
|
def get_meta(self) -> dict:
|
|
101
|
-
return {
|
|
75
|
+
return {
|
|
76
|
+
"kind": self.kind,
|
|
77
|
+
"name": self.name,
|
|
78
|
+
"tag": self.tag,
|
|
79
|
+
"owner": self.owner,
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
@property
|
|
83
|
+
def uid(self):
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
@staticmethod
|
|
87
|
+
def parse_uri(uri: str) -> tuple[str, str, str]:
|
|
88
|
+
"""Parse artifact producer's uri
|
|
89
|
+
|
|
90
|
+
:param uri: artifact producer's uri in the format <project>/<uid>[-<iteration>]
|
|
91
|
+
:returns: tuple of project, uid, iteration
|
|
92
|
+
"""
|
|
93
|
+
uri_pattern = mlrun.utils.regex.artifact_producer_uri_pattern
|
|
94
|
+
match = re.match(uri_pattern, uri)
|
|
95
|
+
if not match:
|
|
96
|
+
return "", "", ""
|
|
97
|
+
group_dict = match.groupdict()
|
|
98
|
+
|
|
99
|
+
return (
|
|
100
|
+
group_dict["project"] or "",
|
|
101
|
+
group_dict["uid"] or "",
|
|
102
|
+
group_dict["iteration"] or "",
|
|
103
|
+
)
|
|
102
104
|
|
|
103
105
|
|
|
104
106
|
def dict_to_artifact(struct: dict) -> Artifact:
|
|
105
|
-
# Need to distinguish between LegacyArtifact classes and Artifact classes. Use existence of the "metadata"
|
|
106
|
-
# property to make this distinction
|
|
107
107
|
kind = struct.get("kind", "")
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
# TODO: remove this in 1.8.0
|
|
110
|
+
if mlrun.utils.is_legacy_artifact(struct):
|
|
110
111
|
return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
|
|
111
112
|
|
|
112
113
|
artifact_class = artifact_types[kind]
|
|
113
|
-
|
|
114
114
|
return artifact_class.from_dict(struct)
|
|
115
115
|
|
|
116
116
|
|
|
@@ -180,11 +180,13 @@ class ArtifactManager:
|
|
|
180
180
|
upload=None,
|
|
181
181
|
labels=None,
|
|
182
182
|
db_key=None,
|
|
183
|
+
project=None,
|
|
184
|
+
is_retained_producer=None,
|
|
183
185
|
**kwargs,
|
|
184
186
|
) -> Artifact:
|
|
185
187
|
"""
|
|
186
188
|
Log an artifact to the DB and upload it to the artifact store.
|
|
187
|
-
:param producer: The producer of the artifact, the producer depends
|
|
189
|
+
:param producer: The producer of the artifact, the producer depends on where the artifact is being logged.
|
|
188
190
|
:param item: The artifact to log.
|
|
189
191
|
:param body: The body of the artifact.
|
|
190
192
|
:param target_path: The target path of the artifact. (cannot be a relative path)
|
|
@@ -198,10 +200,15 @@ class ArtifactManager:
|
|
|
198
200
|
:param artifact_path: The path to store the artifact.
|
|
199
201
|
If not provided, the artifact will be stored in the default artifact path.
|
|
200
202
|
:param format: The format of the artifact. (e.g. csv, json, html, etc.)
|
|
201
|
-
:param upload: Whether to upload the artifact
|
|
203
|
+
:param upload: Whether to upload the artifact to the datastore. If not provided, and the
|
|
204
|
+
`local_path` is not a directory, upload occurs by default. Directories are uploaded only when this
|
|
205
|
+
flag is explicitly set to `True`.
|
|
202
206
|
:param labels: Labels to add to the artifact.
|
|
203
207
|
:param db_key: The key to use when logging the artifact to the DB.
|
|
204
208
|
If not provided, will generate a key based on the producer name and the artifact key.
|
|
209
|
+
:param project: The project to log the artifact to. If not provided, will use the producer's project.
|
|
210
|
+
:param is_retained_producer: Whether the producer is retained or not. Relevant to register artifacts flow
|
|
211
|
+
where a project may log artifacts which were produced by another producer.
|
|
205
212
|
:param kwargs: Arguments to pass to the artifact class.
|
|
206
213
|
:return: The logged artifact.
|
|
207
214
|
"""
|
|
@@ -226,7 +233,7 @@ class ArtifactManager:
|
|
|
226
233
|
|
|
227
234
|
if db_key is None:
|
|
228
235
|
# set the default artifact db key
|
|
229
|
-
if producer.kind == "run":
|
|
236
|
+
if producer.kind == "run" and not is_retained_producer:
|
|
230
237
|
# When the producer's type is "run,"
|
|
231
238
|
# we generate a different db_key than the one we obtained in the request.
|
|
232
239
|
# As a result, a new artifact for the requested key will be created,
|
|
@@ -251,8 +258,11 @@ class ArtifactManager:
|
|
|
251
258
|
item.labels.update({"workflow-id": item.producer.get("workflow")})
|
|
252
259
|
|
|
253
260
|
item.iter = producer.iteration
|
|
254
|
-
project = producer.project
|
|
261
|
+
project = project or producer.project
|
|
255
262
|
item.project = project
|
|
263
|
+
if is_retained_producer:
|
|
264
|
+
# if the producer is retained, we want to use the original target path
|
|
265
|
+
target_path = target_path or item.target_path
|
|
256
266
|
|
|
257
267
|
# if target_path is provided and not relative, then no need to upload the artifact as it already exists
|
|
258
268
|
if target_path:
|
|
@@ -260,7 +270,8 @@ class ArtifactManager:
|
|
|
260
270
|
raise ValueError(
|
|
261
271
|
f"target_path ({target_path}) param cannot be relative"
|
|
262
272
|
)
|
|
263
|
-
upload
|
|
273
|
+
if upload is None:
|
|
274
|
+
upload = False
|
|
264
275
|
|
|
265
276
|
# if target_path wasn't provided, but src_path is not relative, then no need to upload the artifact as it
|
|
266
277
|
# already exists. In this case set the target_path to the src_path and set upload to False
|
|
@@ -287,7 +298,9 @@ class ArtifactManager:
|
|
|
287
298
|
|
|
288
299
|
if target_path and item.is_dir and not target_path.endswith("/"):
|
|
289
300
|
target_path += "/"
|
|
290
|
-
target_path = template_artifact_path(
|
|
301
|
+
target_path = template_artifact_path(
|
|
302
|
+
artifact_path=target_path, project=producer.project, run_uid=producer.uid
|
|
303
|
+
)
|
|
291
304
|
item.target_path = target_path
|
|
292
305
|
|
|
293
306
|
item.before_log()
|
|
@@ -297,13 +310,10 @@ class ArtifactManager:
|
|
|
297
310
|
# before uploading the item, we want to ensure that its tags are valid,
|
|
298
311
|
# so that we don't upload something that won't be stored later
|
|
299
312
|
validate_tag_name(item.metadata.tag, "artifact.metadata.tag")
|
|
300
|
-
|
|
301
|
-
item.upload()
|
|
302
|
-
else:
|
|
303
|
-
item.upload(artifact_path=artifact_path)
|
|
313
|
+
item.upload(artifact_path=artifact_path)
|
|
304
314
|
|
|
305
315
|
if db_key:
|
|
306
|
-
self._log_to_db(db_key,
|
|
316
|
+
self._log_to_db(db_key, project, producer.inputs, item)
|
|
307
317
|
size = str(item.size) or "?"
|
|
308
318
|
db_str = "Y" if (self.artifact_db and db_key) else "N"
|
|
309
319
|
logger.debug(
|
|
@@ -371,6 +381,23 @@ class ArtifactManager:
|
|
|
371
381
|
project=project,
|
|
372
382
|
)
|
|
373
383
|
|
|
384
|
+
def delete_artifact(
|
|
385
|
+
self,
|
|
386
|
+
item: Artifact,
|
|
387
|
+
deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
|
|
388
|
+
mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
|
|
389
|
+
),
|
|
390
|
+
secrets: dict = None,
|
|
391
|
+
):
|
|
392
|
+
self.artifact_db.del_artifact(
|
|
393
|
+
key=item.db_key,
|
|
394
|
+
project=item.project,
|
|
395
|
+
tag=item.tag,
|
|
396
|
+
tree=item.tree,
|
|
397
|
+
deletion_strategy=deletion_strategy,
|
|
398
|
+
secrets=secrets,
|
|
399
|
+
)
|
|
400
|
+
|
|
374
401
|
|
|
375
402
|
def extend_artifact_path(artifact_path: str, default_artifact_path: str):
|
|
376
403
|
artifact_path = str(artifact_path or "")
|
mlrun/artifacts/model.py
CHANGED
|
@@ -11,13 +11,14 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
|
|
14
15
|
import tempfile
|
|
16
|
+
import warnings
|
|
15
17
|
from os import path
|
|
16
|
-
from typing import Any
|
|
18
|
+
from typing import Any, Optional
|
|
17
19
|
|
|
18
20
|
import pandas as pd
|
|
19
21
|
import yaml
|
|
20
|
-
from deprecated import deprecated
|
|
21
22
|
|
|
22
23
|
import mlrun
|
|
23
24
|
import mlrun.datastore
|
|
@@ -26,7 +27,7 @@ from ..data_types import InferOptions, get_infer_interface
|
|
|
26
27
|
from ..features import Feature
|
|
27
28
|
from ..model import ObjectList
|
|
28
29
|
from ..utils import StorePrefix, is_relative_path
|
|
29
|
-
from .base import Artifact, ArtifactSpec, LegacyArtifact, upload_extra_data
|
|
30
|
+
from .base import Artifact, ArtifactSpec, upload_extra_data
|
|
30
31
|
|
|
31
32
|
model_spec_filename = "model_spec.yaml"
|
|
32
33
|
|
|
@@ -69,8 +70,8 @@ class ModelArtifactSpec(ArtifactSpec):
|
|
|
69
70
|
model_file=None,
|
|
70
71
|
metrics=None,
|
|
71
72
|
paraemeters=None,
|
|
72
|
-
inputs: list[Feature] = None,
|
|
73
|
-
outputs: list[Feature] = None,
|
|
73
|
+
inputs: Optional[list[Feature]] = None,
|
|
74
|
+
outputs: Optional[list[Feature]] = None,
|
|
74
75
|
framework=None,
|
|
75
76
|
algorithm=None,
|
|
76
77
|
feature_vector=None,
|
|
@@ -92,8 +93,8 @@ class ModelArtifactSpec(ArtifactSpec):
|
|
|
92
93
|
self.model_file = model_file
|
|
93
94
|
self.metrics = metrics or {}
|
|
94
95
|
self.parameters = paraemeters or {}
|
|
95
|
-
self.inputs
|
|
96
|
-
self.outputs
|
|
96
|
+
self.inputs = inputs or []
|
|
97
|
+
self.outputs = outputs or []
|
|
97
98
|
self.framework = framework
|
|
98
99
|
self.algorithm = algorithm
|
|
99
100
|
self.feature_vector = feature_vector
|
|
@@ -102,21 +103,21 @@ class ModelArtifactSpec(ArtifactSpec):
|
|
|
102
103
|
self.model_target_file = model_target_file
|
|
103
104
|
|
|
104
105
|
@property
|
|
105
|
-
def inputs(self) ->
|
|
106
|
+
def inputs(self) -> ObjectList:
|
|
106
107
|
"""input feature list"""
|
|
107
108
|
return self._inputs
|
|
108
109
|
|
|
109
110
|
@inputs.setter
|
|
110
|
-
def inputs(self, inputs: list[Feature]):
|
|
111
|
+
def inputs(self, inputs: list[Feature]) -> None:
|
|
111
112
|
self._inputs = ObjectList.from_list(Feature, inputs)
|
|
112
113
|
|
|
113
114
|
@property
|
|
114
|
-
def outputs(self) ->
|
|
115
|
+
def outputs(self) -> ObjectList:
|
|
115
116
|
"""output feature list"""
|
|
116
117
|
return self._outputs
|
|
117
118
|
|
|
118
119
|
@outputs.setter
|
|
119
|
-
def outputs(self, outputs: list[Feature]):
|
|
120
|
+
def outputs(self, outputs: list[Feature]) -> None:
|
|
120
121
|
self._outputs = ObjectList.from_list(Feature, outputs)
|
|
121
122
|
|
|
122
123
|
|
|
@@ -148,6 +149,12 @@ class ModelArtifact(Artifact):
|
|
|
148
149
|
model_dir=None,
|
|
149
150
|
**kwargs,
|
|
150
151
|
):
|
|
152
|
+
if key or body or format or target_path:
|
|
153
|
+
warnings.warn(
|
|
154
|
+
"Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
|
|
155
|
+
"Use the metadata and spec parameters instead.",
|
|
156
|
+
DeprecationWarning,
|
|
157
|
+
)
|
|
151
158
|
super().__init__(key, body, format=format, target_path=target_path, **kwargs)
|
|
152
159
|
model_file = str(model_file or "")
|
|
153
160
|
if model_file and "/" in model_file:
|
|
@@ -176,22 +183,22 @@ class ModelArtifact(Artifact):
|
|
|
176
183
|
self._spec = self._verify_dict(spec, "spec", ModelArtifactSpec)
|
|
177
184
|
|
|
178
185
|
@property
|
|
179
|
-
def inputs(self) ->
|
|
186
|
+
def inputs(self) -> ObjectList:
|
|
180
187
|
"""input feature list"""
|
|
181
188
|
return self.spec.inputs
|
|
182
189
|
|
|
183
190
|
@inputs.setter
|
|
184
|
-
def inputs(self, inputs: list[Feature]):
|
|
191
|
+
def inputs(self, inputs: list[Feature]) -> None:
|
|
185
192
|
"""input feature list"""
|
|
186
193
|
self.spec.inputs = inputs
|
|
187
194
|
|
|
188
195
|
@property
|
|
189
|
-
def outputs(self) ->
|
|
196
|
+
def outputs(self) -> ObjectList:
|
|
190
197
|
"""output feature list"""
|
|
191
198
|
return self.spec.outputs
|
|
192
199
|
|
|
193
200
|
@outputs.setter
|
|
194
|
-
def outputs(self, outputs: list[Feature]):
|
|
201
|
+
def outputs(self, outputs: list[Feature]) -> None:
|
|
195
202
|
"""output feature list"""
|
|
196
203
|
self.spec.outputs = outputs
|
|
197
204
|
|
|
@@ -396,144 +403,6 @@ class ModelArtifact(Artifact):
|
|
|
396
403
|
return mlrun.get_dataitem(target_model_path).get()
|
|
397
404
|
|
|
398
405
|
|
|
399
|
-
# TODO: remove in 1.7.0
|
|
400
|
-
@deprecated(
|
|
401
|
-
version="1.3.0",
|
|
402
|
-
reason="'LegacyModelArtifact' will be removed in 1.7.0, use 'ModelArtifact' instead",
|
|
403
|
-
category=FutureWarning,
|
|
404
|
-
)
|
|
405
|
-
class LegacyModelArtifact(LegacyArtifact):
|
|
406
|
-
"""ML Model artifact
|
|
407
|
-
|
|
408
|
-
Store link to ML model file(s) along with the model metrics, parameters, schema, and stats
|
|
409
|
-
"""
|
|
410
|
-
|
|
411
|
-
_dict_fields = LegacyArtifact._dict_fields + [
|
|
412
|
-
"model_file",
|
|
413
|
-
"metrics",
|
|
414
|
-
"parameters",
|
|
415
|
-
"inputs",
|
|
416
|
-
"outputs",
|
|
417
|
-
"framework",
|
|
418
|
-
"algorithm",
|
|
419
|
-
"extra_data",
|
|
420
|
-
"feature_vector",
|
|
421
|
-
"feature_weights",
|
|
422
|
-
"feature_stats",
|
|
423
|
-
"model_target_file",
|
|
424
|
-
]
|
|
425
|
-
kind = "model"
|
|
426
|
-
_store_prefix = StorePrefix.Model
|
|
427
|
-
|
|
428
|
-
def __init__(
|
|
429
|
-
self,
|
|
430
|
-
key=None,
|
|
431
|
-
body=None,
|
|
432
|
-
format=None,
|
|
433
|
-
model_file=None,
|
|
434
|
-
metrics=None,
|
|
435
|
-
target_path=None,
|
|
436
|
-
parameters=None,
|
|
437
|
-
inputs=None,
|
|
438
|
-
outputs=None,
|
|
439
|
-
framework=None,
|
|
440
|
-
algorithm=None,
|
|
441
|
-
feature_vector=None,
|
|
442
|
-
feature_weights=None,
|
|
443
|
-
extra_data=None,
|
|
444
|
-
model_target_file=None,
|
|
445
|
-
**kwargs,
|
|
446
|
-
):
|
|
447
|
-
super().__init__(key, body, format=format, target_path=target_path, **kwargs)
|
|
448
|
-
self._inputs: ObjectList = None
|
|
449
|
-
self._outputs: ObjectList = None
|
|
450
|
-
|
|
451
|
-
self.model_file = model_file
|
|
452
|
-
self.parameters = parameters or {}
|
|
453
|
-
self.metrics = metrics or {}
|
|
454
|
-
self.inputs: list[Feature] = inputs or []
|
|
455
|
-
self.outputs: list[Feature] = outputs or []
|
|
456
|
-
self.extra_data = extra_data or {}
|
|
457
|
-
self.framework = framework
|
|
458
|
-
self.algorithm = algorithm
|
|
459
|
-
self.feature_vector = feature_vector
|
|
460
|
-
self.feature_weights = feature_weights
|
|
461
|
-
self.feature_stats = None
|
|
462
|
-
self.model_target_file = model_target_file
|
|
463
|
-
|
|
464
|
-
@property
|
|
465
|
-
def inputs(self) -> list[Feature]:
|
|
466
|
-
"""input feature list"""
|
|
467
|
-
return self._inputs
|
|
468
|
-
|
|
469
|
-
@inputs.setter
|
|
470
|
-
def inputs(self, inputs: list[Feature]):
|
|
471
|
-
self._inputs = ObjectList.from_list(Feature, inputs)
|
|
472
|
-
|
|
473
|
-
@property
|
|
474
|
-
def outputs(self) -> list[Feature]:
|
|
475
|
-
"""output feature list"""
|
|
476
|
-
return self._outputs
|
|
477
|
-
|
|
478
|
-
@outputs.setter
|
|
479
|
-
def outputs(self, outputs: list[Feature]):
|
|
480
|
-
self._outputs = ObjectList.from_list(Feature, outputs)
|
|
481
|
-
|
|
482
|
-
def infer_from_df(self, df, label_columns=None, with_stats=True, num_bins=None):
|
|
483
|
-
"""infer inputs, outputs, and stats from provided df (training set)
|
|
484
|
-
|
|
485
|
-
:param df: dataframe to infer from
|
|
486
|
-
:param label_columns: name of the label (target) column
|
|
487
|
-
:param with_stats: infer statistics (min, max, .. histogram)
|
|
488
|
-
:param num_bins: number of bins for histogram
|
|
489
|
-
"""
|
|
490
|
-
subset = df
|
|
491
|
-
inferer = get_infer_interface(subset)
|
|
492
|
-
if label_columns:
|
|
493
|
-
if not isinstance(label_columns, list):
|
|
494
|
-
label_columns = [label_columns]
|
|
495
|
-
subset = df.drop(columns=label_columns)
|
|
496
|
-
inferer.infer_schema(subset, self.inputs, {}, options=InferOptions.Features)
|
|
497
|
-
if label_columns:
|
|
498
|
-
inferer.infer_schema(
|
|
499
|
-
df[label_columns], self.outputs, {}, options=InferOptions.Features
|
|
500
|
-
)
|
|
501
|
-
if with_stats:
|
|
502
|
-
self.feature_stats = inferer.get_stats(
|
|
503
|
-
df, options=InferOptions.Histogram, num_bins=num_bins
|
|
504
|
-
)
|
|
505
|
-
|
|
506
|
-
@property
|
|
507
|
-
def is_dir(self):
|
|
508
|
-
return True
|
|
509
|
-
|
|
510
|
-
def before_log(self):
|
|
511
|
-
if not self.model_file:
|
|
512
|
-
raise ValueError("model_file attr must be specified")
|
|
513
|
-
|
|
514
|
-
super().before_log()
|
|
515
|
-
|
|
516
|
-
if self.framework:
|
|
517
|
-
self.labels = self.labels or {}
|
|
518
|
-
self.labels["framework"] = self.framework
|
|
519
|
-
|
|
520
|
-
def upload(self):
|
|
521
|
-
target_model_path = path.join(self.target_path, self.model_file)
|
|
522
|
-
body = self.get_body()
|
|
523
|
-
if body:
|
|
524
|
-
self._upload_body(body, target=target_model_path)
|
|
525
|
-
else:
|
|
526
|
-
src_model_path = _get_src_path(self, self.model_file)
|
|
527
|
-
if not path.isfile(src_model_path):
|
|
528
|
-
raise ValueError(f"model file {src_model_path} not found")
|
|
529
|
-
self._upload_file(src_model_path, target=target_model_path)
|
|
530
|
-
|
|
531
|
-
upload_extra_data(self, self.extra_data)
|
|
532
|
-
|
|
533
|
-
spec_path = path.join(self.target_path, model_spec_filename)
|
|
534
|
-
mlrun.datastore.store_manager.object(url=spec_path).put(self.to_yaml())
|
|
535
|
-
|
|
536
|
-
|
|
537
406
|
def _get_src_path(model_spec: ModelArtifact, filename):
|
|
538
407
|
if model_spec.src_path:
|
|
539
408
|
return path.join(model_spec.src_path, filename)
|
|
@@ -552,9 +421,9 @@ def get_model(model_dir, suffix=""):
|
|
|
552
421
|
|
|
553
422
|
example::
|
|
554
423
|
|
|
555
|
-
model_file, model_artifact, extra_data = get_model(models_path, suffix=
|
|
424
|
+
model_file, model_artifact, extra_data = get_model(models_path, suffix=".pkl")
|
|
556
425
|
model = load(open(model_file, "rb"))
|
|
557
|
-
categories = extra_data[
|
|
426
|
+
categories = extra_data["categories"].as_df()
|
|
558
427
|
|
|
559
428
|
:param model_dir: model dir or artifact path (store://..) or DataItem
|
|
560
429
|
:param suffix: model filename suffix (when using a dir)
|
|
@@ -640,7 +509,7 @@ def _get_extra(target, extra_data, is_dir=False):
|
|
|
640
509
|
def _remove_tag_from_spec_yaml(model_spec):
|
|
641
510
|
spec_dict = model_spec.to_dict()
|
|
642
511
|
spec_dict["metadata"].pop("tag", None)
|
|
643
|
-
return yaml.
|
|
512
|
+
return yaml.safe_dump(spec_dict)
|
|
644
513
|
|
|
645
514
|
|
|
646
515
|
def update_model(
|
|
@@ -663,8 +532,11 @@ def update_model(
|
|
|
663
532
|
|
|
664
533
|
example::
|
|
665
534
|
|
|
666
|
-
update_model(
|
|
667
|
-
|
|
535
|
+
update_model(
|
|
536
|
+
model_path,
|
|
537
|
+
metrics={"speed": 100},
|
|
538
|
+
extra_data={"my_data": b"some text", "file": "s3://mybucket/.."},
|
|
539
|
+
)
|
|
668
540
|
|
|
669
541
|
:param model_artifact: model artifact object or path (store://..) or DataItem
|
|
670
542
|
:param parameters: parameters dict
|