mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +23 -111
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +46 -42
- mlrun/artifacts/model.py +9 -141
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/constants.py +65 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +10 -5
- mlrun/common/schemas/alert.py +92 -11
- mlrun/common/schemas/api_gateway.py +56 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +15 -3
- mlrun/common/schemas/model_monitoring/constants.py +58 -7
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +5 -11
- mlrun/common/types.py +1 -0
- mlrun/config.py +27 -9
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/datastore_profile.py +56 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +147 -7
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +110 -42
- mlrun/datastore/utils.py +42 -0
- mlrun/db/base.py +54 -10
- mlrun/db/httpdb.py +282 -79
- mlrun/db/nopdb.py +52 -10
- mlrun/errors.py +11 -0
- mlrun/execution.py +24 -9
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/feature_vector.py +8 -0
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +16 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/tf_keras/__init__.py +4 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +5 -3
- mlrun/launcher/local.py +8 -2
- mlrun/launcher/remote.py +8 -2
- mlrun/lists.py +6 -2
- mlrun/model.py +45 -21
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +280 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/__init__.py +0 -2
- mlrun/model_monitoring/db/stores/base/store.py +22 -37
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +46 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +57 -216
- mlrun/model_monitoring/writer.py +134 -124
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +19 -12
- mlrun/projects/pipelines.py +79 -102
- mlrun/projects/project.py +265 -103
- mlrun/render.py +15 -14
- mlrun/run.py +16 -46
- mlrun/runtimes/__init__.py +6 -3
- mlrun/runtimes/base.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +194 -84
- mlrun/runtimes/nuclio/application/application.py +170 -8
- mlrun/runtimes/nuclio/function.py +39 -49
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +9 -3
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -45
- mlrun/serving/server.py +2 -1
- mlrun/serving/v2_serving.py +5 -1
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +107 -75
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +34 -7
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/regex.py +9 -0
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +150 -130
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0
mlrun/feature_store/retrieval/spark_merger.py
CHANGED

@@ -12,11 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
 import pandas as pd
 import semver
 
 import mlrun
+from mlrun.datastore.sources import ParquetSource
 from mlrun.datastore.targets import get_offline_target
+from mlrun.utils.helpers import additional_filters_warning
 
 from ...runtimes import RemoteSparkRuntime
 from ...runtimes.sparkjob import Spark3Runtime
@@ -225,7 +228,12 @@ class SparkFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         source_kwargs = {}
         if feature_set.spec.passthrough:
             if not feature_set.spec.source:
@@ -235,6 +243,7 @@ class SparkFeatureMerger(BaseMerger):
             source_kind = feature_set.spec.source.kind
             source_path = feature_set.spec.source.path
             source_kwargs.update(feature_set.spec.source.attributes)
+            source_kwargs.pop("additional_filters", None)
         else:
             target = get_offline_target(feature_set)
             if not target:
@@ -248,12 +257,19 @@ class SparkFeatureMerger(BaseMerger):
         # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
         source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
 
+        if source_driver != ParquetSource:
+            additional_filters_warning(additional_filters, source_driver)
+            additional_filters = None
+        additional_filters_dict = (
+            {"additional_filters": additional_filters} if additional_filters else {}
+        )
         source = source_driver(
             name=self.vector.metadata.name,
             path=source_path,
             time_field=time_column,
             start_time=start_time,
             end_time=end_time,
+            **additional_filters_dict,
             **source_kwargs,
         )
 
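The hunks above thread a new additional_filters argument through the Spark merger and only honor it when the source driver is ParquetSource; for any other source kind the merger warns and drops the filters. A minimal sketch of the pass-through on the source side, assuming a pyarrow-style predicate format and a hypothetical parquet path:

    from mlrun.datastore.sources import ParquetSource

    # Only Parquet sources receive additional_filters; the Spark merger above
    # strips the argument (with a warning) for every other source kind.
    source = ParquetSource(
        name="transactions",
        path="v3io:///projects/fraud/artifacts/transactions.parquet",  # hypothetical path
        additional_filters=[("amount", ">", 100)],  # assumed pyarrow-style predicate
    )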
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py
CHANGED

@@ -547,9 +547,9 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
                 "inputs",
                 "parameters",
             ]:
-                text +=
-                    property_name.capitalize()
-                    self._markdown_print(value=property_value, tabs=2)
+                text += (
+                    f"\n * **{property_name.capitalize()}**: "
+                    f"{self._markdown_print(value=property_value, tabs=2)}"
                 )
         else:
             for property_name, property_value in self._extract_epoch_results().items():
@@ -614,13 +614,8 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
         :return: The generated link.
         """
         return (
-            '<a href="{}/{}/{}
-
-            config.ui.projects_prefix,
-            context.project,
-            context.uid,
-            link_text,
-            )
+            f'<a href="{config.resolve_ui_url()}/{config.ui.projects_prefix}/{context.project}'
+            f'/jobs/monitor/{context.uid}/overview" target="_blank">{link_text}</a>'
         )
 
     @staticmethod
@@ -653,13 +648,13 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
         if isinstance(value, list):
             if len(value) == 0:
                 return ""
-            text = "\n" + yaml.
+            text = "\n" + yaml.safe_dump(value)
             text = " \n".join([" " * tabs + line for line in text.splitlines()])
             return text
         if isinstance(value, dict):
             if len(value) == 0:
                 return ""
-            text = yaml.
+            text = yaml.safe_dump(value)
             text = " \n".join(
                 [" " * tabs + "- " + line for line in text.splitlines()]
             )
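The last hunk switches the Markdown helper to yaml.safe_dump. This is standard PyYAML behavior rather than anything mlrun-specific: safe_dump emits only standard YAML tags and refuses arbitrary Python objects, so logged values cannot drag python-specific tags into the TensorBoard text.

    import yaml

    yaml.safe_dump({"metrics": [0.1, 0.2]})  # emits only standard YAML tags
    # yaml.safe_dump(object())               # raises RepresenterError instead of
    #                                        # serializing a python-specific tag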
mlrun/frameworks/parallel_coordinates.py
CHANGED

@@ -295,7 +295,7 @@ def compare_db_runs(
     iter=False,
     start_time_from: datetime = None,
     hide_identical: bool = True,
-    exclude: list =
+    exclude: list = None,
     show=None,
     colorscale: str = "Blues",
     filename=None,
@@ -332,6 +332,7 @@ def compare_db_runs(
         **query_args,
     )
 
+    exclude = exclude or []
    runs_df = _runs_list_to_df(runs_list)
    plot_as_html = gen_pcp_plot(
        runs_df,
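The compare_db_runs change replaces a mutable default argument with None and re-initializes it inside the function body. The pitfall it avoids is plain Python, not mlrun-specific:

    def bad(exclude: list = []):
        exclude.append("x")       # the same list object is shared across calls
        return exclude

    bad(); bad()                  # second call returns ['x', 'x']

    def good(exclude: list = None):
        exclude = exclude or []   # fresh list per call, as in the hunk above
        exclude.append("x")
        return exclude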
mlrun/frameworks/tf_keras/__init__.py
CHANGED

@@ -18,6 +18,7 @@ from typing import Any, Union
 from tensorflow import keras
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 
 from .callbacks import MLRunLoggingCallback, TensorboardLoggingCallback
 from .mlrun_interface import TFKerasMLRunInterface
@@ -126,7 +127,9 @@ def apply_mlrun(
     # # Use horovod:
     if use_horovod is None:
         use_horovod = (
-            context.labels.get(
+            context.labels.get(mlrun_constants.MLRunInternalLabels.kind, "") == "mpijob"
+            if context is not None
+            else False
         )
 
     # Create a model handler:
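The apply_mlrun hunk guards the Horovod auto-detection so it no longer dereferences context.labels when no MLRun context is available. A usage sketch under that assumption; the keyword names are taken from the tf_keras framework interface and should be treated as an approximation:

    from tensorflow import keras

    from mlrun.frameworks.tf_keras import apply_mlrun

    model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])

    # With the guard above, running outside an MLRun job (context=None) resolves
    # use_horovod to False instead of failing on context.labels.
    apply_mlrun(model=model, model_name="my-model", context=None)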
mlrun/k8s_utils.py
CHANGED
@@ -141,17 +141,6 @@ def verify_label_key(key: str):
     if not key:
         raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
 
-    mlrun.utils.helpers.verify_field_regex(
-        f"project.metadata.labels.'{key}'",
-        key,
-        mlrun.utils.regex.k8s_character_limit,
-    )
-
-    if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
-        )
-
     parts = key.split("/")
     if len(parts) == 1:
         name = parts[0]
@@ -173,12 +162,22 @@ def verify_label_key(key: str):
             "Label key can only contain one '/'"
         )
 
+    mlrun.utils.helpers.verify_field_regex(
+        f"project.metadata.labels.'{key}'",
+        name,
+        mlrun.utils.regex.k8s_character_limit,
+    )
     mlrun.utils.helpers.verify_field_regex(
         f"project.metadata.labels.'{key}'",
         name,
         mlrun.utils.regex.qualified_name,
     )
 
+    if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
+        )
+
 
 def verify_label_value(value, label_key):
     mlrun.utils.helpers.verify_field_regex(
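The verify_label_key hunks apply the k8s character-limit regex to the name part of the key (after an optional prefix) instead of the whole key, and defer the reserved-prefix check until after the key is parsed. A sketch of the resulting behavior, assuming the regexes mirror the Kubernetes label-name rules they are named after:

    import mlrun.k8s_utils

    # The length limit now applies to the part after the '/', so a long
    # DNS-style prefix no longer trips the name-length validation
    # (assumed behavior based on the hunks above).
    mlrun.k8s_utils.verify_label_key("my.long.company.domain.example.com/owner")

    # Reserved prefixes are still rejected, just later in the flow.
    mlrun.k8s_utils.verify_label_key("kubernetes.io/arch")  # raises MLRunInvalidArgumentError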
mlrun/launcher/base.py
CHANGED
@@ -18,10 +18,11 @@ import os
 import uuid
 from typing import Any, Callable, Optional, Union
 
+import mlrun_pipelines.common.ops
+
 import mlrun.common.schemas
 import mlrun.config
 import mlrun.errors
-import mlrun.kfpops
 import mlrun.lists
 import mlrun.model
 import mlrun.runtimes
@@ -390,7 +391,7 @@ class BaseLauncher(abc.ABC):
             return
 
         if result and runtime.kfp and err is None:
-
+            mlrun_pipelines.common.ops.write_kfpmeta(result)
 
         self._log_track_results(runtime.is_child, result, run)
 
@@ -403,7 +404,7 @@ class BaseLauncher(abc.ABC):
         )
         if (
             run.status.state
-            in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
+            in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
         ):
             if runtime._is_remote and not runtime.is_child:
                 logger.error(
mlrun/launcher/client.py
CHANGED
@@ -16,6 +16,7 @@ from typing import Optional
 
 import IPython
 
+import mlrun.common.constants as mlrun_constants
 import mlrun.errors
 import mlrun.launcher.base as launcher
 import mlrun.lists
@@ -69,13 +70,14 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
     def _store_function(
         runtime: "mlrun.runtimes.BaseRuntime", run: "mlrun.run.RunObject"
     ):
-        run.metadata.labels[
+        run.metadata.labels[mlrun_constants.MLRunInternalLabels.kind] = runtime.kind
         mlrun.runtimes.utils.enrich_run_labels(
-            run.metadata.labels, [mlrun.runtimes.constants.RunLabels.owner]
+            run.metadata.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
         )
         if run.spec.output_path:
             run.spec.output_path = run.spec.output_path.replace(
-                "{{run.user}}",
+                "{{run.user}}",
+                run.metadata.labels[mlrun_constants.MLRunInternalLabels.owner],
             )
         db = runtime._get_db()
         if db and runtime.kind != "handler":
mlrun/launcher/local.py
CHANGED
@@ -15,6 +15,7 @@ import os
 import pathlib
 from typing import Callable, Optional, Union
 
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.schedule
 import mlrun.errors
 import mlrun.launcher.client as launcher
@@ -132,8 +133,13 @@ class ClientLocalLauncher(launcher.ClientBaseLauncher):
         runtime: "mlrun.runtimes.BaseRuntime",
         run: Optional[Union["mlrun.run.RunTemplate", "mlrun.run.RunObject"]] = None,
     ):
-        if
-
+        if (
+            "V3IO_USERNAME" in os.environ
+            and mlrun_constants.MLRunInternalLabels.v3io_user not in run.metadata.labels
+        ):
+            run.metadata.labels[mlrun_constants.MLRunInternalLabels.v3io_user] = (
+                os.environ.get("V3IO_USERNAME")
+            )
 
         # store function object in db unless running from within a run pod
         if not runtime.is_child:
mlrun/launcher/remote.py
CHANGED
@@ -17,6 +17,7 @@ from typing import Optional, Union
 import pandas as pd
 import requests
 
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.schedule
 import mlrun.db
 import mlrun.errors
@@ -100,8 +101,13 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
         if runtime.verbose:
             logger.info(f"runspec:\n{run.to_yaml()}")
 
-        if
-
+        if (
+            "V3IO_USERNAME" in os.environ
+            and mlrun_constants.MLRunInternalLabels.v3io_user not in run.metadata.labels
+        ):
+            run.metadata.labels[mlrun_constants.MLRunInternalLabels.v3io_user] = (
+                os.environ.get("V3IO_USERNAME")
+            )
 
         logger.info(
             "Storing function",
mlrun/lists.py
CHANGED
@@ -21,7 +21,7 @@ import mlrun.frameworks
 from .artifacts import Artifact, dict_to_artifact
 from .config import config
 from .render import artifacts_to_html, runs_to_html
-from .utils import flatten, get_artifact_target, get_in
+from .utils import flatten, get_artifact_target, get_in
 
 list_header = [
     "project",
@@ -29,12 +29,14 @@ list_header = [
     "iter",
     "start",
     "state",
+    "kind",
     "name",
     "labels",
     "inputs",
     "parameters",
     "results",
     "artifacts",
+    "artifact_uris",
     "error",
 ]
 
@@ -56,12 +58,14 @@ class RunList(list):
             get_in(run, "metadata.iteration", ""),
             get_in(run, "status.start_time", ""),
             get_in(run, "status.state", ""),
+            get_in(run, "step_kind", get_in(run, "kind", "")),
             get_in(run, "metadata.name", ""),
             get_in(run, "metadata.labels", ""),
             get_in(run, "spec.inputs", ""),
             get_in(run, "spec.parameters", ""),
             get_in(run, "status.results", ""),
             get_in(run, "status.artifacts", []),
+            get_in(run, "status.artifact_uris", {}),
             get_in(run, "status.error", ""),
         ]
         if extend_iterations and iterations:
@@ -184,7 +188,7 @@ class ArtifactList(list):
             "uri": ["uri", "uri"],
         }
         for artifact in self:
-            fields_index =
+            fields_index = 1
             row = [get_in(artifact, v[fields_index], "") for k, v in head.items()]
             artifact_uri = dict_to_artifact(artifact).uri
             last_index = len(row) - 1
mlrun/model.py
CHANGED
@@ -27,13 +27,14 @@ from typing import Any, Optional, Union
 import pydantic.error_wrappers
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas.notification
+import mlrun.utils.regex
 
 from .utils import (
     dict_to_json,
     dict_to_yaml,
     get_artifact_target,
-    is_legacy_artifact,
     logger,
     template_artifact_path,
 )
@@ -682,10 +683,14 @@ class Notification(ModelObj):
 
     def __init__(
         self,
-        kind=
+        kind: mlrun.common.schemas.notification.NotificationKind = (
+            mlrun.common.schemas.notification.NotificationKind.slack
+        ),
         name=None,
         message=None,
-        severity=
+        severity: mlrun.common.schemas.notification.NotificationSeverity = (
+            mlrun.common.schemas.notification.NotificationSeverity.INFO
+        ),
         when=None,
         condition=None,
         secret_params=None,
@@ -694,12 +699,10 @@
         sent_time=None,
         reason=None,
     ):
-        self.kind = kind
+        self.kind = kind
         self.name = name or ""
         self.message = message or ""
-        self.severity =
-            severity or mlrun.common.schemas.notification.NotificationSeverity.INFO
-        )
+        self.severity = severity
         self.when = when or ["completed"]
         self.condition = condition or ""
         self.secret_params = secret_params or {}
@@ -769,7 +772,10 @@ class RunMetadata(ModelObj):
     def is_workflow_runner(self):
         if not self.labels:
             return False
-        return
+        return (
+            self.labels.get(mlrun_constants.MLRunInternalLabels.job_type, "")
+            == "workflow-runner"
+        )
 
 
 class HyperParamStrategies:
@@ -1208,6 +1214,7 @@ class RunStatus(ModelObj):
         ui_url=None,
         reason: str = None,
         notifications: dict[str, Notification] = None,
+        artifact_uris: dict[str, str] = None,
     ):
         self.state = state or "created"
         self.status_text = status_text
@@ -1222,6 +1229,8 @@
         self.ui_url = ui_url
         self.reason = reason
         self.notifications = notifications or {}
+        # Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
+        self.artifact_uris = artifact_uris or {}
 
     def is_failed(self) -> Optional[bool]:
         """
@@ -1435,11 +1444,14 @@ class RunObject(RunTemplate):
         unknown_error = ""
         if (
             self.status.state
-            in mlrun.runtimes.constants.RunStates.abortion_states()
+            in mlrun.common.runtimes.constants.RunStates.abortion_states()
         ):
             unknown_error = "Run was aborted"
 
-        elif
+        elif (
+            self.status.state
+            in mlrun.common.runtimes.constants.RunStates.error_states()
+        ):
             unknown_error = "Unknown error"
 
         return (
@@ -1477,7 +1489,7 @@
         outputs = {k: v for k, v in self.status.results.items()}
         if self.status.artifacts:
             for a in self.status.artifacts:
-                key = a["
+                key = a["metadata"]["key"]
                 outputs[key] = get_artifact_target(a, self.metadata.project)
         return outputs
 
@@ -1520,7 +1532,10 @@
 
     def state(self):
         """current run state"""
-        if
+        if (
+            self.status.state
+            in mlrun.common.runtimes.constants.RunStates.terminal_states()
+        ):
             return self.status.state
         self.refresh()
         return self.status.state or "unknown"
@@ -1534,8 +1549,10 @@
             iter=self.metadata.iteration,
         )
         if run:
-
-
+            run_status = run.get("status", {})
+            # Artifacts are not stored in the DB, so we need to preserve them here
+            run_status["artifacts"] = self.status.artifacts
+            self.status = RunStatus.from_dict(run_status)
         return self
 
     def show(self):
@@ -1582,7 +1599,7 @@
         last_pull_log_time = None
         logs_enabled = show_logs is not False
         state = self.state()
-        if state not in mlrun.runtimes.constants.RunStates.terminal_states():
+        if state not in mlrun.common.runtimes.constants.RunStates.terminal_states():
             logger.info(
                 f"run {self.metadata.name} is not completed yet, waiting for it to complete",
                 current_state=state,
@@ -1592,7 +1609,8 @@
             if (
                 logs_enabled
                 and logs_interval
-                and state
+                and state
+                not in mlrun.common.runtimes.constants.RunStates.terminal_states()
                 and (
                     last_pull_log_time is None
                     or (datetime.now() - last_pull_log_time).seconds > logs_interval
@@ -1601,7 +1619,7 @@
                 last_pull_log_time = datetime.now()
                 state, offset = self.logs(watch=False, offset=offset)
 
-            if state in mlrun.runtimes.constants.RunStates.terminal_states():
+            if state in mlrun.common.runtimes.constants.RunStates.terminal_states():
                 if logs_enabled and logs_interval:
                     self.logs(watch=False, offset=offset)
                 break
@@ -1613,7 +1631,10 @@
             )
         if logs_enabled and not logs_interval:
            self.logs(watch=False)
-        if
+        if (
+            raise_on_failure
+            and state != mlrun.common.runtimes.constants.RunStates.completed
+        ):
            raise mlrun.errors.MLRunRuntimeError(
                f"Task {self.metadata.name} did not complete (state={state})"
            )
@@ -1629,9 +1650,12 @@
 
     @staticmethod
     def parse_uri(uri: str) -> tuple[str, str, str, str]:
-
-
-
+        """Parse the run's uri
+
+        :param uri: run uri in the format of <project>@<uid>#<iteration>[:tag]
+        :return: project, uid, iteration, tag
+        """
+        uri_pattern = mlrun.utils.regex.run_uri_pattern
         match = re.match(uri_pattern, uri)
         if not match:
             raise ValueError(
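With the new Notification defaults above, the kind and severity arguments are typed enums that default to Slack and INFO instead of being patched up inside __init__. A small construction sketch; the webhook secret-parameter shape follows the usual Slack notification configuration and is an assumption here:

    from mlrun.model import Notification

    notification = Notification(
        name="on-completion",
        message="training run finished",
        when=["completed", "error"],
        secret_params={"webhook": "https://hooks.slack.com/services/..."},  # placeholder
    )
    # notification.kind defaults to NotificationKind.slack,
    # notification.severity defaults to NotificationSeverity.INFO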
mlrun/model_monitoring/__init__.py
CHANGED

@@ -15,7 +15,7 @@
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 # for backwards compatibility
 
-from .db import get_store_object
+from .db import get_store_object, get_tsdb_connector
 from .helpers import get_stream_path
 from .model_endpoint import ModelEndpoint
 from .tracking_policy import TrackingPolicy
mlrun/model_monitoring/api.py
CHANGED
@@ -22,9 +22,10 @@ import pandas as pd
 
 import mlrun.artifacts
 import mlrun.common.helpers
-import mlrun.common.schemas.model_monitoring.constants as
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.feature_store
 import mlrun.model_monitoring.application
+import mlrun.model_monitoring.applications as mm_app
 import mlrun.serving
 from mlrun.data_types.infer import InferOptions, get_df_stats
 from mlrun.utils import datetime_now, logger
@@ -48,7 +49,7 @@ def get_or_create_model_endpoint(
     sample_set_statistics: dict[str, typing.Any] = None,
     drift_threshold: float = None,
     possible_drift_threshold: float = None,
-    monitoring_mode:
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
     db_session=None,
 ) -> ModelEndpoint:
     """
@@ -128,7 +129,7 @@ def record_results(
     context: typing.Optional[mlrun.MLClientCtx] = None,
     infer_results_df: typing.Optional[pd.DataFrame] = None,
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
-    monitoring_mode:
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
     # Deprecated arguments:
     drift_threshold: typing.Optional[float] = None,
     possible_drift_threshold: typing.Optional[float] = None,
@@ -282,7 +283,7 @@ def _model_endpoint_validations(
     # drift and possible drift thresholds
     if drift_threshold:
         current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-
+            mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
         )
         if current_drift_threshold != drift_threshold:
@@ -293,7 +294,7 @@ def _model_endpoint_validations(
 
     if possible_drift_threshold:
         current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-
+            mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
         )
         if current_possible_drift_threshold != possible_drift_threshold:
@@ -332,14 +333,14 @@ def write_monitoring_df(
     )
 
     # Modify the DataFrame to the required structure that will be used later by the monitoring batch job
-    if
+    if mm_constants.EventFieldType.TIMESTAMP not in infer_results_df.columns:
         # Initialize timestamp column with the current time
-        infer_results_df[
+        infer_results_df[mm_constants.EventFieldType.TIMESTAMP] = infer_datetime
 
     # `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
     # the ingest process
-    infer_results_df[
-    infer_results_df.set_index(
+    infer_results_df[mm_constants.EventFieldType.ENDPOINT_ID] = endpoint_id
+    infer_results_df.set_index(mm_constants.EventFieldType.ENDPOINT_ID, inplace=True)
 
     monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)
 
@@ -355,7 +356,7 @@ def _generate_model_endpoint(
     sample_set_statistics: dict[str, typing.Any],
     drift_threshold: float,
     possible_drift_threshold: float,
-    monitoring_mode:
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
 ) -> ModelEndpoint:
     """
     Write a new model endpoint record.
@@ -394,11 +395,11 @@ def _generate_model_endpoint(
     model_endpoint.spec.model_class = "drift-analysis"
     if drift_threshold:
         model_endpoint.spec.monitor_configuration[
-
+            mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
         ] = drift_threshold
     if possible_drift_threshold:
         model_endpoint.spec.monitor_configuration[
-
+            mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
         ] = possible_drift_threshold
 
     model_endpoint.spec.monitoring_mode = monitoring_mode
@@ -589,7 +590,10 @@ def _create_model_monitoring_function_base(
     project: str,
     func: typing.Union[str, None] = None,
     application_class: typing.Union[
-        str,
+        str,
+        mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
+        mm_app.ModelMonitoringApplicationBaseV2,
+        None,
     ] = None,
     name: typing.Optional[str] = None,
     image: typing.Optional[str] = None,
@@ -602,6 +606,20 @@ def _create_model_monitoring_function_base(
     Note: this is an internal API only.
     This function does not set the labels or mounts v3io.
     """
+    if isinstance(
+        application_class,
+        mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
+    ):
+        warnings.warn(
+            "The `ModelMonitoringApplicationBase` class is deprecated from version 1.7.0, "
+            "please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
+            FutureWarning,
+        )
+    if name in mm_constants.MonitoringFunctionNames.list():
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"An application cannot have the following names: "
+            f"{mm_constants.MonitoringFunctionNames.list()}"
+        )
     if func is None:
         func = ""
     func_obj = typing.cast(
@@ -618,14 +636,19 @@ def _create_model_monitoring_function_base(
         ),
     )
     graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
+    prepare_step = graph.to(
+        class_name="mlrun.model_monitoring.applications._application_steps._PrepareMonitoringEvent",
+        name="PrepareMonitoringEvent",
+        application_name=name,
+    )
     if isinstance(application_class, str):
-
+        app_step = prepare_step.to(class_name=application_class, **application_kwargs)
     else:
-
-
-        class_name="mlrun.model_monitoring.
+        app_step = prepare_step.to(class_name=application_class)
+    app_step.to(
+        class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
         name="PushToMonitoringWriter",
         project=project,
-        writer_application_name=
+        writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
     ).respond()
     return func_obj
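The api.py hunks deprecate ModelMonitoringApplicationBase in favor of ModelMonitoringApplicationBaseV2, reject application names that collide with the reserved monitoring function names, and wire every application behind the new _PrepareMonitoringEvent and _PushToMonitoringWriter steps. A registration sketch from the project API, assuming set_model_monitoring_function delegates to the internal builder shown above and that MyMonitoringApp is a user-defined ModelMonitoringApplicationBaseV2 subclass in a hypothetical my_app.py:

    import mlrun

    project = mlrun.get_or_create_project("fraud-demo")

    app_fn = project.set_model_monitoring_function(
        func="my_app.py",                     # hypothetical module containing the app class
        application_class="MyMonitoringApp",  # resolved by class-name string, as in the graph wiring above
        name="my-monitoring-app",             # must not be one of the reserved MonitoringFunctionNames
        image="mlrun/mlrun",
    )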