mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +0 -105
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +8 -250
- mlrun/artifacts/dataset.py +1 -190
- mlrun/artifacts/manager.py +2 -41
- mlrun/artifacts/model.py +1 -140
- mlrun/artifacts/plots.py +1 -375
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +24 -3
- mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
- mlrun/common/schemas/project.py +1 -0
- mlrun/config.py +14 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +50 -3
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +125 -6
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +1 -1
- mlrun/db/httpdb.py +69 -33
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/kfpops.py +5 -10
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +2 -2
- mlrun/model.py +36 -9
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +158 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/model_monitoring/writer.py +69 -39
- mlrun/platforms/iguazio.py +2 -2
- mlrun/projects/pipelines.py +24 -7
- mlrun/projects/project.py +130 -65
- mlrun/render.py +2 -10
- mlrun/run.py +1 -4
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +275 -153
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/runtimes/pod.py +5 -5
- mlrun/runtimes/utils.py +1 -1
- mlrun/serving/states.py +53 -2
- mlrun/utils/helpers.py +27 -40
- mlrun/utils/notifications/notification/slack.py +31 -8
- mlrun/utils/notifications/notification_pusher.py +133 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +84 -79
- mlrun/runtimes/mpijob/v1alpha1.py +0 -29
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/feature_store/api.py
CHANGED
@@ -113,6 +113,7 @@ def get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters: list = None,
 ):
     """retrieve offline feature vector results

@@ -175,6 +176,13 @@ def get_offline_features(
         By default, the filter executes on the timestamp_key of each feature set.
         Note: the time filtering is performed on each feature set before the
              merge process using start_time and end_time params.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+

     """
     return _get_offline_features(
@@ -194,6 +202,7 @@ def get_offline_features(
         order_by,
         spark_service,
         timestamp_for_filtering,
+        additional_filters,
     )


@@ -214,6 +223,7 @@ def _get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -252,6 +262,7 @@ def _get_offline_features(
         start_time=start_time,
         end_time=end_time,
         timestamp_for_filtering=timestamp_for_filtering,
+        additional_filters=additional_filters,
     )

     merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -267,6 +278,7 @@ def _get_offline_features(
         update_stats=update_stats,
         query=query,
         order_by=order_by,
+        additional_filters=additional_filters,
     )


@@ -1005,53 +1017,6 @@ def _deploy_ingestion_service_v2(
     return function.deploy(), function


-@deprecated(
-    version="1.5.0",
-    reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> str:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        my_set.deploy_ingestion_service(source, run_config=config)
-
-    :param featureset:   feature set object or uri
-    :param source:       data source object describing the online or offline source
-    :param targets:      list of data target objects
-    :param name:         name for the job/function
-    :param run_config:   service runtime configuration (function object/uri, resources, etc..)
-    :param verbose:      verbose log
-
-    :return: URL to access the deployed ingestion service
-    """
-    endpoint, _ = featureset.deploy_ingestion_service(
-        source=source,
-        targets=targets,
-        name=name,
-        run_config=run_config,
-        verbose=verbose,
-    )
-    return endpoint
-
-
 def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
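Taken together, the api.py hunks thread a new `additional_filters` parameter from `get_offline_features` down to the merger engines, and drop the long-deprecated `deploy_ingestion_service` in favor of `deploy_ingestion_service_v2`. A minimal usage sketch of the new parameter (the vector URI and column name are illustrative):

    import mlrun.feature_store as fstore

    # any feature vector whose parquet data has a "Product" column
    vector = "store://feature-vectors/my-project/sales-vector"

    resp = fstore.get_offline_features(
        vector,
        # row-level predicates pushed down to the reader as (column, op, value)
        additional_filters=[("Product", "=", "Computer")],
    )
    df = resp.to_dataframe()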
mlrun/feature_store/feature_set.py
CHANGED

@@ -917,6 +917,7 @@ class FeatureSet(ModelObj):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return featureset (offline) data as dataframe
@@ -928,6 +929,12 @@ class FeatureSet(ModelObj):
         :param end_time: filter by end time
         :param time_column: specify the time column name in the file
         :param kwargs: additional reader (csv, parquet, ..) args
+        :param additional_filters: List of additional_filter conditions as tuples.
+                                   Each tuple should be in the format (column_name, operator, value).
+                                   Supported operators: "=", ">=", "<=", ">", "<".
+                                   Example: [("Product", "=", "Computer")]
+                                   For all supported filters, please see:
+                                   https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         :return: DataFrame
         """
         entities = list(self.spec.entities.keys())
@@ -946,6 +953,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_field=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -965,6 +973,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return result
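`FeatureSet.to_dataframe` accepts the same tuple filters. A sketch, assuming an existing feature set object `stocks_set` with a numeric "bid" column:

    df = stocks_set.to_dataframe(
        start_time="2024-01-01",
        end_time="2024-02-01",
        time_column="time",
        # keep only rows where bid > 50, in addition to the time filtering
        additional_filters=[("bid", ">", 50)],
    )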
mlrun/feature_store/retrieval/base.py
CHANGED

@@ -88,6 +88,7 @@ class BaseMerger(abc.ABC):
         update_stats=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._target = target

@@ -134,6 +135,7 @@ class BaseMerger(abc.ABC):
             timestamp_for_filtering=timestamp_for_filtering,
             query=query,
             order_by=order_by,
+            additional_filters=additional_filters,
         )

     def _write_to_offline_target(self, timestamp_key=None):
@@ -186,6 +188,7 @@ class BaseMerger(abc.ABC):
         timestamp_for_filtering=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._create_engine_env()

@@ -212,7 +215,7 @@ class BaseMerger(abc.ABC):
                 feature_sets.append(None)
                 join_types.append(None)

-        filtered = False
+        timestamp_filtered = False
         for step in join_graph.steps:
             name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
@@ -250,7 +253,7 @@ class BaseMerger(abc.ABC):
             if self._drop_indexes:
                 self._append_drop_column(time_column)
             if (start_time or end_time) and time_column:
-                filtered = True
+                timestamp_filtered = True

             df = self._get_engine_df(
                 feature_set,
@@ -259,6 +262,7 @@ class BaseMerger(abc.ABC):
                 start_time if time_column else None,
                 end_time if time_column else None,
                 time_column,
+                additional_filters,
             )

             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
@@ -302,8 +306,8 @@ class BaseMerger(abc.ABC):
                 new_columns.append((column, alias))
             self._update_alias(dictionary={name: alias for name, alias in new_columns})

-        # None of the feature sets was filtered as required
-        if not filtered and (start_time or end_time):
+        # None of the feature sets was timestamp filtered as required
+        if not timestamp_filtered and (start_time or end_time):
             raise mlrun.errors.MLRunRuntimeError(
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
@@ -755,6 +759,7 @@ class BaseMerger(abc.ABC):
         start_time: typing.Union[str, datetime] = None,
         end_time: typing.Union[str, datetime] = None,
         time_column: typing.Optional[str] = None,
+        additional_filters=None,
     ):
         """
         Return the feature_set data frame according to the args
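The docstrings above defer to pyarrow for the full filter syntax; at the storage layer the tuples map onto `pyarrow.parquet` filter expressions, roughly equivalent to:

    import pyarrow.parquet as pq

    # "data/" is a placeholder path to a parquet dataset
    table = pq.read_table("data/", filters=[("Product", "=", "Computer")])
    df = table.to_pandas()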
mlrun/data_types/to_pandas.py
CHANGED

@@ -79,10 +79,10 @@ class PandasConversionMixin:
                     msg = (
                         "toPandas attempted Arrow optimization because "
                         "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                        "failed by the reason below:\n  %s\n"
+                        f"failed by the reason below:\n  {e}\n"
                         "Attempting non-optimization as "
                         "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                        "true." % str(e)
+                        "true."
                     )
                     warnings.warn(msg)
                     use_arrow = False
@@ -92,7 +92,7 @@ class PandasConversionMixin:
                         "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                         "reached the error below and will not continue because automatic fallback "
                         "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                        "false.\n  %s" % str(e)
+                        f"false.\n  {e}"
                     )
                     warnings.warn(msg)
                     raise
@@ -158,7 +158,7 @@ class PandasConversionMixin:
                     "reached the error below and can not continue. Note that "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                     "effect on failures in the middle of "
-                    "computation.\n  %s" % str(e)
+                    f"computation.\n  {e}"
                 )
                 warnings.warn(msg)
                 raise
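The f-string rewrite preserves the old behavior: with implicit literal concatenation, the single `% str(e)` bound to the whole joined string, so the interpolation landed in the first piece either way. A quick check of the equivalence:

    e = ValueError("boom")
    old = (
        "failed by the reason below:\n  %s\n"
        "true." % str(e)  # % applies after the adjacent literals are joined
    )
    new = (
        f"failed by the reason below:\n  {e}\n"
        "true."  # interpolation is local to the f-string piece
    )
    assert old == new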
mlrun/feature_store/retrieval/dask_merger.py
CHANGED

@@ -145,6 +145,7 @@ class DaskFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         import dask.dataframe as dd

@@ -155,6 +156,7 @@ class DaskFeatureMerger(BaseMerger):
             end_time=end_time,
             time_column=time_column,
             index=False,
+            additional_filters=additional_filters,
         )

         return self._reset_index(df).persist()
mlrun/feature_store/retrieval/job.py
CHANGED

@@ -42,6 +42,7 @@ def run_merge_job(
     start_time=None,
     end_time=None,
     timestamp_for_filtering=None,
+    additional_filters=None,
 ):
     name = vector.metadata.name
     if not target or not hasattr(target, "to_dict"):
@@ -116,6 +117,7 @@ def run_merge_job(
             "end_time": end_time,
             "timestamp_for_filtering": timestamp_for_filtering,
             "engine_args": engine_args,
+            "additional_filters": additional_filters,
         },
         inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
     )
mlrun/feature_store/retrieval/local_merger.py
CHANGED

@@ -114,12 +114,14 @@ class LocalFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         df = feature_set.to_dataframe(
             columns=column_names,
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
         )
         if df.index.names[0]:
             df.reset_index(inplace=True)
mlrun/feature_store/retrieval/spark_merger.py
CHANGED

@@ -225,7 +225,12 @@ class SparkFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         source_kwargs = {}
         if feature_set.spec.passthrough:
             if not feature_set.spec.source:
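`additional_filters_warning` is defined in mlrun/utils/helpers.py (changed in this release but not shown in this section); a plausible shape for it, inferred from the call site above:

    import warnings

    # sketch only: warn and ignore the filters, since the Spark engine
    # does not support pushing them down
    def additional_filters_warning(additional_filters, class_name):
        if additional_filters:
            warnings.warn(
                f"additional_filters is not supported by {class_name.__name__}, "
                "the filters will be ignored"
            )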
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py
CHANGED

@@ -547,9 +547,9 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
                 "inputs",
                 "parameters",
             ]:
-                text += "\n  * **{}**: {}".format(
-                    property_name.capitalize(),
-                    self._markdown_print(value=property_value, tabs=2),
+                text += (
+                    f"\n  * **{property_name.capitalize()}**: "
+                    f"{self._markdown_print(value=property_value, tabs=2)}"
                 )
         else:
             for property_name, property_value in self._extract_epoch_results().items():
@@ -614,13 +614,8 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
         :return: The generated link.
         """
         return (
-            '<a href="{}/{}/{}/jobs/monitor/{}/overview" target="_blank">{}</a>'.format(
-                config.resolve_ui_url(),
-                config.ui.projects_prefix,
-                context.project,
-                context.uid,
-                link_text,
-            )
+            f'<a href="{config.resolve_ui_url()}/{config.ui.projects_prefix}/{context.project}'
+            f'/jobs/monitor/{context.uid}/overview" target="_blank">{link_text}</a>'
         )

     @staticmethod
mlrun/kfpops.py
CHANGED
@@ -33,7 +33,6 @@ from .utils import (
     get_in,
     get_workflow_url,
     is_ipython,
-    is_legacy_artifact,
     logger,
     run_keys,
     version,
@@ -121,14 +120,8 @@ def get_kfp_outputs(artifacts, labels, project):
     outputs = []
     out_dict = {}
     for output in artifacts:
-        if is_legacy_artifact(output):
-            key = output["key"]
-            # The spec in a legacy artifact is contained in the main object, so using this assignment saves us a lot
-            # of if/else in the rest of this function.
-            output_spec = output
-        else:
-            key = output.get("metadata")["key"]
-            output_spec = output.get("spec", {})
+        key = output.get("metadata")["key"]
+        output_spec = output.get("spec", {})

         target = output_spec.get("target_path", "")
         target = output_spec.get("inline", target)
@@ -655,7 +648,9 @@ def add_default_env(k8s_client, cop):
         )
     )

-    auth_env_var = mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session
+    auth_env_var = (
+        mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session
+    )
    if auth_env_var in os.environ or "V3IO_ACCESS_KEY" in os.environ:
        cop.container.add_env_variable(
            k8s_client.V1EnvVar(
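With the legacy branch removed, `get_kfp_outputs` assumes every artifact dict uses the metadata/spec structure, e.g. (values illustrative):

    artifact = {
        "metadata": {"key": "my-model"},
        "spec": {"target_path": "s3://bucket/my-model/"},
    }
    key = artifact.get("metadata")["key"]       # key is now always nested
    output_spec = artifact.get("spec", {})
    target = output_spec.get("target_path", "")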
mlrun/launcher/base.py
CHANGED
@@ -403,7 +403,7 @@ class BaseLauncher(abc.ABC):
         )
         if (
             run.status.state
-            in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
+            in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
         ):
             if runtime._is_remote and not runtime.is_child:
                 logger.error(
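This one-line change follows the module move recorded in the file list (mlrun/runtimes/constants.py → mlrun/common/runtimes/constants.py); code importing the old path needs the same update:

    # old location, removed by the move:
    #   from mlrun.runtimes.constants import RunStates
    from mlrun.common.runtimes.constants import RunStates

    terminal = RunStates.terminal_states()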
mlrun/launcher/client.py
CHANGED
@@ -71,7 +71,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
     ):
         run.metadata.labels["kind"] = runtime.kind
         mlrun.runtimes.utils.enrich_run_labels(
-            run.metadata.labels, [mlrun.runtimes.constants.RunLabels.owner]
+            run.metadata.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
         )
         if run.spec.output_path:
             run.spec.output_path = run.spec.output_path.replace(
mlrun/lists.py
CHANGED
@@ -21,7 +21,7 @@ import mlrun.frameworks
 from .artifacts import Artifact, dict_to_artifact
 from .config import config
 from .render import artifacts_to_html, runs_to_html
-from .utils import flatten, get_artifact_target, get_in, is_legacy_artifact
+from .utils import flatten, get_artifact_target, get_in

 list_header = [
     "project",
@@ -184,7 +184,7 @@ class ArtifactList(list):
             "uri": ["uri", "uri"],
         }
         for artifact in self:
-            fields_index = 0 if is_legacy_artifact(artifact) else 1
+            fields_index = 1
             row = [get_in(artifact, v[fields_index], "") for k, v in head.items()]
             artifact_uri = dict_to_artifact(artifact).uri
             last_index = len(row) - 1
mlrun/model.py
CHANGED
@@ -33,7 +33,6 @@ from .utils import (
     dict_to_json,
     dict_to_yaml,
     get_artifact_target,
-    is_legacy_artifact,
     logger,
     template_artifact_path,
 )
@@ -766,6 +765,11 @@ class RunMetadata(ModelObj):
     def iteration(self, iteration):
         self._iteration = iteration

+    def is_workflow_runner(self):
+        if not self.labels:
+            return False
+        return self.labels.get("job-type", "") == "workflow-runner"
+

 class HyperParamStrategies:
     grid = "grid"
@@ -1218,6 +1222,19 @@ class RunStatus(ModelObj):
         self.reason = reason
         self.notifications = notifications or {}

+    def is_failed(self) -> Optional[bool]:
+        """
+        This method returns whether a run has failed.
+        Returns none if state has yet to be defined. callee is responsible for handling None.
+        (e.g wait for state to be defined)
+        """
+        if not self.state:
+            return None
+        return self.state.casefold() in [
+            mlrun.run.RunStatuses.failed.casefold(),
+            mlrun.run.RunStatuses.error.casefold(),
+        ]
+

 class RunTemplate(ModelObj):
     """Run template"""
@@ -1417,11 +1434,14 @@ class RunObject(RunTemplate):
         unknown_error = ""
         if (
             self.status.state
-            in mlrun.runtimes.constants.RunStates.abortion_states()
+            in mlrun.common.runtimes.constants.RunStates.abortion_states()
         ):
             unknown_error = "Run was aborted"

-        elif self.status.state in mlrun.runtimes.constants.RunStates.error_states():
+        elif (
+            self.status.state
+            in mlrun.common.runtimes.constants.RunStates.error_states()
+        ):
             unknown_error = "Unknown error"

         return (
@@ -1459,7 +1479,7 @@ class RunObject(RunTemplate):
         outputs = {k: v for k, v in self.status.results.items()}
         if self.status.artifacts:
             for a in self.status.artifacts:
-                key = a["key"]
+                key = a["metadata"]["key"]
                 outputs[key] = get_artifact_target(a, self.metadata.project)
         return outputs

@@ -1502,7 +1522,10 @@ class RunObject(RunTemplate):

     def state(self):
         """current run state"""
-        if self.status.state in mlrun.runtimes.constants.RunStates.terminal_states():
+        if (
+            self.status.state
+            in mlrun.common.runtimes.constants.RunStates.terminal_states()
+        ):
             return self.status.state
         self.refresh()
         return self.status.state or "unknown"
@@ -1564,7 +1587,7 @@ class RunObject(RunTemplate):
         last_pull_log_time = None
         logs_enabled = show_logs is not False
         state = self.state()
-        if state not in mlrun.runtimes.constants.RunStates.terminal_states():
+        if state not in mlrun.common.runtimes.constants.RunStates.terminal_states():
             logger.info(
                 f"run {self.metadata.name} is not completed yet, waiting for it to complete",
                 current_state=state,
@@ -1574,7 +1597,8 @@ class RunObject(RunTemplate):
         if (
             logs_enabled
             and logs_interval
-            and state not in mlrun.runtimes.constants.RunStates.terminal_states()
+            and state
+            not in mlrun.common.runtimes.constants.RunStates.terminal_states()
             and (
                 last_pull_log_time is None
                 or (datetime.now() - last_pull_log_time).seconds > logs_interval
@@ -1583,7 +1607,7 @@ class RunObject(RunTemplate):
             last_pull_log_time = datetime.now()
             state, offset = self.logs(watch=False, offset=offset)

-            if state in mlrun.runtimes.constants.RunStates.terminal_states():
+            if state in mlrun.common.runtimes.constants.RunStates.terminal_states():
                 if logs_enabled and logs_interval:
                     self.logs(watch=False, offset=offset)
                 break
@@ -1595,7 +1619,10 @@ class RunObject(RunTemplate):
             )
         if logs_enabled and not logs_interval:
             self.logs(watch=False)
-        if raise_on_failure and state != mlrun.runtimes.constants.RunStates.completed:
+        if (
+            raise_on_failure
+            and state != mlrun.common.runtimes.constants.RunStates.completed
+        ):
             raise mlrun.errors.MLRunRuntimeError(
                 f"Task {self.metadata.name} did not complete (state={state})"
             )
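A sketch of the two new helpers on a run object (assumes `project` is an mlrun project; the function name is illustrative):

    run = project.run_function("trainer")

    failed = run.status.is_failed()
    if failed is None:
        # state not populated yet; the caller is expected to wait
        run.wait_for_completion()
    elif failed:
        print("run failed or errored")

    if run.metadata.is_workflow_runner():
        # true when the run carries the label job-type == "workflow-runner"
        print("this run launches a workflow")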
mlrun/model_monitoring/api.py
CHANGED
@@ -22,9 +22,10 @@ import pandas as pd

 import mlrun.artifacts
 import mlrun.common.helpers
-import mlrun.common.schemas.model_monitoring.constants as mm_constant
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.feature_store
 import mlrun.model_monitoring.application
+import mlrun.model_monitoring.applications as mm_app
 import mlrun.serving
 from mlrun.data_types.infer import InferOptions, get_df_stats
 from mlrun.utils import datetime_now, logger
@@ -48,7 +49,7 @@ def get_or_create_model_endpoint(
     sample_set_statistics: dict[str, typing.Any] = None,
     drift_threshold: float = None,
     possible_drift_threshold: float = None,
-    monitoring_mode: mm_constant.ModelMonitoringMode = mm_constant.ModelMonitoringMode.disabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
     db_session=None,
 ) -> ModelEndpoint:
     """
@@ -128,7 +129,7 @@ def record_results(
     context: typing.Optional[mlrun.MLClientCtx] = None,
     infer_results_df: typing.Optional[pd.DataFrame] = None,
     sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
-    monitoring_mode: mm_constant.ModelMonitoringMode = mm_constant.ModelMonitoringMode.enabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.enabled,
     # Deprecated arguments:
     drift_threshold: typing.Optional[float] = None,
     possible_drift_threshold: typing.Optional[float] = None,
@@ -282,7 +283,7 @@ def _model_endpoint_validations(
     # drift and possible drift thresholds
     if drift_threshold:
         current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            mm_constant.EventFieldType.DRIFT_DETECTED_THRESHOLD,
+            mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
         )
         if current_drift_threshold != drift_threshold:
@@ -293,7 +294,7 @@ def _model_endpoint_validations(

     if possible_drift_threshold:
         current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
-            mm_constant.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
+            mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
             mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
         )
         if current_possible_drift_threshold != possible_drift_threshold:
@@ -332,14 +333,14 @@ def write_monitoring_df(
     )

     # Modify the DataFrame to the required structure that will be used later by the monitoring batch job
-    if mm_constant.EventFieldType.TIMESTAMP not in infer_results_df.columns:
+    if mm_constants.EventFieldType.TIMESTAMP not in infer_results_df.columns:
         # Initialize timestamp column with the current time
-        infer_results_df[mm_constant.EventFieldType.TIMESTAMP] = infer_datetime
+        infer_results_df[mm_constants.EventFieldType.TIMESTAMP] = infer_datetime

     # `endpoint_id` is the monitoring feature set entity and therefore it should be defined as the df index before
     # the ingest process
-    infer_results_df[mm_constant.EventFieldType.ENDPOINT_ID] = endpoint_id
-    infer_results_df.set_index(mm_constant.EventFieldType.ENDPOINT_ID, inplace=True)
+    infer_results_df[mm_constants.EventFieldType.ENDPOINT_ID] = endpoint_id
+    infer_results_df.set_index(mm_constants.EventFieldType.ENDPOINT_ID, inplace=True)

     monitoring_feature_set.ingest(source=infer_results_df, overwrite=False)

@@ -355,7 +356,7 @@ def _generate_model_endpoint(
     sample_set_statistics: dict[str, typing.Any],
     drift_threshold: float,
     possible_drift_threshold: float,
-    monitoring_mode: mm_constant.ModelMonitoringMode = mm_constant.ModelMonitoringMode.disabled,
+    monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
 ) -> ModelEndpoint:
     """
     Write a new model endpoint record.
@@ -394,11 +395,11 @@ def _generate_model_endpoint(
     model_endpoint.spec.model_class = "drift-analysis"
     if drift_threshold:
         model_endpoint.spec.monitor_configuration[
-            mm_constant.EventFieldType.DRIFT_DETECTED_THRESHOLD
+            mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
         ] = drift_threshold
     if possible_drift_threshold:
         model_endpoint.spec.monitor_configuration[
-            mm_constant.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
+            mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
         ] = possible_drift_threshold

     model_endpoint.spec.monitoring_mode = monitoring_mode
@@ -589,7 +590,10 @@ def _create_model_monitoring_function_base(
     project: str,
     func: typing.Union[str, None] = None,
     application_class: typing.Union[
-        str, mlrun.model_monitoring.application.ModelMonitoringApplicationBase, None
+        str,
+        mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
+        mm_app.ModelMonitoringApplicationBaseV2,
+        None,
     ] = None,
     name: typing.Optional[str] = None,
     image: typing.Optional[str] = None,
@@ -602,6 +606,20 @@ def _create_model_monitoring_function_base(
     Note: this is an internal API only.
     This function does not set the labels or mounts v3io.
     """
+    if isinstance(
+        application_class,
+        mlrun.model_monitoring.application.ModelMonitoringApplicationBase,
+    ):
+        warnings.warn(
+            "The `ModelMonitoringApplicationBase` class is deprecated from version 1.7.0, "
+            "please use `ModelMonitoringApplicationBaseV2`. It will be removed in 1.9.0.",
+            FutureWarning,
+        )
+    if name in mm_constants.MonitoringFunctionNames.list():
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"An application cannot have the following names: "
+            f"{mm_constants.MonitoringFunctionNames.list()}"
+        )
     if func is None:
         func = ""
     func_obj = typing.cast(
@@ -618,14 +636,19 @@ def _create_model_monitoring_function_base(
         ),
     )
     graph = func_obj.set_topology(mlrun.serving.states.StepKinds.flow)
+    prepare_step = graph.to(
+        class_name="mlrun.model_monitoring.applications._application_steps._PrepareMonitoringEvent",
+        name="PrepareMonitoringEvent",
+        application_name=name,
+    )
     if isinstance(application_class, str):
-        graph.to(class_name=application_class, **application_kwargs)
+        app_step = prepare_step.to(class_name=application_class, **application_kwargs)
     else:
-        graph.to(class_name=application_class)
-    graph.to(
-        class_name="mlrun.model_monitoring.application._PushToMonitoringWriter",
+        app_step = prepare_step.to(class_name=application_class)
+    app_step.to(
+        class_name="mlrun.model_monitoring.applications._application_steps._PushToMonitoringWriter",
         name="PushToMonitoringWriter",
         project=project,
-        writer_application_name=mm_constant.MonitoringFunctionNames.WRITER,
+        writer_application_name=mm_constants.MonitoringFunctionNames.WRITER,
     ).respond()
     return func_obj
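The new wiring builds each monitoring function as a three-step graph, _PrepareMonitoringEvent → application class → _PushToMonitoringWriter, rejects the reserved names in `MonitoringFunctionNames`, and deprecates the V1 base class. A sketch of a V2 application, assuming the `do_tracking` handler from the new applications package (see mlrun/model_monitoring/applications/base.py and context.py in the file list):

    from mlrun.model_monitoring.applications import ModelMonitoringApplicationBaseV2

    class MyMonitoringApp(ModelMonitoringApplicationBaseV2):
        def do_tracking(self, monitoring_context):
            # inspect the monitoring context (sample df, endpoint metadata, ...)
            # and return application result objects (see applications/results.py)
            ...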