mlrun 1.6.0rc11__py3-none-any.whl → 1.6.0rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +2 -2
- mlrun/config.py +2 -2
- mlrun/datastore/azure_blob.py +25 -17
- mlrun/datastore/datastore_profile.py +45 -0
- mlrun/datastore/dbfs_store.py +3 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/s3.py +4 -4
- mlrun/datastore/sources.py +4 -4
- mlrun/datastore/targets.py +13 -3
- mlrun/feature_store/retrieval/base.py +24 -0
- mlrun/feature_store/retrieval/dask_merger.py +8 -0
- mlrun/feature_store/retrieval/local_merger.py +9 -2
- mlrun/model_monitoring/api.py +37 -6
- mlrun/model_monitoring/controller.py +6 -5
- mlrun/model_monitoring/controller_handler.py +1 -2
- mlrun/model_monitoring/helpers.py +59 -1
- mlrun/projects/operations.py +24 -10
- mlrun/projects/project.py +52 -32
- mlrun/runtimes/databricks_job/databricks_runtime.py +128 -62
- mlrun/runtimes/databricks_job/databricks_wrapper.py +0 -1
- mlrun/runtimes/function.py +8 -1
- mlrun/runtimes/kubejob.py +7 -1
- mlrun/serving/server.py +18 -1
- mlrun/serving/states.py +5 -1
- mlrun/utils/helpers.py +14 -10
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/METADATA +29 -29
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/RECORD +32 -32
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc11.dist-info → mlrun-1.6.0rc13.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED
@@ -547,7 +547,7 @@ def build(
     archive = archive or mlconf.default_archive
     if archive:
         src = b.source or "./"
-        logger.info(f"
+        logger.info(f"Uploading data from {src} to {archive}")
         target = archive if archive.endswith("/") else archive + "/"
         target += f"src-{meta.project}-{meta.name}-{meta.tag or 'latest'}.tar.gz"
         mlrun.datastore.utils.upload_tarball(src, target)
@@ -582,7 +582,7 @@ def build(
             fp.write(image)
         with open("/tmp/fullimage", "w") as fp:
            fp.write(full_image)
-        print("
+        print("Full image path = ", full_image)

         print(f"Function built, state={state} image={image}")
     else:

mlrun/config.py
CHANGED
@@ -251,8 +251,8 @@ default_config = {
         },
         "port": 8080,
         "dirpath": expanduser("~/.mlrun/db"),
+        # in production envs we recommend to use a real db (e.g. mysql)
         "dsn": "sqlite:///db/mlrun.db?check_same_thread=false",
-        "old_dsn": "",
         "debug": False,
         "user": "",
         "password": "",
@@ -1130,7 +1130,7 @@ class Config:

     def is_explicit_ack(self) -> bool:
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
-            not self.nuclio_version or self.nuclio_version >= "1.12.
+            not self.nuclio_version or self.nuclio_version >= "1.12.9"
         )

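Note (not part of the diff): the new comment above points at swapping the default SQLite DSN for a real database in production. A minimal sketch of overriding it from code, assuming the dsn key still sits under the httpdb config section and using placeholder MySQL connection details:

import mlrun

# Placeholder DSN; driver, host, credentials and database name are illustrative only.
mlrun.mlconf.httpdb.dsn = "mysql+pymysql://mlrun:mlrun@mysql.example:3306/mlrun"
print(mlrun.mlconf.httpdb.dsn)
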
mlrun/datastore/azure_blob.py
CHANGED
@@ -15,19 +15,21 @@
 import time
 from pathlib import Path

-import fsspec
 from azure.storage.blob import BlobServiceClient
+from fsspec.registry import get_filesystem_class

 import mlrun.errors
 from mlrun.errors import err_to_str

-from .base import DataStore, FileStats
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer

 # Azure blobs will be represented with the following URL: az://<container name>. The storage account is already
 # pointed to by the connection string, so the user is not expected to specify it in any way.


 class AzureBlobStore(DataStore):
+    using_bucket = True
+
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.bsc = None
@@ -50,25 +52,31 @@ class AzureBlobStore(DataStore):
                 f"Azure adlfs not installed, run pip install adlfs, {err_to_str(exc)}"
             )
             return None
-
+        # in order to support az and wasbs kinds.
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
         return self._filesystem

     def get_storage_options(self):
         return dict(
-            account_name=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            account_key=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            connection_string=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            tenant_id=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            client_id=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            client_secret=self._get_secret_or_env("
-            or self._get_secret_or_env("
-            sas_token=self._get_secret_or_env("
-            or self._get_secret_or_env("
+            account_name=self._get_secret_or_env("account_name")
+            or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME"),
+            account_key=self._get_secret_or_env("account_key")
+            or self._get_secret_or_env("AZURE_STORAGE_KEY"),
+            connection_string=self._get_secret_or_env("connection_string")
+            or self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING"),
+            tenant_id=self._get_secret_or_env("tenant_id")
+            or self._get_secret_or_env("AZURE_STORAGE_TENANT_ID"),
+            client_id=self._get_secret_or_env("client_id")
+            or self._get_secret_or_env("AZURE_STORAGE_CLIENT_ID"),
+            client_secret=self._get_secret_or_env("client_secret")
+            or self._get_secret_or_env("AZURE_STORAGE_CLIENT_SECRET"),
+            sas_token=self._get_secret_or_env("sas_token")
+            or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
             credential=self._get_secret_or_env("credential"),
         )

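Note (not part of the diff): each storage option above now tries a plain secret key first and falls back to the conventional AZURE_STORAGE_* environment variable. A rough standalone equivalent of that lookup, with a hypothetical helper standing in for DataStore._get_secret_or_env:

import os

def get_secret_or_env(key, secrets=None):
    # check explicitly provided secrets first, then the process environment
    return (secrets or {}).get(key) or os.environ.get(key)

secrets = {"account_name": "myaccount"}
account_name = get_secret_or_env("account_name", secrets) or get_secret_or_env(
    "AZURE_STORAGE_ACCOUNT_NAME", secrets
)
print(account_name)  # -> "myaccount"
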
mlrun/datastore/datastore_profile.py
CHANGED
@@ -241,6 +241,51 @@ class DatastoreProfileGCS(DatastoreProfile):
         return res if res else None


+class DatastoreProfileAzureBlob(DatastoreProfile):
+    type: str = pydantic.Field("az")
+    _private_attributes = (
+        "connection_string",
+        "account_key",
+        "client_secret",
+        "sas_token",
+        "credential",
+    )
+    connection_string: typing.Optional[str] = None
+    account_name: typing.Optional[str] = None
+    account_key: typing.Optional[str] = None
+    tenant_id: typing.Optional[str] = None
+    client_id: typing.Optional[str] = None
+    client_secret: typing.Optional[str] = None
+    sas_token: typing.Optional[str] = None
+    credential: typing.Optional[str] = None
+
+    def url(self, subpath) -> str:
+        if subpath.startswith("/"):
+            # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
+            subpath = subpath[1:]
+        return f"az://{subpath}"
+
+    def secrets(self) -> dict:
+        res = {}
+        if self.connection_string:
+            res["connection_string"] = self.connection_string
+        if self.account_name:
+            res["account_name"] = self.account_name
+        if self.account_key:
+            res["account_key"] = self.account_key
+        if self.tenant_id:
+            res["tenant_id"] = self.tenant_id
+        if self.client_id:
+            res["client_id"] = self.client_id
+        if self.client_secret:
+            res["client_secret"] = self.client_secret
+        if self.sas_token:
+            res["sas_token"] = self.sas_token
+        if self.credential:
+            res["credential"] = self.credential
+        return res if res else None
+
+
 class DatastoreProfile2Json(pydantic.BaseModel):
     @staticmethod
     def _to_json(attributes):

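Usage sketch for the new profile class (not part of the diff; it assumes the DatastoreProfile base class takes a profile name, and the credentials are placeholders):

from mlrun.datastore.datastore_profile import DatastoreProfileAzureBlob

profile = DatastoreProfileAzureBlob(
    name="blobs",                 # assumed base-class field
    account_name="myaccount",     # placeholder
    account_key="<storage-key>",  # placeholder
)
print(profile.url("/container/data/file.parquet"))  # az://container/data/file.parquet
print(profile.secrets())  # only the fields that were set: account_name and account_key
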
mlrun/datastore/dbfs_store.py
CHANGED
@@ -15,6 +15,7 @@
 import pathlib

 from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
+from fsspec.registry import get_filesystem_class

 import mlrun.errors

@@ -86,9 +87,10 @@ class DBFSStore(DataStore):

     def get_filesystem(self, silent=True):
         """return fsspec file system object, if supported"""
+        filesystem_class = get_filesystem_class(protocol=self.kind)
         if not self._filesystem:
             self._filesystem = makeDatastoreSchemaSanitizer(
-                cls=
+                cls=filesystem_class,
                 using_bucket=False,
                 **self.get_storage_options(),
             )

mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -15,6 +15,8 @@ import os
 import tempfile
 from pathlib import Path

+from fsspec.registry import get_filesystem_class
+
 import mlrun.errors
 from mlrun.utils import logger

@@ -73,8 +75,9 @@ class GoogleCloudStorageStore(DataStore):
                     "Google gcsfs not installed, run pip install gcsfs"
                 ) from exc
             return None
+        filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
-
+            filesystem_class,
             using_bucket=self.using_bucket,
             **self.get_storage_options(),
         )

mlrun/datastore/s3.py
CHANGED
@@ -15,6 +15,7 @@
 import time

 import boto3
+from fsspec.registry import get_filesystem_class

 import mlrun.errors

@@ -113,17 +114,16 @@ class S3Store(DataStore):
         if self._filesystem:
             return self._filesystem
         try:
-            # noqa
-            import s3fs
+            import s3fs  # noqa
         except ImportError as exc:
             if not silent:
                 raise ImportError(
                     "AWS s3fs not installed, run pip install s3fs"
                 ) from exc
             return None
-
+        filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
-
+            filesystem_class,
             using_bucket=self.using_bucket,
             **self.get_storage_options(),
         )

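Note (not part of the diff): the same pattern now appears in the Azure, DBFS, GCS and S3 stores. Instead of importing a concrete fsspec implementation, each store asks fsspec's registry for the class that matches its own URL scheme, so e.g. both az:// and wasbs:// resolve to the right backend. A minimal standalone illustration (requires the corresponding backend package, e.g. s3fs, to be installed):

from fsspec.registry import get_filesystem_class

fs_cls = get_filesystem_class("s3")  # resolves to s3fs.S3FileSystem when s3fs is installed
print(fs_cls.__module__, fs_cls.__name__)

# storage options are forwarded as keyword arguments, mirroring get_storage_options()
fs = fs_cls(anon=True)
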
mlrun/datastore/sources.py
CHANGED
@@ -177,7 +177,7 @@ class CSVSource(BaseSourceDriver):
             parse_dates.append(time_field)

         data_item = mlrun.store_manager.object(self.path)
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         else:
@@ -193,7 +193,7 @@ class CSVSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         result = {
@@ -340,7 +340,7 @@ class ParquetSource(BaseSourceDriver):
         attributes["context"] = context

         data_item = mlrun.store_manager.object(self.path)
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         else:
@@ -357,7 +357,7 @@ class ParquetSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(self.path)
             path = store.url + path
         result = {

mlrun/datastore/targets.py
CHANGED
@@ -877,7 +877,7 @@ class ParquetTarget(BaseStoreTarget):
         else:
             storage_options = storage_options or self.storage_options

-        graph.add_step(
+        step = graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
@@ -894,6 +894,16 @@ class ParquetTarget(BaseStoreTarget):
             **self.attributes,
         )

+        original_to_dict = step.to_dict
+
+        def delete_update_last_written(*arg, **kargs):
+            result = original_to_dict(*arg, **kargs)
+            del result["class_args"]["update_last_written"]
+            return result
+
+        # update_last_written is not serializable (ML-5108)
+        step.to_dict = delete_update_last_written
+
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         partition_cols = []
         if timestamp_key:
@@ -912,7 +922,7 @@ class ParquetTarget(BaseStoreTarget):
             if unit == time_partitioning_granularity:
                 break

-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(
                 self.get_target_path()
             )
@@ -1054,7 +1064,7 @@ class CSVTarget(BaseStoreTarget):
         )

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        if self.path.startswith("ds://"):
+        if self.path and self.path.startswith("ds://"):
             store, path = mlrun.store_manager.get_or_create_store(
                 self.get_target_path()
             )

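Note (not part of the diff): the to_dict wrapper above addresses ML-5108, where the ParquetTarget step's class_args carry an update_last_written argument that cannot be round-tripped when the graph is serialized. A small illustration of the underlying problem, using a stand-in callable:

import json

class_args = {
    "path": "memory://out.parquet",
    "update_last_written": lambda ts: ts,  # stand-in for the real callback
}

try:
    json.dumps(class_args)
except TypeError as err:
    print(err)  # Object of type function is not JSON serializable

class_args.pop("update_last_written")
print(json.dumps(class_args))  # serializes cleanly once the callable is dropped
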
mlrun/feature_store/retrieval/base.py
CHANGED
@@ -310,6 +310,7 @@ class BaseMerger(abc.ABC):
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
             )
+
         # join the feature data frames
         result_timestamp = self.merge(
             entity_timestamp_column=entity_timestamp_column,
@@ -383,6 +384,29 @@
     def _unpersist_df(self, df):
         pass

+    def _normalize_timestamp_column(
+        self,
+        entity_timestamp_column,
+        reference_df,
+        featureset_timestamp,
+        featureset_df,
+        featureset_name,
+    ):
+        reference_df_timestamp_type = reference_df[entity_timestamp_column].dtype.name
+        featureset_df_timestamp_type = featureset_df[featureset_timestamp].dtype.name
+
+        if reference_df_timestamp_type != featureset_df_timestamp_type:
+            logger.info(
+                f"Merger detected timestamp resolution incompatibility between feature set {featureset_name} and "
+                f"others: {reference_df_timestamp_type} and {featureset_df_timestamp_type}. Converting feature set "
+                f"timestamp column '{featureset_timestamp}' to type {reference_df_timestamp_type}."
+            )
+            featureset_df[featureset_timestamp] = featureset_df[
+                featureset_timestamp
+            ].astype(reference_df_timestamp_type)
+
+        return featureset_df
+
     def merge(
         self,
         entity_timestamp_column: str,

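Note (not part of the diff): what _normalize_timestamp_column guards against is two frames whose timestamp columns carry different datetime resolutions, which would not align cleanly in merge_asof. An isolated illustration of the cast (assumes pandas >= 2.0, where non-nanosecond datetime64 resolutions are preserved):

import pandas as pd

entity_df = pd.DataFrame(
    {"ts": pd.to_datetime(["2024-01-01 00:00:00", "2024-01-01 00:01:00"])}
)  # ts is datetime64[ns]
featureset_df = pd.DataFrame(
    {"ts": pd.to_datetime(["2024-01-01 00:00:30"]).astype("datetime64[us]")}
)  # ts is datetime64[us]

print(entity_df["ts"].dtype.name, featureset_df["ts"].dtype.name)

# the merger casts the feature-set column to the entity frame's dtype before merging
if entity_df["ts"].dtype.name != featureset_df["ts"].dtype.name:
    featureset_df["ts"] = featureset_df["ts"].astype(entity_df["ts"].dtype.name)

print(featureset_df["ts"].dtype.name)  # now matches the entity frame
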
mlrun/feature_store/retrieval/dask_merger.py
CHANGED
@@ -52,6 +52,14 @@ class DaskFeatureMerger(BaseMerger):
     ):
         from dask.dataframe.multi import merge_asof

+        featureset_df = self._normalize_timestamp_column(
+            entity_timestamp_column,
+            entity_df,
+            featureset_timestamp,
+            featureset_df,
+            featureset_name,
+        )
+
         def sort_partition(partition, timestamp):
             return partition.sort_values(timestamp)

mlrun/feature_store/retrieval/local_merger.py
CHANGED
@@ -32,11 +32,10 @@ class LocalFeatureMerger(BaseMerger):
         entity_timestamp_column: str,
         featureset_name,
         featureset_timstamp,
-        featureset_df
+        featureset_df,
         left_keys: list,
         right_keys: list,
     ):
-
         index_col_not_in_entity = "index" not in entity_df.columns
         index_col_not_in_featureset = "index" not in featureset_df.columns
         entity_df[entity_timestamp_column] = pd.to_datetime(
@@ -48,6 +47,14 @@ class LocalFeatureMerger(BaseMerger):
         entity_df.sort_values(by=entity_timestamp_column, inplace=True)
         featureset_df.sort_values(by=featureset_timstamp, inplace=True)

+        featureset_df = self._normalize_timestamp_column(
+            entity_timestamp_column,
+            entity_df,
+            featureset_timstamp,
+            featureset_df,
+            featureset_name,
+        )
+
         merged_df = pd.merge_asof(
             entity_df,
             featureset_df,

mlrun/model_monitoring/api.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 import datetime
 import hashlib
@@ -30,6 +29,7 @@ from mlrun.utils import logger

 from .batch import VirtualDrift
 from .features_drift_table import FeaturesDriftTablePlot
+from .helpers import bump_model_endpoint_last_request
 from .model_endpoint import ModelEndpoint

 # A union of all supported dataset types:
@@ -125,13 +125,14 @@ def record_results(
     model_endpoint_name: str,
     endpoint_id: str = "",
     function_name: str = "",
-    context: mlrun.MLClientCtx = None,
-    infer_results_df: pd.DataFrame = None,
-    sample_set_statistics: typing.
+    context: typing.Optional[mlrun.MLClientCtx] = None,
+    infer_results_df: typing.Optional[pd.DataFrame] = None,
+    sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
     monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.enabled,
-    drift_threshold: float = None,
-    possible_drift_threshold: float = None,
+    drift_threshold: typing.Optional[float] = None,
+    possible_drift_threshold: typing.Optional[float] = None,
     trigger_monitoring_job: bool = False,
+    last_in_batch_set: typing.Optional[bool] = True,
     artifacts_tag: str = "",
     default_batch_image="mlrun/mlrun",
 ) -> ModelEndpoint:
@@ -164,6 +165,14 @@ def record_results(
     :param possible_drift_threshold: The threshold of which to mark possible drifts.
     :param trigger_monitoring_job:   If true, run the batch drift job. If not exists, the monitoring batch function
                                      will be registered through MLRun API with the provided image.
+    :param last_in_batch_set:        This flag can (and should only) be used when the model endpoint does not have
+                                     model-monitoring set.
+                                     If set to `True` (the default), this flag marks the current monitoring window
+                                     (on this monitoring endpoint) is completed - the data inferred so far is assumed
+                                     to be the total data for this monitoring window.
+                                     You may want to set this flag to `False` if you want to record multiple results in
+                                     close time proximity ("batch set"). In this case, set this flag to `False` on all
+                                     but the last batch in the set.
     :param artifacts_tag:            Tag to use for all the artifacts resulted from the function. Will be relevant
                                      only if the monitoring batch job has been triggered.

@@ -186,6 +195,7 @@ def record_results(
         monitoring_mode=monitoring_mode,
         db_session=db,
     )
+    logger.debug("Model endpoint", endpoint=model_endpoint.to_dict())

    if infer_results_df is not None:
        # Write the monitoring parquet to the relevant model endpoint context
@@ -195,6 +205,27 @@ def record_results(
            infer_results_df=infer_results_df,
        )

+    if model_endpoint.spec.stream_path == "":
+        if last_in_batch_set:
+            logger.info(
+                "Updating the last request time to mark the current monitoring window as completed",
+                project=project,
+                endpoint_id=model_endpoint.metadata.uid,
+            )
+            bump_model_endpoint_last_request(
+                project=project, model_endpoint=model_endpoint, db=db
+            )
+    else:
+        if last_in_batch_set is not None:
+            logger.warning(
+                "`last_in_batch_set` is not `None`, but the model endpoint has a stream path. "
+                "Ignoring `last_in_batch_set`, as it is relevant only when the model "
+                "endpoint does not have a model monitoring infrastructure in place (i.e. stream path is "
+                " empty). Set `last_in_batch_set` to `None` to resolve this warning.",
+                project=project,
+                endpoint_id=model_endpoint.metadata.uid,
+            )
+
     if trigger_monitoring_job:
         # Run the monitoring batch drift job
         trigger_drift_batch_job(

mlrun/model_monitoring/controller.py
CHANGED
@@ -371,7 +371,7 @@ class MonitoringApplicationController:
         parquet_directory: str,
         storage_options: dict,
         model_monitoring_access_key: str,
-    ):
+    ) -> Optional[Tuple[str, Exception]]:
         """
         Process a model endpoint and trigger the monitoring applications. This function running on different process
         for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
@@ -433,7 +433,7 @@ class MonitoringApplicationController:
                     start_time=start_infer_time,
                     end_time=end_infer_time,
                 )
-
+                continue

             # Continue if not enough events provided since the deployment of the model endpoint
             except FileNotFoundError:
@@ -442,7 +442,7 @@ class MonitoringApplicationController:
                     endpoint=endpoint[mm_constants.EventFieldType.UID],
                     min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
                 )
-
+                continue

             # Get the timestamp of the latest request:
             latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
@@ -470,9 +470,10 @@ class MonitoringApplicationController:
                 model_monitoring_access_key=model_monitoring_access_key,
                 parquet_target_path=parquet_target_path,
             )
-        except
+        except Exception as e:
             logger.error(
-
+                "Encountered an exception",
+                endpoint_id=endpoint[mm_constants.EventFieldType.UID],
             )
             return endpoint_id, e

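Note (not part of the diff): the controller change makes each per-endpoint worker return either None or an (endpoint_id, exception) pair instead of failing silently, and the handler below then logs the collected endpoints_exceptions. A schematic of that pattern, with hypothetical names:

from typing import Optional, Tuple

def process_endpoint(endpoint_id: str) -> Optional[Tuple[str, Exception]]:
    try:
        # ... per-endpoint work would go here ...
        return None
    except Exception as e:
        return endpoint_id, e

endpoints_exceptions = {}
for endpoint_id in ["ep-1", "ep-2"]:
    result = process_endpoint(endpoint_id)
    if result is not None:
        endpoints_exceptions[result[0]] = result[1]

if endpoints_exceptions:
    print(endpoints_exceptions)  # roughly what the handler logs
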
mlrun/model_monitoring/controller_handler.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 import mlrun
 from mlrun.model_monitoring.controller import MonitoringApplicationController
@@ -29,4 +28,4 @@ def handler(context: mlrun.run.MLClientCtx):
     )
     monitor_app_controller.run()
     if monitor_app_controller.endpoints_exceptions:
-
+        context.logger.error(monitor_app_controller.endpoints_exceptions)

mlrun/model_monitoring/helpers.py
CHANGED
@@ -11,13 +11,21 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#


+import datetime
 import typing

+import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.common.schemas
+from mlrun.common.schemas.model_monitoring import EventFieldType
+from mlrun.errors import MLRunInvalidArgumentError
+from mlrun.model_monitoring.model_endpoint import ModelEndpoint
+from mlrun.utils import logger
+
+if typing.TYPE_CHECKING:
+    from mlrun.db.base import RunDBInterface


 def get_stream_path(project: str = None, application_name: str = None):
@@ -89,3 +97,53 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
         )
         or mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection
     )
+
+
+def bump_model_endpoint_last_request(
+    project: str,
+    model_endpoint: ModelEndpoint,
+    db: "RunDBInterface",
+    minutes_delta: int = 10,  # TODO: move to config - should be the same as `batch_interval`
+    seconds_delta: int = 1,
+) -> None:
+    """
+    Update the last request field of the model endpoint to be after the current last request time.
+
+    :param project:        Project name.
+    :param model_endpoint: Model endpoint object.
+    :param db:             DB interface.
+    :param minutes_delta:  Minutes delta to add to the last request time.
+    :param seconds_delta:  Seconds delta to add to the last request time. This is mainly to ensure that the last
+                           request time is strongly greater than the previous one (with respect to the window time)
+                           after adding the minutes delta.
+    """
+    if not model_endpoint.status.last_request:
+        logger.error(
+            "Model endpoint last request time is empty, cannot bump it.",
+            project=project,
+            endpoint_id=model_endpoint.metadata.uid,
+        )
+        raise MLRunInvalidArgumentError("Model endpoint last request time is empty")
+
+    bumped_last_request = (
+        datetime.datetime.fromisoformat(model_endpoint.status.last_request)
+        + datetime.timedelta(
+            minutes=minutes_delta,
+            seconds=seconds_delta,
+        )
+        + datetime.timedelta(
+            seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
+        )
+    ).strftime(EventFieldType.TIME_FORMAT)
+    logger.info(
+        "Bumping model endpoint last request time",
+        project=project,
+        endpoint_id=model_endpoint.metadata.uid,
+        last_request=model_endpoint.status.last_request,
+        bumped_last_request=bumped_last_request,
+    )
+    db.patch_model_endpoint(
+        project=project,
+        endpoint_id=model_endpoint.metadata.uid,
+        attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
+    )

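Note (not part of the diff): the date arithmetic in bump_model_endpoint_last_request, shown in isolation with stdlib only. The concrete values and the time format are placeholders; in the real function the deltas come from the arguments, the extra seconds from mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs, and the format from EventFieldType.TIME_FORMAT:

import datetime

last_request = "2024-01-01 12:00:00.000000"  # placeholder ISO-format value
time_format = "%Y-%m-%d %H:%M:%S.%f"         # placeholder format string

bumped = (
    datetime.datetime.fromisoformat(last_request)
    + datetime.timedelta(minutes=10, seconds=1)  # minutes_delta, seconds_delta
    + datetime.timedelta(seconds=30)             # parquet batching timeout (placeholder)
).strftime(time_format)

print(bumped)  # 2024-01-01 12:10:31.000000
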
mlrun/projects/operations.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import warnings
 from typing import Dict, List, Optional, Union

 import kfp
@@ -270,10 +271,17 @@ def build_function(
                             e.g. extra_args="--skip-tls-verify --build-arg A=val"
     :param force_build:     Force building the image, even when no changes were made
     """
+    if not overwrite_build_params:
+        # TODO: change overwrite_build_params default to True in 1.8.0
+        warnings.warn(
+            "The `overwrite_build_params` parameter default will change from 'False' to 'True in 1.8.0.",
+            mlrun.utils.OverwriteBuildParamsWarning,
+        )
+
     engine, function = _get_engine_and_function(function, project_object)
     if function.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
         raise mlrun.errors.MLRunInvalidArgumentError(
-            "
+            "Cannot build use deploy_function()"
         )
     if engine == "kfp":
         if overwrite_build_params:
@@ -291,15 +299,21 @@ def build_function(
             skip_deployed=skip_deployed,
         )
     else:
-
-
-
-
-
-
-
-
-
+        # TODO: remove filter once overwrite_build_params default is changed to True in 1.8.0
+        with warnings.catch_warnings():
+            warnings.simplefilter(
+                "ignore", category=mlrun.utils.OverwriteBuildParamsWarning
+            )
+
+            function.build_config(
+                image=image,
+                base_image=base_image,
+                commands=commands,
+                secret=secret_name,
+                requirements=requirements,
+                overwrite=overwrite_build_params,
+                extra_args=extra_args,
+            )
         ready = function.deploy(
             watch=True,
             with_mlrun=with_mlrun,