mlrun 1.6.0rc11__py3-none-any.whl → 1.6.0rc13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.

mlrun/__main__.py CHANGED
@@ -547,7 +547,7 @@ def build(
  archive = archive or mlconf.default_archive
  if archive:
  src = b.source or "./"
- logger.info(f"uploading data from {src} to {archive}")
+ logger.info(f"Uploading data from {src} to {archive}")
  target = archive if archive.endswith("/") else archive + "/"
  target += f"src-{meta.project}-{meta.name}-{meta.tag or 'latest'}.tar.gz"
  mlrun.datastore.utils.upload_tarball(src, target)
@@ -582,7 +582,7 @@ def build(
  fp.write(image)
  with open("/tmp/fullimage", "w") as fp:
  fp.write(full_image)
- print("full image path = ", full_image)
+ print("Full image path = ", full_image)

  print(f"Function built, state={state} image={image}")
  else:
mlrun/config.py CHANGED
@@ -251,8 +251,8 @@ default_config = {
  },
  "port": 8080,
  "dirpath": expanduser("~/.mlrun/db"),
+ # in production envs we recommend to use a real db (e.g. mysql)
  "dsn": "sqlite:///db/mlrun.db?check_same_thread=false",
- "old_dsn": "",
  "debug": False,
  "user": "",
  "password": "",
@@ -1130,7 +1130,7 @@ class Config:

  def is_explicit_ack(self) -> bool:
  return self.httpdb.nuclio.explicit_ack == "enabled" and (
- not self.nuclio_version or self.nuclio_version >= "1.12.7"
+ not self.nuclio_version or self.nuclio_version >= "1.12.9"
  )
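The new inline comment recommends a real database for production API deployments instead of the SQLite default. A minimal sketch of overriding the DSN from the environment, assuming MLRun's MLRUN_<SECTION>__<KEY> mapping of nested config keys to environment variables (the exact variable name and the MySQL DSN below are placeholders to verify against your deployment):

import os

# Assumption: nested config keys map to MLRUN_<SECTION>__<KEY> environment variables.
os.environ["MLRUN_HTTPDB__DSN"] = (
    "mysql+pymysql://mlrun:password@mysql.example.local:3306/mlrun"  # placeholder DSN
)

import mlrun  # imported after setting the variable so the config picks it up

print(mlrun.mlconf.httpdb.dsn)  # should echo the MySQL DSN instead of the SQLite default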
@@ -15,19 +15,21 @@
  import time
  from pathlib import Path

- import fsspec
  from azure.storage.blob import BlobServiceClient
+ from fsspec.registry import get_filesystem_class

  import mlrun.errors
  from mlrun.errors import err_to_str

- from .base import DataStore, FileStats
+ from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer

  # Azure blobs will be represented with the following URL: az://<container name>. The storage account is already
  # pointed to by the connection string, so the user is not expected to specify it in any way.


  class AzureBlobStore(DataStore):
+ using_bucket = True
+
  def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
  super().__init__(parent, name, schema, endpoint, secrets=secrets)
  self.bsc = None
@@ -50,25 +52,31 @@ class AzureBlobStore(DataStore):
  f"Azure adlfs not installed, run pip install adlfs, {err_to_str(exc)}"
  )
  return None
- self._filesystem = fsspec.filesystem(self.kind, **self.get_storage_options())
+ # in order to support az and wasbs kinds.
+ filesystem_class = get_filesystem_class(protocol=self.kind)
+ self._filesystem = makeDatastoreSchemaSanitizer(
+ filesystem_class,
+ using_bucket=self.using_bucket,
+ **self.get_storage_options(),
+ )
  return self._filesystem

  def get_storage_options(self):
  return dict(
- account_name=self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME")
- or self._get_secret_or_env("account_name"),
- account_key=self._get_secret_or_env("AZURE_STORAGE_KEY")
- or self._get_secret_or_env("account_key"),
- connection_string=self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING")
- or self._get_secret_or_env("connection_string"),
- tenant_id=self._get_secret_or_env("AZURE_STORAGE_TENANT_ID")
- or self._get_secret_or_env("tenant_id"),
- client_id=self._get_secret_or_env("AZURE_STORAGE_CLIENT_ID")
- or self._get_secret_or_env("client_id"),
- client_secret=self._get_secret_or_env("AZURE_STORAGE_CLIENT_SECRET")
- or self._get_secret_or_env("client_secret"),
- sas_token=self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN")
- or self._get_secret_or_env("sas_token"),
+ account_name=self._get_secret_or_env("account_name")
+ or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME"),
+ account_key=self._get_secret_or_env("account_key")
+ or self._get_secret_or_env("AZURE_STORAGE_KEY"),
+ connection_string=self._get_secret_or_env("connection_string")
+ or self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING"),
+ tenant_id=self._get_secret_or_env("tenant_id")
+ or self._get_secret_or_env("AZURE_STORAGE_TENANT_ID"),
+ client_id=self._get_secret_or_env("client_id")
+ or self._get_secret_or_env("AZURE_STORAGE_CLIENT_ID"),
+ client_secret=self._get_secret_or_env("client_secret")
+ or self._get_secret_or_env("AZURE_STORAGE_CLIENT_SECRET"),
+ sas_token=self._get_secret_or_env("sas_token")
+ or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
  credential=self._get_secret_or_env("credential"),
  )
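Two things change above: the filesystem is resolved through get_filesystem_class and wrapped with makeDatastoreSchemaSanitizer so both az:// and wasbs:// schemes work, and get_storage_options() now checks the profile-style secret keys (account_name, account_key, ...) before the AZURE_STORAGE_* environment names. A small self-contained sketch of that precedence rule, using a hypothetical first_secret helper in place of the real _get_secret_or_env:

import os

def first_secret(secrets: dict, *keys: str):
    # Return the first non-empty value, checking the explicit secrets dict
    # before the environment for each key, in the order the keys are given.
    for key in keys:
        value = secrets.get(key) or os.environ.get(key)
        if value:
            return value
    return None

# Hypothetical values: the profile-style key now wins over the AZURE_* variable.
os.environ["AZURE_STORAGE_ACCOUNT_NAME"] = "env-account"
secrets = {"account_name": "profile-account"}
assert first_secret(secrets, "account_name", "AZURE_STORAGE_ACCOUNT_NAME") == "profile-account"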
@@ -241,6 +241,51 @@ class DatastoreProfileGCS(DatastoreProfile):
  return res if res else None


+ class DatastoreProfileAzureBlob(DatastoreProfile):
+ type: str = pydantic.Field("az")
+ _private_attributes = (
+ "connection_string",
+ "account_key",
+ "client_secret",
+ "sas_token",
+ "credential",
+ )
+ connection_string: typing.Optional[str] = None
+ account_name: typing.Optional[str] = None
+ account_key: typing.Optional[str] = None
+ tenant_id: typing.Optional[str] = None
+ client_id: typing.Optional[str] = None
+ client_secret: typing.Optional[str] = None
+ sas_token: typing.Optional[str] = None
+ credential: typing.Optional[str] = None
+
+ def url(self, subpath) -> str:
+ if subpath.startswith("/"):
+ # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
+ subpath = subpath[1:]
+ return f"az://{subpath}"
+
+ def secrets(self) -> dict:
+ res = {}
+ if self.connection_string:
+ res["connection_string"] = self.connection_string
+ if self.account_name:
+ res["account_name"] = self.account_name
+ if self.account_key:
+ res["account_key"] = self.account_key
+ if self.tenant_id:
+ res["tenant_id"] = self.tenant_id
+ if self.client_id:
+ res["client_id"] = self.client_id
+ if self.client_secret:
+ res["client_secret"] = self.client_secret
+ if self.sas_token:
+ res["sas_token"] = self.sas_token
+ if self.credential:
+ res["credential"] = self.credential
+ return res if res else None
+
+
  class DatastoreProfile2Json(pydantic.BaseModel):
  @staticmethod
  def _to_json(attributes):
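The new DatastoreProfileAzureBlob profile carries the same credential fields the blob store reads. A usage sketch, assuming the class is importable from mlrun.datastore.datastore_profile; the profile name and connection string below are placeholders:

from mlrun.datastore.datastore_profile import DatastoreProfileAzureBlob

profile = DatastoreProfileAzureBlob(
    name="azblob",  # hypothetical profile name
    connection_string="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...",  # placeholder
)

# url() strips a leading "/" so the container name directly follows the scheme.
print(profile.url("/container/path/data.parquet"))  # -> az://container/path/data.parquet

# secrets() returns only the attributes that were actually set.
print(profile.secrets())  # -> {'connection_string': 'DefaultEndpointsProtocol=...'}

After registering the profile (for example with the client-side datastore-profile registration helpers, whose exact names should be checked against this release), sources and targets can reference it through ds://azblob/<container>/<path> URLs, which is what the ds:// guards later in this diff handle.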
@@ -15,6 +15,7 @@
  import pathlib

  from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
+ from fsspec.registry import get_filesystem_class

  import mlrun.errors

@@ -86,9 +87,10 @@ class DBFSStore(DataStore):

  def get_filesystem(self, silent=True):
  """return fsspec file system object, if supported"""
+ filesystem_class = get_filesystem_class(protocol=self.kind)
  if not self._filesystem:
  self._filesystem = makeDatastoreSchemaSanitizer(
- cls=DatabricksFileSystemDisableCache,
+ cls=filesystem_class,
  using_bucket=False,
  **self.get_storage_options(),
  )
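Across these datastores the fsspec implementation is now resolved from the URL scheme with get_filesystem_class instead of importing a specific class. A short sketch of that lookup using protocols bundled with fsspec itself (schemes such as az, wasbs, s3, gcs, or dbfs resolve the same way once adlfs/s3fs/gcsfs are installed):

from fsspec.registry import get_filesystem_class

for protocol in ("file", "memory"):
    cls = get_filesystem_class(protocol)
    print(protocol, "->", f"{cls.__module__}.{cls.__name__}")

The resolved class is then wrapped by the mlrun-internal makeDatastoreSchemaSanitizer(...) so that paths keep their original scheme; that wrapper is not shown here.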
@@ -15,6 +15,8 @@ import os
  import tempfile
  from pathlib import Path

+ from fsspec.registry import get_filesystem_class
+
  import mlrun.errors
  from mlrun.utils import logger

@@ -73,8 +75,9 @@ class GoogleCloudStorageStore(DataStore):
  "Google gcsfs not installed, run pip install gcsfs"
  ) from exc
  return None
+ filesystem_class = get_filesystem_class(protocol=self.kind)
  self._filesystem = makeDatastoreSchemaSanitizer(
- gcsfs.core.GCSFileSystem,
+ filesystem_class,
  using_bucket=self.using_bucket,
  **self.get_storage_options(),
  )
mlrun/datastore/s3.py CHANGED
@@ -15,6 +15,7 @@
  import time

  import boto3
+ from fsspec.registry import get_filesystem_class

  import mlrun.errors

@@ -113,17 +114,16 @@ class S3Store(DataStore):
  if self._filesystem:
  return self._filesystem
  try:
- # noqa
- import s3fs
+ import s3fs # noqa
  except ImportError as exc:
  if not silent:
  raise ImportError(
  "AWS s3fs not installed, run pip install s3fs"
  ) from exc
  return None
-
+ filesystem_class = get_filesystem_class(protocol=self.kind)
  self._filesystem = makeDatastoreSchemaSanitizer(
- s3fs.S3FileSystem,
+ filesystem_class,
  using_bucket=self.using_bucket,
  **self.get_storage_options(),
  )
@@ -177,7 +177,7 @@ class CSVSource(BaseSourceDriver):
  parse_dates.append(time_field)

  data_item = mlrun.store_manager.object(self.path)
- if self.path.startswith("ds://"):
+ if self.path and self.path.startswith("ds://"):
  store, path = mlrun.store_manager.get_or_create_store(self.path)
  path = store.url + path
  else:
@@ -193,7 +193,7 @@ class CSVSource(BaseSourceDriver):
  )

  def get_spark_options(self):
- if self.path.startswith("ds://"):
+ if self.path and self.path.startswith("ds://"):
  store, path = mlrun.store_manager.get_or_create_store(self.path)
  path = store.url + path
  result = {
@@ -340,7 +340,7 @@ class ParquetSource(BaseSourceDriver):
  attributes["context"] = context

  data_item = mlrun.store_manager.object(self.path)
- if self.path.startswith("ds://"):
+ if self.path and self.path.startswith("ds://"):
  store, path = mlrun.store_manager.get_or_create_store(self.path)
  path = store.url + path
  else:
@@ -357,7 +357,7 @@ class ParquetSource(BaseSourceDriver):
  )

  def get_spark_options(self):
- if self.path.startswith("ds://"):
+ if self.path and self.path.startswith("ds://"):
  store, path = mlrun.store_manager.get_or_create_store(self.path)
  path = store.url + path
  result = {
@@ -877,7 +877,7 @@ class ParquetTarget(BaseStoreTarget):
  else:
  storage_options = storage_options or self.storage_options

- graph.add_step(
+ step = graph.add_step(
  name=self.name or "ParquetTarget",
  after=after,
  graph_shape="cylinder",
@@ -894,6 +894,16 @@ class ParquetTarget(BaseStoreTarget):
  **self.attributes,
  )

+ original_to_dict = step.to_dict
+
+ def delete_update_last_written(*arg, **kargs):
+ result = original_to_dict(*arg, **kargs)
+ del result["class_args"]["update_last_written"]
+ return result
+
+ # update_last_written is not serializable (ML-5108)
+ step.to_dict = delete_update_last_written
+
  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
  partition_cols = []
  if timestamp_key:
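The ParquetTarget change keeps the step returned by graph.add_step() and swaps its to_dict for a wrapper that drops the non-serializable update_last_written callback from the serialized class_args (ML-5108). A self-contained sketch of that wrap-and-strip pattern, with a stand-in step object instead of a real graph step:

class FakeStep:
    # Stand-in for the graph step; only to_dict() matters for this sketch.
    def __init__(self):
        self.class_args = {"path": "out.parquet", "update_last_written": lambda t: t}

    def to_dict(self):
        return {"class_args": dict(self.class_args)}

step = FakeStep()
original_to_dict = step.to_dict

def to_dict_without_callback(*args, **kwargs):
    result = original_to_dict(*args, **kwargs)
    # The callback is not JSON-serializable, so drop it from the serialized form.
    result["class_args"].pop("update_last_written", None)
    return result

step.to_dict = to_dict_without_callback
print(step.to_dict())  # {'class_args': {'path': 'out.parquet'}}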
@@ -912,7 +922,7 @@ class ParquetTarget(BaseStoreTarget):
  if unit == time_partitioning_granularity:
  break

- if self.path.startswith("ds://"):
+ if self.path and self.path.startswith("ds://"):
  store, path = mlrun.store_manager.get_or_create_store(
  self.get_target_path()
  )
@@ -1054,7 +1064,7 @@ class CSVTarget(BaseStoreTarget):
  )

  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
- if self.path.startswith("ds://"):
+ if self.path and self.path.startswith("ds://"):
  store, path = mlrun.store_manager.get_or_create_store(
  self.get_target_path()
  )
@@ -310,6 +310,7 @@ class BaseMerger(abc.ABC):
  "start_time and end_time can only be provided in conjunction with "
  "a timestamp column, or when the at least one feature_set has a timestamp key"
  )
+
  # join the feature data frames
  result_timestamp = self.merge(
  entity_timestamp_column=entity_timestamp_column,
@@ -383,6 +384,29 @@ class BaseMerger(abc.ABC):
  def _unpersist_df(self, df):
  pass

+ def _normalize_timestamp_column(
+ self,
+ entity_timestamp_column,
+ reference_df,
+ featureset_timestamp,
+ featureset_df,
+ featureset_name,
+ ):
+ reference_df_timestamp_type = reference_df[entity_timestamp_column].dtype.name
+ featureset_df_timestamp_type = featureset_df[featureset_timestamp].dtype.name
+
+ if reference_df_timestamp_type != featureset_df_timestamp_type:
+ logger.info(
+ f"Merger detected timestamp resolution incompatibility between feature set {featureset_name} and "
+ f"others: {reference_df_timestamp_type} and {featureset_df_timestamp_type}. Converting feature set "
+ f"timestamp column '{featureset_timestamp}' to type {reference_df_timestamp_type}."
+ )
+ featureset_df[featureset_timestamp] = featureset_df[
+ featureset_timestamp
+ ].astype(reference_df_timestamp_type)
+
+ return featureset_df
+
  def merge(
  self,
  entity_timestamp_column: str,
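The new _normalize_timestamp_column hook casts the feature-set timestamp column to the reference frame's dtype before merging, because pd.merge_asof rejects keys with different datetime resolutions. A minimal reproduction and fix, assuming pandas 2.x (where non-nanosecond datetime64 resolutions such as datetime64[us] exist):

import pandas as pd

entity_df = pd.DataFrame(
    {"t": pd.to_datetime(["2024-01-01 00:00:01"]).astype("datetime64[ns]"), "key": [1]}
)
features_df = pd.DataFrame(
    {"t": pd.to_datetime(["2024-01-01 00:00:00"]).astype("datetime64[us]"), "feat": [10.0]}
)
print(entity_df["t"].dtype.name, "vs", features_df["t"].dtype.name)  # datetime64[ns] vs datetime64[us]

# Align the feature-set dtype to the reference frame, as the merger now does;
# merge_asof then accepts the keys.
features_df["t"] = features_df["t"].astype(entity_df["t"].dtype.name)
print(pd.merge_asof(entity_df.sort_values("t"), features_df.sort_values("t"), on="t"))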
@@ -52,6 +52,14 @@ class DaskFeatureMerger(BaseMerger):
  ):
  from dask.dataframe.multi import merge_asof

+ featureset_df = self._normalize_timestamp_column(
+ entity_timestamp_column,
+ entity_df,
+ featureset_timestamp,
+ featureset_df,
+ featureset_name,
+ )
+
  def sort_partition(partition, timestamp):
  return partition.sort_values(timestamp)
@@ -32,11 +32,10 @@ class LocalFeatureMerger(BaseMerger):
  entity_timestamp_column: str,
  featureset_name,
  featureset_timstamp,
- featureset_df: list,
+ featureset_df,
  left_keys: list,
  right_keys: list,
  ):
-
  index_col_not_in_entity = "index" not in entity_df.columns
  index_col_not_in_featureset = "index" not in featureset_df.columns
  entity_df[entity_timestamp_column] = pd.to_datetime(
@@ -48,6 +47,14 @@ class LocalFeatureMerger(BaseMerger):
  entity_df.sort_values(by=entity_timestamp_column, inplace=True)
  featureset_df.sort_values(by=featureset_timstamp, inplace=True)

+ featureset_df = self._normalize_timestamp_column(
+ entity_timestamp_column,
+ entity_df,
+ featureset_timstamp,
+ featureset_df,
+ featureset_name,
+ )
+
  merged_df = pd.merge_asof(
  entity_df,
  featureset_df,
@@ -11,7 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- #

  import datetime
  import hashlib
@@ -30,6 +29,7 @@ from mlrun.utils import logger

  from .batch import VirtualDrift
  from .features_drift_table import FeaturesDriftTablePlot
+ from .helpers import bump_model_endpoint_last_request
  from .model_endpoint import ModelEndpoint

  # A union of all supported dataset types:
@@ -125,13 +125,14 @@ def record_results(
  model_endpoint_name: str,
  endpoint_id: str = "",
  function_name: str = "",
- context: mlrun.MLClientCtx = None,
- infer_results_df: pd.DataFrame = None,
- sample_set_statistics: typing.Dict[str, typing.Any] = None,
+ context: typing.Optional[mlrun.MLClientCtx] = None,
+ infer_results_df: typing.Optional[pd.DataFrame] = None,
+ sample_set_statistics: typing.Optional[dict[str, typing.Any]] = None,
  monitoring_mode: ModelMonitoringMode = ModelMonitoringMode.enabled,
- drift_threshold: float = None,
- possible_drift_threshold: float = None,
+ drift_threshold: typing.Optional[float] = None,
+ possible_drift_threshold: typing.Optional[float] = None,
  trigger_monitoring_job: bool = False,
+ last_in_batch_set: typing.Optional[bool] = True,
  artifacts_tag: str = "",
  default_batch_image="mlrun/mlrun",
  ) -> ModelEndpoint:
@@ -164,6 +165,14 @@ def record_results(
  :param possible_drift_threshold: The threshold of which to mark possible drifts.
  :param trigger_monitoring_job: If true, run the batch drift job. If not exists, the monitoring batch function
  will be registered through MLRun API with the provided image.
+ :param last_in_batch_set: This flag can (and should only) be used when the model endpoint does not have
+ model-monitoring set.
+ If set to `True` (the default), this flag marks the current monitoring window
+ (on this monitoring endpoint) is completed - the data inferred so far is assumed
+ to be the total data for this monitoring window.
+ You may want to set this flag to `False` if you want to record multiple results in
+ close time proximity ("batch set"). In this case, set this flag to `False` on all
+ but the last batch in the set.
  :param artifacts_tag: Tag to use for all the artifacts resulted from the function. Will be relevant
  only if the monitoring batch job has been triggered.

@@ -186,6 +195,7 @@ def record_results(
  monitoring_mode=monitoring_mode,
  db_session=db,
  )
+ logger.debug("Model endpoint", endpoint=model_endpoint.to_dict())

  if infer_results_df is not None:
  # Write the monitoring parquet to the relevant model endpoint context
@@ -195,6 +205,27 @@ def record_results(
  infer_results_df=infer_results_df,
  )

+ if model_endpoint.spec.stream_path == "":
+ if last_in_batch_set:
+ logger.info(
+ "Updating the last request time to mark the current monitoring window as completed",
+ project=project,
+ endpoint_id=model_endpoint.metadata.uid,
+ )
+ bump_model_endpoint_last_request(
+ project=project, model_endpoint=model_endpoint, db=db
+ )
+ else:
+ if last_in_batch_set is not None:
+ logger.warning(
+ "`last_in_batch_set` is not `None`, but the model endpoint has a stream path. "
+ "Ignoring `last_in_batch_set`, as it is relevant only when the model "
+ "endpoint does not have a model monitoring infrastructure in place (i.e. stream path is "
+ " empty). Set `last_in_batch_set` to `None` to resolve this warning.",
+ project=project,
+ endpoint_id=model_endpoint.metadata.uid,
+ )
+
  if trigger_monitoring_job:
  # Run the monitoring batch drift job
  trigger_drift_batch_job(
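A usage sketch of the new last_in_batch_set flag for an endpoint without a monitoring stream; the import path and the parameters other than the ones visible in this diff (such as model_path) are assumptions to verify against the release:

import pandas as pd
from mlrun.model_monitoring.api import record_results  # assumed import path

batches = [pd.DataFrame({"f0": [0.1, 0.2], "prediction": [0, 1]}) for _ in range(3)]

for i, infer_results_df in enumerate(batches):
    record_results(
        project="my-project",  # hypothetical project name
        model_path="store://models/my-project/my-model:latest",  # hypothetical model URI
        model_endpoint_name="my-endpoint",
        infer_results_df=infer_results_df,
        # Only the last recording in the set closes the monitoring window;
        # the earlier ones pass False so last_request is not bumped yet.
        last_in_batch_set=(i == len(batches) - 1),
    )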
@@ -371,7 +371,7 @@ class MonitoringApplicationController:
  parquet_directory: str,
  storage_options: dict,
  model_monitoring_access_key: str,
- ):
+ ) -> Optional[Tuple[str, Exception]]:
  """
  Process a model endpoint and trigger the monitoring applications. This function running on different process
  for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
@@ -433,7 +433,7 @@ class MonitoringApplicationController:
  start_time=start_infer_time,
  end_time=end_infer_time,
  )
- return
+ continue

  # Continue if not enough events provided since the deployment of the model endpoint
  except FileNotFoundError:
@@ -442,7 +442,7 @@ class MonitoringApplicationController:
  endpoint=endpoint[mm_constants.EventFieldType.UID],
  min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
  )
- return
+ continue

  # Get the timestamp of the latest request:
  latest_request = df[mm_constants.EventFieldType.TIMESTAMP].iloc[-1]
@@ -470,9 +470,10 @@ class MonitoringApplicationController:
  model_monitoring_access_key=model_monitoring_access_key,
  parquet_target_path=parquet_target_path,
  )
- except FileNotFoundError as e:
+ except Exception as e:
  logger.error(
- f"Exception for endpoint {endpoint[mm_constants.EventFieldType.UID]}"
+ "Encountered an exception",
+ endpoint_id=endpoint[mm_constants.EventFieldType.UID],
  )
  return endpoint_id, e
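Replacing the early return statements with continue, together with the broadened except Exception, lets one endpoint be skipped or fail without stopping the processing of the others; failures come back as (endpoint_id, exception) pairs and are collected into endpoints_exceptions. A simplified sketch of that control flow with stand-in data (not the controller's real signatures):

def run_monitoring_applications(endpoint: dict) -> None:
    # Stand-in for pushing the window's data to the monitoring applications.
    if endpoint.get("boom"):
        raise RuntimeError("monitoring application failed")

def process_endpoints(endpoints: list) -> dict:
    endpoints_exceptions = {}
    for endpoint in endpoints:
        if endpoint["first_request"] == endpoint["last_request"]:
            continue  # still in its first interval - skip, don't abort the loop
        if not endpoint.get("has_enough_events", True):
            continue  # not enough events collected yet - skip this endpoint
        try:
            run_monitoring_applications(endpoint)
        except Exception as exc:  # record the failure instead of crashing the run
            endpoints_exceptions[endpoint["uid"]] = exc
    return endpoints_exceptions

print(process_endpoints([
    {"uid": "a", "first_request": "t0", "last_request": "t0"},
    {"uid": "b", "first_request": "t0", "last_request": "t5", "has_enough_events": False},
    {"uid": "c", "first_request": "t0", "last_request": "t5", "boom": True},
    {"uid": "d", "first_request": "t0", "last_request": "t5"},
]))
# -> {'c': RuntimeError('monitoring application failed')}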
@@ -11,7 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- #

  import mlrun
  from mlrun.model_monitoring.controller import MonitoringApplicationController
@@ -29,4 +28,4 @@ def handler(context: mlrun.run.MLClientCtx):
  )
  monitor_app_controller.run()
  if monitor_app_controller.endpoints_exceptions:
- print(monitor_app_controller.endpoints_exceptions)
+ context.logger.error(monitor_app_controller.endpoints_exceptions)
@@ -11,13 +11,21 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- #


+ import datetime
  import typing

+ import mlrun
  import mlrun.common.model_monitoring.helpers
  import mlrun.common.schemas
+ from mlrun.common.schemas.model_monitoring import EventFieldType
+ from mlrun.errors import MLRunInvalidArgumentError
+ from mlrun.model_monitoring.model_endpoint import ModelEndpoint
+ from mlrun.utils import logger
+
+ if typing.TYPE_CHECKING:
+ from mlrun.db.base import RunDBInterface


  def get_stream_path(project: str = None, application_name: str = None):
@@ -89,3 +97,53 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
  )
  or mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection
  )
+
+
+ def bump_model_endpoint_last_request(
+ project: str,
+ model_endpoint: ModelEndpoint,
+ db: "RunDBInterface",
+ minutes_delta: int = 10, # TODO: move to config - should be the same as `batch_interval`
+ seconds_delta: int = 1,
+ ) -> None:
+ """
+ Update the last request field of the model endpoint to be after the current last request time.
+
+ :param project: Project name.
+ :param model_endpoint: Model endpoint object.
+ :param db: DB interface.
+ :param minutes_delta: Minutes delta to add to the last request time.
+ :param seconds_delta: Seconds delta to add to the last request time. This is mainly to ensure that the last
+ request time is strongly greater than the previous one (with respect to the window time)
+ after adding the minutes delta.
+ """
+ if not model_endpoint.status.last_request:
+ logger.error(
+ "Model endpoint last request time is empty, cannot bump it.",
+ project=project,
+ endpoint_id=model_endpoint.metadata.uid,
+ )
+ raise MLRunInvalidArgumentError("Model endpoint last request time is empty")
+
+ bumped_last_request = (
+ datetime.datetime.fromisoformat(model_endpoint.status.last_request)
+ + datetime.timedelta(
+ minutes=minutes_delta,
+ seconds=seconds_delta,
+ )
+ + datetime.timedelta(
+ seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
+ )
+ ).strftime(EventFieldType.TIME_FORMAT)
+ logger.info(
+ "Bumping model endpoint last request time",
+ project=project,
+ endpoint_id=model_endpoint.metadata.uid,
+ last_request=model_endpoint.status.last_request,
+ bumped_last_request=bumped_last_request,
+ )
+ db.patch_model_endpoint(
+ project=project,
+ endpoint_id=model_endpoint.metadata.uid,
+ attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
+ )
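A worked example of the bump computed above, with assumed inputs: the default minutes_delta of 10, seconds_delta of 1, and a parquet_batching_timeout_secs of 3600 (the real value comes from mlrun.mlconf):

import datetime

last_request = datetime.datetime.fromisoformat("2024-01-01 12:00:00.000000+00:00")
bumped_last_request = (
    last_request
    + datetime.timedelta(minutes=10, seconds=1)  # minutes_delta + seconds_delta
    + datetime.timedelta(seconds=3600)           # assumed parquet batching timeout
)
print(bumped_last_request.isoformat())  # 2024-01-01T13:10:01+00:00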
@@ -12,6 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  #
+ import warnings
  from typing import Dict, List, Optional, Union

  import kfp
@@ -270,10 +271,17 @@ def build_function(
  e.g. extra_args="--skip-tls-verify --build-arg A=val"
  :param force_build: Force building the image, even when no changes were made
  """
+ if not overwrite_build_params:
+ # TODO: change overwrite_build_params default to True in 1.8.0
+ warnings.warn(
+ "The `overwrite_build_params` parameter default will change from 'False' to 'True in 1.8.0.",
+ mlrun.utils.OverwriteBuildParamsWarning,
+ )
+
  engine, function = _get_engine_and_function(function, project_object)
  if function.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
  raise mlrun.errors.MLRunInvalidArgumentError(
- "cannot build use deploy_function()"
+ "Cannot build use deploy_function()"
  )
  if engine == "kfp":
  if overwrite_build_params:
@@ -291,15 +299,21 @@ def build_function(
  skip_deployed=skip_deployed,
  )
  else:
- function.build_config(
- image=image,
- base_image=base_image,
- commands=commands,
- secret=secret_name,
- requirements=requirements,
- overwrite=overwrite_build_params,
- extra_args=extra_args,
- )
+ # TODO: remove filter once overwrite_build_params default is changed to True in 1.8.0
+ with warnings.catch_warnings():
+ warnings.simplefilter(
+ "ignore", category=mlrun.utils.OverwriteBuildParamsWarning
+ )
+
+ function.build_config(
+ image=image,
+ base_image=base_image,
+ commands=commands,
+ secret=secret_name,
+ requirements=requirements,
+ overwrite=overwrite_build_params,
+ extra_args=extra_args,
+ )
  ready = function.deploy(
  watch=True,
  with_mlrun=with_mlrun,
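The build_function change introduces a forward-compatibility warning: callers that still rely on the old overwrite_build_params=False default get warned, while the internal call site suppresses the warning with warnings.catch_warnings so it only surfaces for user code. A minimal sketch of that pattern with a stand-in warning class (the real one is mlrun.utils.OverwriteBuildParamsWarning):

import warnings

class OverwriteBuildParamsWarning(FutureWarning):
    # Stand-in for mlrun.utils.OverwriteBuildParamsWarning; the real base class may differ.
    pass

def build_function(overwrite_build_params: bool = False) -> None:
    if not overwrite_build_params:
        warnings.warn(
            "The `overwrite_build_params` default will change to True in a future release.",
            OverwriteBuildParamsWarning,
        )
    # Internal callers silence the warning so only direct user calls see it.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=OverwriteBuildParamsWarning)
        pass  # e.g. function.build_config(..., overwrite=overwrite_build_params)

warnings.simplefilter("always")  # make the warning visible for this demo
build_function()                 # emits OverwriteBuildParamsWarning once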