mlrun 1.6.0rc20__py3-none-any.whl → 1.6.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic (more details are linked from the original page).

Files changed (51)
  1. mlrun/artifacts/base.py +6 -6
  2. mlrun/artifacts/dataset.py +15 -8
  3. mlrun/artifacts/manager.py +6 -3
  4. mlrun/artifacts/model.py +2 -2
  5. mlrun/artifacts/plots.py +8 -8
  6. mlrun/config.py +1 -1
  7. mlrun/data_types/to_pandas.py +1 -1
  8. mlrun/datastore/azure_blob.py +12 -16
  9. mlrun/datastore/base.py +32 -10
  10. mlrun/datastore/datastore_profile.py +4 -4
  11. mlrun/datastore/dbfs_store.py +12 -11
  12. mlrun/datastore/filestore.py +2 -1
  13. mlrun/datastore/google_cloud_storage.py +11 -10
  14. mlrun/datastore/redis.py +2 -1
  15. mlrun/datastore/s3.py +12 -15
  16. mlrun/datastore/sources.py +16 -11
  17. mlrun/datastore/targets.py +2 -13
  18. mlrun/datastore/v3io.py +18 -20
  19. mlrun/db/httpdb.py +76 -7
  20. mlrun/errors.py +4 -0
  21. mlrun/execution.py +13 -4
  22. mlrun/feature_store/api.py +3 -4
  23. mlrun/launcher/base.py +4 -4
  24. mlrun/lists.py +0 -6
  25. mlrun/model.py +8 -1
  26. mlrun/model_monitoring/api.py +9 -31
  27. mlrun/model_monitoring/batch.py +14 -13
  28. mlrun/model_monitoring/controller.py +100 -70
  29. mlrun/model_monitoring/controller_handler.py +1 -3
  30. mlrun/model_monitoring/helpers.py +65 -20
  31. mlrun/model_monitoring/stream_processing.py +0 -3
  32. mlrun/projects/operations.py +1 -1
  33. mlrun/projects/project.py +10 -4
  34. mlrun/runtimes/base.py +6 -1
  35. mlrun/runtimes/constants.py +11 -0
  36. mlrun/runtimes/databricks_job/databricks_runtime.py +7 -9
  37. mlrun/runtimes/kubejob.py +1 -1
  38. mlrun/runtimes/local.py +64 -53
  39. mlrun/runtimes/serving.py +8 -1
  40. mlrun/serving/routers.py +7 -20
  41. mlrun/serving/server.py +4 -14
  42. mlrun/serving/utils.py +0 -3
  43. mlrun/utils/helpers.py +10 -2
  44. mlrun/utils/logger.py +5 -5
  45. mlrun/utils/version/version.json +2 -2
  46. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/METADATA +5 -3
  47. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/RECORD +51 -51
  48. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/LICENSE +0 -0
  49. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/WHEEL +0 -0
  50. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/entry_points.txt +0 -0
  51. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/controller.py CHANGED
@@ -17,7 +17,7 @@ import datetime
17
17
  import json
18
18
  import os
19
19
  import re
20
- from typing import Any, Iterator, Optional, Tuple, Union, cast
20
+ from typing import Any, Iterator, NamedTuple, Optional, Union, cast
21
21
 
22
22
  from v3io.dataplane.response import HttpResponseError
23
23
 
@@ -29,11 +29,21 @@ from mlrun.common.model_monitoring.helpers import FeatureStats, pad_features_his
29
29
  from mlrun.datastore import get_stream_pusher
30
30
  from mlrun.datastore.targets import ParquetTarget
31
31
  from mlrun.model_monitoring.batch import calculate_inputs_statistics
32
- from mlrun.model_monitoring.helpers import get_monitoring_parquet_path, get_stream_path
33
- from mlrun.utils import logger
32
+ from mlrun.model_monitoring.helpers import (
33
+ _BatchDict,
34
+ batch_dict2timedelta,
35
+ get_monitoring_parquet_path,
36
+ get_stream_path,
37
+ )
38
+ from mlrun.utils import create_logger, datetime_now, logger
34
39
  from mlrun.utils.v3io_clients import get_v3io_client
35
40
 
36
41
 
42
+ class _Interval(NamedTuple):
43
+ start: datetime.datetime
44
+ end: datetime.datetime
45
+
46
+
37
47
  class _BatchWindow:
38
48
  V3IO_CONTAINER_FORMAT = "users/pipelines/{project}/monitoring-schedules/functions"
39
49
 
@@ -55,11 +65,15 @@ class _BatchWindow:
55
65
  self._endpoint = endpoint
56
66
  self._application = application
57
67
  self._first_request = first_request
58
- self._kv_storage = get_v3io_client(endpoint=mlrun.mlconf.v3io_api).kv
68
+ self._kv_storage = get_v3io_client(
69
+ endpoint=mlrun.mlconf.v3io_api,
70
+ # Avoid noisy warning logs before the KV table is created
71
+ logger=create_logger(name="v3io_client", level="error"),
72
+ ).kv
59
73
  self._v3io_container = self.V3IO_CONTAINER_FORMAT.format(project=project)
60
- self._start = self._get_last_analyzed()
61
74
  self._stop = last_updated
62
75
  self._step = timedelta_seconds
76
+ self._start = self._get_last_analyzed()
63
77
 
64
78
  def _get_last_analyzed(self) -> Optional[int]:
65
79
  try:
@@ -69,15 +83,26 @@ class _BatchWindow:
69
83
  key=self._application,
70
84
  )
71
85
  except HttpResponseError as err:
72
- logger.warn(
73
- "Failed to get the last analyzed time for this endpoint and application, "
74
- "as this is probably the first time this application is running. "
75
- "Using the first request time instead.",
86
+ logger.info(
87
+ "No last analyzed time was found for this endpoint and "
88
+ "application, as this is probably the first time this "
89
+ "application is running. Using the latest between first "
90
+ "request time or last update time minus one day instead",
76
91
  endpoint=self._endpoint,
77
92
  application=self._application,
78
93
  first_request=self._first_request,
79
- error=err,
94
+ last_updated=self._stop,
80
95
  )
96
+ logger.debug("Error while getting last analyzed time", err=err)
97
+ if self._first_request and self._stop:
98
+ # TODO : Change the timedelta according to the policy.
99
+ first_period_in_seconds = max(
100
+ int(datetime.timedelta(days=1).total_seconds()), self._step
101
+ ) # max between one day and the base period
102
+ return max(
103
+ self._first_request,
104
+ self._stop - first_period_in_seconds,
105
+ )
81
106
  return self._first_request
82
107
 
83
108
  last_analyzed = data.output.item[mm_constants.SchedulingKeys.LAST_ANALYZED]
@@ -105,20 +130,29 @@ class _BatchWindow:
105
130
 
106
131
  def get_intervals(
107
132
  self,
108
- ) -> Iterator[Tuple[datetime.datetime, datetime.datetime]]:
133
+ ) -> Iterator[_Interval]:
109
134
  """Generate the batch interval time ranges."""
110
135
  if self._start is not None and self._stop is not None:
111
136
  entered = False
112
- for timestamp in range(self._start, self._stop, self._step):
137
+ # Iterate timestamp from start until timestamp <= stop - step
138
+ # so that the last interval will end at (timestamp + step) <= stop.
139
+ # Add 1 to stop - step to get <= and not <.
140
+ for timestamp in range(
141
+ self._start, self._stop - self._step + 1, self._step
142
+ ):
113
143
  entered = True
114
- start_time = datetime.datetime.utcfromtimestamp(timestamp)
115
- end_time = datetime.datetime.utcfromtimestamp(timestamp + self._step)
116
- yield start_time, end_time
144
+ start_time = datetime.datetime.fromtimestamp(
145
+ timestamp, tz=datetime.timezone.utc
146
+ )
147
+ end_time = datetime.datetime.fromtimestamp(
148
+ timestamp + self._step, tz=datetime.timezone.utc
149
+ )
150
+ yield _Interval(start_time, end_time)
117
151
  self._update_last_analyzed(timestamp + self._step)
118
152
  if not entered:
119
153
  logger.info(
120
154
  "All the data is set, but no complete intervals were found. "
121
- "Wait for last_updated to be updated.",
155
+ "Wait for last_updated to be updated",
122
156
  endpoint=self._endpoint,
123
157
  application=self._application,
124
158
  start=self._start,
@@ -127,8 +161,8 @@ class _BatchWindow:
127
161
  )
128
162
  else:
129
163
  logger.warn(
130
- "The first request time is not not found for this endpoint. "
131
- "No intervals will be generated.",
164
+ "The first request time is not found for this endpoint. "
165
+ "No intervals will be generated",
132
166
  endpoint=self._endpoint,
133
167
  application=self._application,
134
168
  start=self._start,
@@ -165,38 +199,44 @@ class _BatchWindowGenerator:
165
199
  self._batch_dict[pair_list[0]] = float(pair_list[1])
166
200
 
167
201
  def _get_timedelta(self) -> int:
168
- """Get the timedelta from a batch dictionary"""
169
- self._batch_dict = cast(dict[str, int], self._batch_dict)
170
- minutes, hours, days = (
171
- self._batch_dict[mm_constants.EventFieldType.MINUTES],
172
- self._batch_dict[mm_constants.EventFieldType.HOURS],
173
- self._batch_dict[mm_constants.EventFieldType.DAYS],
174
- )
202
+ """Get the timedelta in seconds from the batch dictionary"""
175
203
  return int(
176
- datetime.timedelta(minutes=minutes, hours=hours, days=days).total_seconds()
204
+ batch_dict2timedelta(cast(_BatchDict, self._batch_dict)).total_seconds()
177
205
  )
178
206
 
179
207
  @classmethod
180
- def _get_last_updated_time(cls, last_request: Optional[str]) -> Optional[int]:
208
+ def _get_last_updated_time(
209
+ cls, last_request: Optional[str], has_stream: bool
210
+ ) -> Optional[int]:
181
211
  """
182
212
  Get the last updated time of a model endpoint.
183
213
  """
184
214
  if not last_request:
185
215
  return None
186
- return int(
216
+ last_updated = int(
187
217
  cls._date_string2timestamp(last_request)
188
218
  - cast(
189
219
  float,
190
220
  mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
191
221
  )
192
222
  )
223
+ if not has_stream:
224
+ # If the endpoint does not have a stream, `last_updated` should be
225
+ # the minimum between the current time and the last updated time.
226
+ # This compensates for the bumping mechanism - see
227
+ # `bump_model_endpoint_last_request`.
228
+ last_updated = min(int(datetime_now().timestamp()), last_updated)
229
+ logger.debug(
230
+ "The endpoint does not have a stream", last_updated=last_updated
231
+ )
232
+ return last_updated
193
233
 
194
234
  @classmethod
195
235
  def _normalize_first_request(
196
236
  cls, first_request: Optional[str], endpoint: str
197
237
  ) -> Optional[int]:
198
238
  if not first_request:
199
- logger.warn(
239
+ logger.debug(
200
240
  "There is no first request time for this endpoint.",
201
241
  endpoint=endpoint,
202
242
  first_request=first_request,
@@ -215,6 +255,7 @@ class _BatchWindowGenerator:
215
255
  application: str,
216
256
  first_request: Optional[str],
217
257
  last_request: Optional[str],
258
+ has_stream: bool,
218
259
  ) -> _BatchWindow:
219
260
  """
220
261
  Get the batch window for a specific endpoint and application.
@@ -226,7 +267,7 @@ class _BatchWindowGenerator:
226
267
  endpoint=endpoint,
227
268
  application=application,
228
269
  timedelta_seconds=self._timedelta,
229
- last_updated=self._get_last_updated_time(last_request),
270
+ last_updated=self._get_last_updated_time(last_request, has_stream),
230
271
  first_request=self._normalize_first_request(first_request, endpoint),
231
272
  )
232
273
 
@@ -251,20 +292,12 @@ class MonitoringApplicationController:
251
292
  """
252
293
  self.context = context
253
294
  self.project = project
295
+ self.project_obj = mlrun.get_or_create_project(project)
254
296
 
255
- logger.info(
256
- "Initializing MonitoringApplicationController",
257
- project=project,
258
- )
259
-
260
- # Get a runtime database
297
+ context.logger.debug(f"Initializing {self.__class__.__name__}", project=project)
261
298
 
262
299
  self.db = mlrun.model_monitoring.get_model_endpoint_store(project=project)
263
300
 
264
- # If an error occurs, it will be raised using the following argument
265
- self.endpoints_exceptions = {}
266
-
267
- # The batch window
268
301
  self._batch_window_generator = _BatchWindowGenerator(
269
302
  batch_dict=context.parameters[
270
303
  mm_constants.EventFieldType.BATCH_INTERVALS_DICT
@@ -277,7 +310,7 @@ class MonitoringApplicationController:
277
310
  )
278
311
  self.model_monitoring_access_key = self._get_model_monitoring_access_key()
279
312
  self.parquet_directory = get_monitoring_parquet_path(
280
- project=project,
313
+ self.project_obj,
281
314
  kind=mm_constants.FileTargetKind.APPS_PARQUET,
282
315
  )
283
316
  self.storage_options = None
@@ -303,21 +336,23 @@ class MonitoringApplicationController:
303
336
 
304
337
  def run(self):
305
338
  """
306
- Main method for run all the relevant monitoring application on each endpoint
339
+ Main method for run all the relevant monitoring applications on each endpoint
307
340
  """
308
341
  try:
309
342
  endpoints = self.db.list_model_endpoints(uids=self.model_endpoints)
310
- application = mlrun.get_or_create_project(
311
- self.project
312
- ).list_model_monitoring_functions()
313
- if application:
314
- applications_names = list({app.metadata.name for app in application})
343
+ monitoring_functions = self.project_obj.list_model_monitoring_functions()
344
+ if monitoring_functions:
345
+ applications_names = list(
346
+ {app.metadata.name for app in monitoring_functions}
347
+ )
315
348
  else:
316
- logger.info("There are no monitoring application found in this project")
349
+ self.context.logger.info(
350
+ "No monitoring functions found", project=self.project
351
+ )
317
352
  applications_names = []
318
353
 
319
354
  except Exception as e:
320
- logger.error("Failed to list endpoints", exc=e)
355
+ self.context.logger.error("Failed to list endpoints", exc=e)
321
356
  return
322
357
  if endpoints and applications_names:
323
358
  # Initialize a process pool that will be used to run each endpoint applications on a dedicated process
@@ -354,9 +389,7 @@ class MonitoringApplicationController:
354
389
  futures.append(future)
355
390
 
356
391
  for future in concurrent.futures.as_completed(futures):
357
- res = future.result()
358
- if res:
359
- self.endpoints_exceptions[res[0]] = res[1]
392
+ future.result()
360
393
 
361
394
  self._delete_old_parquet(endpoints=endpoints)
362
395
 
@@ -370,7 +403,7 @@ class MonitoringApplicationController:
370
403
  parquet_directory: str,
371
404
  storage_options: dict,
372
405
  model_monitoring_access_key: str,
373
- ) -> Optional[Tuple[str, Exception]]:
406
+ ) -> None:
374
407
  """
375
408
  Process a model endpoint and trigger the monitoring applications. This function running on different process
376
409
  for each endpoint. In addition, this function will generate a parquet file that includes the relevant data
@@ -405,6 +438,7 @@ class MonitoringApplicationController:
405
438
  application=application,
406
439
  first_request=endpoint[mm_constants.EventFieldType.FIRST_REQUEST],
407
440
  last_request=endpoint[mm_constants.EventFieldType.LAST_REQUEST],
441
+ has_stream=endpoint[mm_constants.EventFieldType.STREAM_PATH] != "",
408
442
  )
409
443
 
410
444
  for start_infer_time, end_infer_time in batch_window.get_intervals():
@@ -424,22 +458,18 @@ class MonitoringApplicationController:
424
458
  parquet_target_path = offline_response.vector.get_target_path()
425
459
 
426
460
  if len(df) == 0:
427
- logger.warn(
428
- "Not enough model events since the beginning of the batch interval",
429
- featureset_name=m_fs.metadata.name,
461
+ logger.info(
462
+ "During this time window, the endpoint has not received any data",
430
463
  endpoint=endpoint[mm_constants.EventFieldType.UID],
431
- min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
432
464
  start_time=start_infer_time,
433
465
  end_time=end_infer_time,
434
466
  )
435
467
  continue
436
468
 
437
- # Continue if not enough events provided since the deployment of the model endpoint
438
469
  except FileNotFoundError:
439
470
  logger.warn(
440
- "Parquet not found, probably due to not enough model events",
471
+ "No parquets were written yet",
441
472
  endpoint=endpoint[mm_constants.EventFieldType.UID],
442
- min_required_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
443
473
  )
444
474
  continue
445
475
 
@@ -473,12 +503,11 @@ class MonitoringApplicationController:
473
503
  model_monitoring_access_key=model_monitoring_access_key,
474
504
  parquet_target_path=parquet_target_path,
475
505
  )
476
- except Exception as e:
477
- logger.error(
506
+ except Exception:
507
+ logger.exception(
478
508
  "Encountered an exception",
479
509
  endpoint_id=endpoint[mm_constants.EventFieldType.UID],
480
510
  )
481
- return endpoint_id, e
482
511
 
483
512
  def _delete_old_parquet(self, endpoints: list[dict[str, Any]], days: int = 1):
484
513
  """
@@ -492,12 +521,14 @@ class MonitoringApplicationController:
492
521
  self.parquet_directory,
493
522
  {"V3IO_ACCESS_KEY": self.model_monitoring_access_key},
494
523
  )
495
- fs = store.get_filesystem()
524
+ fs = store.filesystem
496
525
 
497
526
  # calculate time threshold (keep only files from the last 24 hours)
498
- time_to_keep = float(
499
- (datetime.datetime.now() - datetime.timedelta(days=days)).strftime("%s")
500
- )
527
+ time_to_keep = (
528
+ datetime.datetime.now(tz=datetime.timezone.utc)
529
+ - datetime.timedelta(days=days)
530
+ ).timestamp()
531
+
501
532
  for endpoint in endpoints:
502
533
  try:
503
534
  apps_parquet_directories = fs.listdir(
@@ -611,14 +642,13 @@ class MonitoringApplicationController:
611
642
 
612
643
  # get offline features based on application start and end time.
613
644
  # store the result parquet by partitioning by controller end processing time
614
- offline_response = fstore.get_offline_features(
615
- feature_vector=vector,
645
+ offline_response = vector.get_offline_features(
616
646
  start_time=start_infer_time,
617
647
  end_time=end_infer_time,
618
648
  timestamp_for_filtering=mm_constants.EventFieldType.TIMESTAMP,
619
649
  target=ParquetTarget(
620
650
  path=parquet_directory
621
- + f"/key={endpoint_id}/{start_infer_time.strftime('%s')}/{application_name}.parquet",
651
+ + f"/key={endpoint_id}/{int(start_infer_time.timestamp())}/{application_name}.parquet",
622
652
  storage_options=storage_options,
623
653
  ),
624
654
  )
mlrun/model_monitoring/controller_handler.py CHANGED
@@ -16,7 +16,7 @@ import mlrun
16
16
  from mlrun.model_monitoring.controller import MonitoringApplicationController
17
17
 
18
18
 
19
- def handler(context: mlrun.run.MLClientCtx):
19
+ def handler(context: mlrun.run.MLClientCtx) -> None:
20
20
  """
21
21
  Run model monitoring application processor
22
22
 
@@ -27,5 +27,3 @@ def handler(context: mlrun.run.MLClientCtx):
27
27
  project=context.project,
28
28
  )
29
29
  monitor_app_controller.run()
30
- if monitor_app_controller.endpoints_exceptions:
31
- context.logger.error(monitor_app_controller.endpoints_exceptions)
mlrun/model_monitoring/helpers.py CHANGED
@@ -12,20 +12,33 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
-
16
15
  import datetime
17
16
  import typing
18
17
 
19
18
  import mlrun
20
19
  import mlrun.common.model_monitoring.helpers
21
20
  import mlrun.common.schemas
22
- from mlrun.common.schemas.model_monitoring import EventFieldType
23
- from mlrun.errors import MLRunInvalidArgumentError
21
+ from mlrun.common.schemas.model_monitoring import (
22
+ EventFieldType,
23
+ MonitoringFunctionNames,
24
+ )
25
+ from mlrun.errors import MLRunValueError
24
26
  from mlrun.model_monitoring.model_endpoint import ModelEndpoint
25
27
  from mlrun.utils import logger
26
28
 
27
29
  if typing.TYPE_CHECKING:
28
30
  from mlrun.db.base import RunDBInterface
31
+ from mlrun.projects import MlrunProject
32
+
33
+
34
+ class _BatchDict(typing.TypedDict):
35
+ minutes: int
36
+ hours: int
37
+ days: int
38
+
39
+
40
+ class _MLRunNoRunsFoundError(Exception):
41
+ pass
29
42
 
30
43
 
31
44
  def get_stream_path(project: str = None, application_name: str = None):
@@ -55,24 +68,22 @@ def get_stream_path(project: str = None, application_name: str = None):
55
68
 
56
69
 
57
70
  def get_monitoring_parquet_path(
58
- project: str,
71
+ project: "MlrunProject",
59
72
  kind: str = mlrun.common.schemas.model_monitoring.FileTargetKind.PARQUET,
60
73
  ) -> str:
61
74
  """Get model monitoring parquet target for the current project and kind. The parquet target path is based on the
62
75
  project artifact path. If project artifact path is not defined, the parquet target path will be based on MLRun
63
76
  artifact path.
64
77
 
65
- :param project: Project name.
78
+ :param project: Project object.
66
79
  :param kind: indicate the kind of the parquet path, can be either stream_parquet or stream_controller_parquet
67
80
 
68
81
  :return: Monitoring parquet target path.
69
82
  """
70
-
71
- project_obj = mlrun.get_or_create_project(name=project)
72
- artifact_path = project_obj.spec.artifact_path
83
+ artifact_path = project.spec.artifact_path
73
84
  # Generate monitoring parquet path value
74
85
  parquet_path = mlrun.mlconf.get_model_monitoring_file_target_path(
75
- project=project,
86
+ project=project.name,
76
87
  kind=kind,
77
88
  target="offline",
78
89
  artifact_path=artifact_path,
@@ -99,12 +110,46 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
99
110
  )
100
111
 
101
112
 
113
+ def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
114
+ """
115
+ Convert a batch dictionary to timedelta.
116
+
117
+ :param batch_dict: Batch dict.
118
+
119
+ :return: Timedelta.
120
+ """
121
+ return datetime.timedelta(**batch_dict)
122
+
123
+
124
+ def _get_monitoring_time_window_from_controller_run(
125
+ project: str, db: "RunDBInterface"
126
+ ) -> datetime.timedelta:
127
+ """
128
+ Get timedelta for the controller to run.
129
+
130
+ :param project: Project name.
131
+ :param db: DB interface.
132
+
133
+ :return: Timedelta for the controller to run.
134
+ """
135
+ run_name = MonitoringFunctionNames.APPLICATION_CONTROLLER
136
+ runs = db.list_runs(project=project, name=run_name, sort=True)
137
+ if not runs:
138
+ raise _MLRunNoRunsFoundError(f"No {run_name} runs were found")
139
+ last_run = runs[0]
140
+ try:
141
+ batch_dict = last_run["spec"]["parameters"]["batch_intervals_dict"]
142
+ except KeyError:
143
+ raise MLRunValueError(
144
+ f"Could not find `batch_intervals_dict` in {run_name} run"
145
+ )
146
+ return batch_dict2timedelta(batch_dict)
147
+
148
+
102
149
  def bump_model_endpoint_last_request(
103
150
  project: str,
104
151
  model_endpoint: ModelEndpoint,
105
152
  db: "RunDBInterface",
106
- minutes_delta: int = 10, # TODO: move to config - should be the same as `batch_interval`
107
- seconds_delta: int = 1,
108
153
  ) -> None:
109
154
  """
110
155
  Update the last request field of the model endpoint to be after the current last request time.
@@ -112,10 +157,6 @@ def bump_model_endpoint_last_request(
112
157
  :param project: Project name.
113
158
  :param model_endpoint: Model endpoint object.
114
159
  :param db: DB interface.
115
- :param minutes_delta: Minutes delta to add to the last request time.
116
- :param seconds_delta: Seconds delta to add to the last request time. This is mainly to ensure that the last
117
- request time is strongly greater than the previous one (with respect to the window time)
118
- after adding the minutes delta.
119
160
  """
120
161
  if not model_endpoint.status.last_request:
121
162
  logger.error(
@@ -123,14 +164,18 @@ def bump_model_endpoint_last_request(
123
164
  project=project,
124
165
  endpoint_id=model_endpoint.metadata.uid,
125
166
  )
126
- raise MLRunInvalidArgumentError("Model endpoint last request time is empty")
167
+ raise MLRunValueError("Model endpoint last request time is empty")
168
+ try:
169
+ time_window = _get_monitoring_time_window_from_controller_run(project, db)
170
+ except _MLRunNoRunsFoundError:
171
+ logger.debug(
172
+ "Not bumping model endpoint last request time - no controller runs were found"
173
+ )
174
+ return
127
175
 
128
176
  bumped_last_request = (
129
177
  datetime.datetime.fromisoformat(model_endpoint.status.last_request)
130
- + datetime.timedelta(
131
- minutes=minutes_delta,
132
- seconds=seconds_delta,
133
- )
178
+ + time_window
134
179
  + datetime.timedelta(
135
180
  seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
136
181
  )
mlrun/model_monitoring/stream_processing.py CHANGED
@@ -528,9 +528,6 @@ class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):
528
528
 
529
529
  # Getting event timestamp and endpoint_id
530
530
  base_event = {k: event[k] for k in base_fields}
531
- base_event[EventFieldType.TIMESTAMP] = datetime.datetime.fromisoformat(
532
- base_event[EventFieldType.TIMESTAMP]
533
- )
534
531
 
535
532
  # base_metrics includes the stats about the average latency and the amount of predictions over time
536
533
  base_metrics = {
mlrun/projects/operations.py CHANGED
@@ -274,7 +274,7 @@ def build_function(
274
274
  if not overwrite_build_params:
275
275
  # TODO: change overwrite_build_params default to True in 1.8.0
276
276
  warnings.warn(
277
- "The `overwrite_build_params` parameter default will change from 'False' to 'True in 1.8.0.",
277
+ "The `overwrite_build_params` parameter default will change from 'False' to 'True' in 1.8.0.",
278
278
  mlrun.utils.OverwriteBuildParamsWarning,
279
279
  )
280
280
 
mlrun/projects/project.py CHANGED
@@ -2167,7 +2167,7 @@ class MlrunProject(ModelObj):
2167
2167
  self.spec.remove_function(name)
2168
2168
 
2169
2169
  def remove_model_monitoring_function(self, name):
2170
- """remove the specified model-monitoring-app function from the project
2170
+ """remove the specified model-monitoring-app function from the project and from the db
2171
2171
 
2172
2172
  :param name: name of the model-monitoring-app function (under the project)
2173
2173
  """
@@ -2177,6 +2177,7 @@ class MlrunProject(ModelObj):
2177
2177
  == mm_constants.ModelMonitoringAppLabel.VAL
2178
2178
  ):
2179
2179
  self.remove_function(name=name)
2180
+ mlrun.db.get_run_db().delete_function(name=name.lower())
2180
2181
  logger.info(f"{name} function has been removed from {self.name} project")
2181
2182
  else:
2182
2183
  raise logger.error(
@@ -2753,6 +2754,11 @@ class MlrunProject(ModelObj):
2753
2754
  project_file_path = path.join(
2754
2755
  self.spec.context, self.spec.subpath or "", "project.yaml"
2755
2756
  )
2757
+ if filepath and "://" in str(filepath) and not archive_code:
2758
+ raise mlrun.errors.MLRunInvalidArgumentError(
2759
+ "URLs are only applicable to archives"
2760
+ )
2761
+
2756
2762
  project_dir = pathlib.Path(project_file_path).parent
2757
2763
  project_dir.mkdir(parents=True, exist_ok=True)
2758
2764
  with open(project_file_path, "w") as fp:
@@ -3011,7 +3017,7 @@ class MlrunProject(ModelObj):
3011
3017
  if not overwrite_build_params:
3012
3018
  # TODO: change overwrite_build_params default to True in 1.8.0
3013
3019
  warnings.warn(
3014
- "The `overwrite_build_params` parameter default will change from 'False' to 'True in 1.8.0.",
3020
+ "The `overwrite_build_params` parameter default will change from 'False' to 'True' in 1.8.0.",
3015
3021
  mlrun.utils.OverwriteBuildParamsWarning,
3016
3022
  )
3017
3023
  default_image_name = mlrun.mlconf.default_project_image_name.format(
@@ -3097,7 +3103,7 @@ class MlrunProject(ModelObj):
3097
3103
  if not overwrite_build_params:
3098
3104
  # TODO: change overwrite_build_params default to True in 1.8.0
3099
3105
  warnings.warn(
3100
- "The `overwrite_build_params` parameter default will change from 'False' to 'True in 1.8.0.",
3106
+ "The `overwrite_build_params` parameter default will change from 'False' to 'True' in 1.8.0.",
3101
3107
  mlrun.utils.OverwriteBuildParamsWarning,
3102
3108
  )
3103
3109
 
@@ -3402,7 +3408,7 @@ class MlrunProject(ModelObj):
3402
3408
  :param state: List only runs whose state is specified.
3403
3409
  :param sort: Whether to sort the result according to their start time. Otherwise, results will be
3404
3410
  returned by their internal order in the DB (order will not be guaranteed).
3405
- :param last: Deprecated - currently not used.
3411
+ :param last: Deprecated - currently not used (will be removed in 1.8.0).
3406
3412
  :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
3407
3413
  :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
3408
3414
  :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
mlrun/runtimes/base.py CHANGED
@@ -550,7 +550,12 @@ class BaseRuntime(ModelObj):
550
550
  if err:
551
551
  updates["status.error"] = err_to_str(err)
552
552
 
553
- elif not was_none and last_state != "completed":
553
+ elif (
554
+ not was_none
555
+ and last_state != mlrun.runtimes.constants.RunStates.completed
556
+ and last_state
557
+ not in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
558
+ ):
554
559
  try:
555
560
  runtime_cls = mlrun.runtimes.get_runtime_class(kind)
556
561
  updates = runtime_cls._get_run_completion_updates(resp)
mlrun/runtimes/constants.py CHANGED
@@ -165,6 +165,17 @@ class RunStates(object):
165
165
  RunStates.aborted,
166
166
  ]
167
167
 
168
+ @staticmethod
169
+ def abortion_states():
170
+ return [
171
+ RunStates.aborted,
172
+ RunStates.aborting,
173
+ ]
174
+
175
+ @staticmethod
176
+ def error_and_abortion_states():
177
+ return list(set(RunStates.error_states()) | set(RunStates.abortion_states()))
178
+
168
179
  @staticmethod
169
180
  def non_terminal_states():
170
181
  return list(set(RunStates.all()) - set(RunStates.terminal_states()))
mlrun/runtimes/databricks_job/databricks_runtime.py CHANGED
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import os
16
15
  from ast import FunctionDef, parse, unparse
17
16
  from base64 import b64decode, b64encode
18
17
  from typing import Callable, Dict, List, Optional, Union
@@ -197,14 +196,13 @@ class DatabricksRuntime(kubejob.KubejobRuntime):
197
196
  if value:
198
197
  task_parameters[key] = value # in order to handle reruns.
199
198
  runspec.spec.parameters["task_parameters"] = task_parameters
200
- current_file = os.path.abspath(__file__)
201
- current_dir = os.path.dirname(current_file)
202
- databricks_runtime_wrap_path = os.path.join(
203
- current_dir, "databricks_wrapper.py"
204
- )
205
- with open(databricks_runtime_wrap_path, "r") as databricks_runtime_wrap_file:
206
- wrap_code = databricks_runtime_wrap_file.read()
207
- wrap_code = b64encode(wrap_code.encode("utf-8")).decode("utf-8")
199
+ wrap_code = b"""
200
+ from mlrun.runtimes.databricks_job import databricks_wrapper
201
+
202
+ def run_mlrun_databricks_job(context,task_parameters: dict, **kwargs):
203
+ databricks_wrapper.run_mlrun_databricks_job(context, task_parameters, **kwargs)
204
+ """
205
+ wrap_code = b64encode(wrap_code).decode("utf-8")
208
206
  self.spec.build.functionSourceCode = wrap_code
209
207
  runspec.spec.handler = "run_mlrun_databricks_job"
210
208