mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (101)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/document.py +6 -1
  3. mlrun/artifacts/llm_prompt.py +21 -15
  4. mlrun/artifacts/model.py +3 -3
  5. mlrun/common/constants.py +9 -0
  6. mlrun/common/formatters/artifact.py +1 -0
  7. mlrun/common/model_monitoring/helpers.py +86 -0
  8. mlrun/common/schemas/__init__.py +2 -0
  9. mlrun/common/schemas/auth.py +2 -0
  10. mlrun/common/schemas/function.py +10 -0
  11. mlrun/common/schemas/hub.py +30 -18
  12. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  13. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  14. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  15. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  16. mlrun/common/schemas/pipeline.py +1 -1
  17. mlrun/common/schemas/serving.py +3 -0
  18. mlrun/common/schemas/workflow.py +1 -0
  19. mlrun/common/secrets.py +22 -1
  20. mlrun/config.py +34 -21
  21. mlrun/datastore/__init__.py +11 -3
  22. mlrun/datastore/azure_blob.py +162 -47
  23. mlrun/datastore/base.py +265 -7
  24. mlrun/datastore/datastore.py +10 -5
  25. mlrun/datastore/datastore_profile.py +61 -5
  26. mlrun/datastore/model_provider/huggingface_provider.py +367 -0
  27. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  28. mlrun/datastore/model_provider/model_provider.py +211 -74
  29. mlrun/datastore/model_provider/openai_provider.py +243 -71
  30. mlrun/datastore/s3.py +24 -2
  31. mlrun/datastore/store_resources.py +4 -4
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +27 -19
  35. mlrun/db/httpdb.py +57 -48
  36. mlrun/db/nopdb.py +25 -10
  37. mlrun/execution.py +55 -13
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +2 -0
  43. mlrun/model.py +9 -3
  44. mlrun/model_monitoring/api.py +66 -27
  45. mlrun/model_monitoring/applications/__init__.py +1 -1
  46. mlrun/model_monitoring/applications/base.py +388 -138
  47. mlrun/model_monitoring/applications/context.py +2 -4
  48. mlrun/model_monitoring/applications/results.py +4 -7
  49. mlrun/model_monitoring/controller.py +239 -101
  50. mlrun/model_monitoring/db/_schedules.py +36 -13
  51. mlrun/model_monitoring/db/_stats.py +4 -3
  52. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  53. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
  54. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
  55. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  56. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  57. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
  58. mlrun/model_monitoring/helpers.py +28 -5
  59. mlrun/model_monitoring/stream_processing.py +45 -14
  60. mlrun/model_monitoring/writer.py +220 -1
  61. mlrun/platforms/__init__.py +3 -2
  62. mlrun/platforms/iguazio.py +7 -3
  63. mlrun/projects/operations.py +16 -11
  64. mlrun/projects/pipelines.py +2 -2
  65. mlrun/projects/project.py +157 -69
  66. mlrun/run.py +97 -20
  67. mlrun/runtimes/__init__.py +18 -0
  68. mlrun/runtimes/base.py +14 -6
  69. mlrun/runtimes/daskjob.py +1 -0
  70. mlrun/runtimes/local.py +5 -2
  71. mlrun/runtimes/mounts.py +20 -2
  72. mlrun/runtimes/nuclio/__init__.py +1 -0
  73. mlrun/runtimes/nuclio/application/application.py +147 -17
  74. mlrun/runtimes/nuclio/function.py +72 -27
  75. mlrun/runtimes/nuclio/serving.py +102 -20
  76. mlrun/runtimes/pod.py +213 -21
  77. mlrun/runtimes/utils.py +49 -9
  78. mlrun/secrets.py +54 -13
  79. mlrun/serving/remote.py +79 -6
  80. mlrun/serving/routers.py +23 -41
  81. mlrun/serving/server.py +230 -40
  82. mlrun/serving/states.py +605 -232
  83. mlrun/serving/steps.py +62 -0
  84. mlrun/serving/system_steps.py +136 -81
  85. mlrun/serving/v2_serving.py +9 -10
  86. mlrun/utils/helpers.py +215 -83
  87. mlrun/utils/logger.py +3 -1
  88. mlrun/utils/notifications/notification/base.py +18 -0
  89. mlrun/utils/notifications/notification/git.py +2 -4
  90. mlrun/utils/notifications/notification/mail.py +38 -15
  91. mlrun/utils/notifications/notification/slack.py +2 -4
  92. mlrun/utils/notifications/notification/webhook.py +2 -5
  93. mlrun/utils/notifications/notification_pusher.py +1 -1
  94. mlrun/utils/version/version.json +2 -2
  95. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
  96. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
  97. mlrun/api/schemas/__init__.py +0 -259
  98. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
  99. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
  100. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
  101. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0
@@ -24,15 +24,12 @@ import mlrun.common.schemas.model_monitoring.constants as mm_constants
24
24
  import mlrun.errors
25
25
  import mlrun.feature_store as fstore
26
26
  import mlrun.feature_store.feature_set as fs
27
- import mlrun.features
28
27
  import mlrun.serving
29
28
  import mlrun.utils
30
29
  from mlrun.artifacts import Artifact, DatasetArtifact, ModelArtifact, get_model
31
30
  from mlrun.common.model_monitoring.helpers import FeatureStats
32
31
  from mlrun.common.schemas import ModelEndpoint
33
- from mlrun.model_monitoring.helpers import (
34
- calculate_inputs_statistics,
35
- )
32
+ from mlrun.model_monitoring.helpers import calculate_inputs_statistics
36
33
 
37
34
 
38
35
  class _ArtifactsLogger(Protocol):
@@ -252,6 +249,7 @@ class MonitoringApplicationContext:
252
249
  project=self.project_name,
253
250
  endpoint_id=self.endpoint_id,
254
251
  feature_analysis=True,
252
+ tsdb_metrics=False,
255
253
  )
256
254
  return self._model_endpoint
257
255
 
@@ -14,16 +14,13 @@
14
14
 
15
15
  import dataclasses
16
16
  import json
17
- import re
18
17
  from abc import ABC, abstractmethod
19
18
 
20
19
  from pydantic.v1 import validator
21
20
  from pydantic.v1.dataclasses import dataclass
22
21
 
23
- import mlrun.common.helpers
24
- import mlrun.common.model_monitoring.helpers
25
22
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
26
- import mlrun.utils.v3io_clients
23
+ import mlrun.errors
27
24
  from mlrun.utils import logger
28
25
 
29
26
  _RESULT_EXTRA_DATA_MAX_SIZE = 998
@@ -33,10 +30,10 @@ class _ModelMonitoringApplicationDataRes(ABC):
33
30
  name: str
34
31
 
35
32
  def __post_init__(self):
36
- pat = re.compile(mm_constants.RESULT_NAME_PATTERN)
37
- if not re.fullmatch(pat, self.name):
33
+ if not mm_constants.RESULT_NAME_REGEX.fullmatch(self.name):
38
34
  raise mlrun.errors.MLRunValueError(
39
- f"Attribute name must comply with the regex `{mm_constants.RESULT_NAME_PATTERN}`"
35
+ "The application result or metric name must comply with the regex "
36
+ f"`{mm_constants.RESULT_NAME_REGEX.pattern}`"
40
37
  )
41
38
 
42
39
  @abstractmethod
@@ -11,20 +11,20 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
15
14
  import collections
16
15
  import concurrent.futures
17
16
  import datetime
18
17
  import json
19
18
  import os
20
19
  import traceback
21
- from collections import OrderedDict
20
+ import warnings
22
21
  from collections.abc import Iterator
23
22
  from contextlib import AbstractContextManager
24
23
  from types import TracebackType
25
- from typing import Any, NamedTuple, Optional, Union, cast
24
+ from typing import Any, Final, NamedTuple, Optional, Union, cast
26
25
 
27
26
  import nuclio_sdk
27
+ import numpy as np
28
28
  import pandas as pd
29
29
 
30
30
  import mlrun
@@ -38,7 +38,6 @@ from mlrun.common.schemas import EndpointType
38
38
  from mlrun.common.schemas.model_monitoring.constants import (
39
39
  ControllerEvent,
40
40
  ControllerEventEndpointPolicy,
41
- ControllerEventKind,
42
41
  )
43
42
  from mlrun.errors import err_to_str
44
43
  from mlrun.model_monitoring.helpers import batch_dict2timedelta
@@ -54,14 +53,17 @@ class _Interval(NamedTuple):
54
53
 
55
54
 
56
55
  class _BatchWindow:
56
+ TIMESTAMP_RESOLUTION_MICRO: Final = 1e-6 # 0.000001 seconds or 1 microsecond
57
+
57
58
  def __init__(
58
59
  self,
59
60
  *,
60
61
  schedules_file: schedules.ModelMonitoringSchedulesFileEndpoint,
61
62
  application: str,
62
63
  timedelta_seconds: int,
63
- last_updated: int,
64
- first_request: int,
64
+ last_updated: float,
65
+ first_request: float,
66
+ endpoint_mode: mm_constants.EndpointMode = mm_constants.EndpointMode.REAL_TIME,
65
67
  ) -> None:
66
68
  """
67
69
  Initialize a batch window object that handles the batch interval time range
@@ -74,21 +76,34 @@ class _BatchWindow:
74
76
  self._stop = last_updated
75
77
  self._step = timedelta_seconds
76
78
  self._db = schedules_file
79
+ self._endpoint_mode = endpoint_mode
77
80
  self._start = self._get_last_analyzed()
78
81
 
79
- def _get_saved_last_analyzed(self) -> Optional[int]:
80
- return cast(int, self._db.get_application_time(self._application))
82
+ def _get_saved_last_analyzed(
83
+ self,
84
+ ) -> Optional[float]:
85
+ return self._db.get_application_time(self._application)
81
86
 
82
- def _update_last_analyzed(self, last_analyzed: int) -> None:
87
+ def _update_last_analyzed(self, last_analyzed: float) -> None:
83
88
  self._db.update_application_time(
84
89
  application=self._application, timestamp=last_analyzed
85
90
  )
86
91
 
87
- def _get_initial_last_analyzed(self) -> int:
92
+ def _get_initial_last_analyzed(self) -> float:
93
+ if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
94
+ logger.info(
95
+ "No last analyzed time was found for this endpoint and application, as this is "
96
+ "probably the first time this application is running. Initializing last analyzed "
97
+ "to the start of the batch time",
98
+ application=self._application,
99
+ start_batch_time=self._first_request,
100
+ )
101
+ return self._first_request
88
102
  logger.info(
89
103
  "No last analyzed time was found for this endpoint and application, as this is "
90
104
  "probably the first time this application is running. Initializing last analyzed "
91
- "to the latest between first request time or last update time minus one day",
105
+ "to the latest between first request the latest between first request time or last "
106
+ "update time minus one day",
92
107
  application=self._application,
93
108
  first_request=self._first_request,
94
109
  last_updated=self._stop,
@@ -100,9 +115,12 @@ class _BatchWindow:
100
115
  self._stop - first_period_in_seconds,
101
116
  )
102
117
 
103
- def _get_last_analyzed(self) -> int:
118
+ def _get_last_analyzed(self) -> float:
104
119
  saved_last_analyzed = self._get_saved_last_analyzed()
105
120
  if saved_last_analyzed is not None:
121
+ if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
122
+ # Use the maximum between the saved last analyzed and the start of the batch
123
+ return max(saved_last_analyzed, self._first_request)
106
124
  return saved_last_analyzed
107
125
  else:
108
126
  last_analyzed = self._get_initial_last_analyzed()
@@ -113,16 +131,20 @@ class _BatchWindow:
113
131
  def get_intervals(self) -> Iterator[_Interval]:
114
132
  """Generate the batch interval time ranges."""
115
133
  entered = False
134
+ last_analyzed = None
116
135
  # Iterate timestamp from start until timestamp <= stop - step
117
136
  # so that the last interval will end at (timestamp + step) <= stop.
118
137
  # Add 1 to stop - step to get <= and not <.
119
- for timestamp in range(self._start, self._stop - self._step + 1, self._step):
138
+ for timestamp in np.arange(
139
+ self._start, self._stop - self._step + 1, self._step
140
+ ):
120
141
  entered = True
121
142
  start_time = datetime.datetime.fromtimestamp(
122
143
  timestamp, tz=datetime.timezone.utc
123
144
  )
124
145
  end_time = datetime.datetime.fromtimestamp(
125
- timestamp + self._step, tz=datetime.timezone.utc
146
+ timestamp - self.TIMESTAMP_RESOLUTION_MICRO + self._step,
147
+ tz=datetime.timezone.utc,
126
148
  )
127
149
  yield _Interval(start_time, end_time)
128
150
 
@@ -134,6 +156,40 @@ class _BatchWindow:
134
156
  last_analyzed=last_analyzed,
135
157
  )
136
158
 
159
+ if self._endpoint_mode == mm_constants.EndpointMode.BATCH:
160
+ # If the endpoint is a batch endpoint, we need to update the last analyzed time
161
+ # to the end of the batch time.
162
+ if last_analyzed:
163
+ if last_analyzed - self.TIMESTAMP_RESOLUTION_MICRO < self._stop:
164
+ # If the last analyzed time is earlier than the stop time,
165
+ # yield the final partial interval from last_analyzed to stop
166
+ yield _Interval(
167
+ datetime.datetime.fromtimestamp(
168
+ last_analyzed, tz=datetime.timezone.utc
169
+ ),
170
+ datetime.datetime.fromtimestamp(
171
+ self._stop, tz=datetime.timezone.utc
172
+ ),
173
+ )
174
+ else:
175
+ # The time span between the start and end of the batch is shorter than the step,
176
+ # so we need to yield a partial interval covering that range.
177
+ yield _Interval(
178
+ datetime.datetime.fromtimestamp(
179
+ self._start, tz=datetime.timezone.utc
180
+ ),
181
+ datetime.datetime.fromtimestamp(
182
+ self._stop, tz=datetime.timezone.utc
183
+ ),
184
+ )
185
+
186
+ self._update_last_analyzed(last_analyzed=self._stop)
187
+ logger.debug(
188
+ "Updated the last analyzed time for this endpoint and application to the end of the batch time",
189
+ application=self._application,
190
+ last_analyzed=self._stop,
191
+ )
192
+
137
193
  if not entered:
138
194
  logger.debug(
139
195
  "All the data is set, but no complete intervals were found. "
@@ -178,33 +234,37 @@ class _BatchWindowGenerator(AbstractContextManager):
178
234
  def get_application_list(self) -> set[str]:
179
235
  return self._schedules_file.get_application_list()
180
236
 
181
- def get_min_last_analyzed(self) -> Optional[int]:
237
+ def get_min_last_analyzed(self) -> Optional[float]:
182
238
  return self._schedules_file.get_min_timestamp()
183
239
 
184
240
  @classmethod
185
241
  def _get_last_updated_time(
186
- cls, last_request: datetime.datetime, not_batch_endpoint: bool
187
- ) -> int:
242
+ cls,
243
+ last_request: datetime.datetime,
244
+ endpoint_mode: mm_constants.EndpointMode,
245
+ not_old_batch_endpoint: bool,
246
+ ) -> float:
188
247
  """
189
248
  Get the last updated time of a model endpoint.
190
249
  """
191
- last_updated = int(
192
- last_request.timestamp()
193
- - cast(
250
+
251
+ if endpoint_mode == mm_constants.EndpointMode.REAL_TIME:
252
+ last_updated = last_request.timestamp() - cast(
194
253
  float,
195
254
  mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs,
196
255
  )
197
- )
198
- if not not_batch_endpoint:
199
- # If the endpoint does not have a stream, `last_updated` should be
200
- # the minimum between the current time and the last updated time.
201
- # This compensates for the bumping mechanism - see
202
- # `update_model_endpoint_last_request`.
203
- last_updated = min(int(datetime_now().timestamp()), last_updated)
204
- logger.debug(
205
- "The endpoint does not have a stream", last_updated=last_updated
206
- )
207
- return last_updated
256
+ if not not_old_batch_endpoint:
257
+ # If the endpoint does not have a stream, `last_updated` should be
258
+ # the minimum between the current time and the last updated time.
259
+ # This compensates for the bumping mechanism - see
260
+ # `update_model_endpoint_last_request`.
261
+ last_updated = min(datetime_now().timestamp(), last_updated)
262
+ logger.debug(
263
+ "The endpoint does not have a stream", last_updated=last_updated
264
+ )
265
+
266
+ return last_updated
267
+ return last_request.timestamp()
208
268
 
209
269
  def get_intervals(
210
270
  self,
@@ -212,19 +272,24 @@ class _BatchWindowGenerator(AbstractContextManager):
212
272
  application: str,
213
273
  first_request: datetime.datetime,
214
274
  last_request: datetime.datetime,
215
- not_batch_endpoint: bool,
275
+ endpoint_mode: mm_constants.EndpointMode,
276
+ not_old_batch_endpoint: bool,
216
277
  ) -> Iterator[_Interval]:
217
278
  """
218
279
  Get the batch window for a specific endpoint and application.
219
280
  `first_request` and `last_request` are the timestamps of the first request and last
220
281
  request to the endpoint, respectively. They are guaranteed to be nonempty at this point.
221
282
  """
283
+
222
284
  self.batch_window = _BatchWindow(
223
285
  schedules_file=self._schedules_file,
224
286
  application=application,
225
287
  timedelta_seconds=self._timedelta,
226
- last_updated=self._get_last_updated_time(last_request, not_batch_endpoint),
227
- first_request=int(first_request.timestamp()),
288
+ last_updated=self._get_last_updated_time(
289
+ last_request, endpoint_mode, not_old_batch_endpoint
290
+ ),
291
+ first_request=first_request.timestamp(),
292
+ endpoint_mode=endpoint_mode,
228
293
  )
229
294
  yield from self.batch_window.get_intervals()
230
295
 
@@ -282,9 +347,9 @@ class MonitoringApplicationController:
282
347
  mlrun.platforms.iguazio.KafkaOutputStream,
283
348
  ],
284
349
  ] = {}
285
- self.feature_sets: OrderedDict[str, mlrun.feature_store.FeatureSet] = (
286
- collections.OrderedDict()
287
- )
350
+ self.feature_sets: collections.OrderedDict[
351
+ str, mlrun.feature_store.FeatureSet
352
+ ] = collections.OrderedDict()
288
353
  self.tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
289
354
  project=self.project
290
355
  )
@@ -394,15 +459,14 @@ class MonitoringApplicationController:
394
459
  base_period_minutes, current_min_last_analyzed, current_time
395
460
  )
396
461
  and (
397
- int(endpoint.status.last_request.timestamp())
398
- != last_timestamp_sent
462
+ endpoint.status.last_request.timestamp() != last_timestamp_sent
399
463
  or current_min_last_analyzed != last_analyzed_sent
400
464
  )
401
465
  ):
402
466
  # Write to schedule chief file the last_request, min_last_analyzed we pushed event to stream
403
467
  schedules_file.update_endpoint_timestamps(
404
468
  endpoint_uid=endpoint.metadata.uid,
405
- last_request=int(endpoint.status.last_request.timestamp()),
469
+ last_request=endpoint.status.last_request.timestamp(),
406
470
  last_analyzed=current_min_last_analyzed,
407
471
  )
408
472
  return True
@@ -428,7 +492,7 @@ class MonitoringApplicationController:
428
492
  @staticmethod
429
493
  def _should_send_nop_event(
430
494
  base_period_minutes: int,
431
- min_last_analyzed: int,
495
+ min_last_analyzed: float,
432
496
  current_time: datetime.datetime,
433
497
  ):
434
498
  if min_last_analyzed:
@@ -477,24 +541,71 @@ class MonitoringApplicationController:
477
541
  try:
478
542
  project_name = event[ControllerEvent.PROJECT]
479
543
  endpoint_id = event[ControllerEvent.ENDPOINT_ID]
480
- endpoint_name = event[ControllerEvent.ENDPOINT_NAME]
481
- applications_names = event[ControllerEvent.ENDPOINT_POLICY][
482
- ControllerEventEndpointPolicy.MONITORING_APPLICATIONS
483
- ]
544
+ not_old_batch_endpoint = True
545
+ if (
546
+ event[ControllerEvent.KIND]
547
+ == mm_constants.ControllerEventKind.BATCH_COMPLETE
548
+ ):
549
+ monitoring_functions = (
550
+ self.project_obj.list_model_monitoring_functions()
551
+ )
552
+ if monitoring_functions:
553
+ applications_names = list(
554
+ {app.metadata.name for app in monitoring_functions}
555
+ )
556
+ last_stream_timestamp = datetime.datetime.fromisoformat(
557
+ event[ControllerEvent.LAST_TIMESTAMP]
558
+ )
559
+ first_request = datetime.datetime.fromisoformat(
560
+ event[ControllerEvent.FIRST_TIMESTAMP]
561
+ )
562
+ endpoint_mode = mm_constants.EndpointMode.BATCH
563
+ model_endpoint = self.project_obj.list_model_endpoints(
564
+ uids=[endpoint_id],
565
+ latest_only=True,
566
+ ).endpoints
484
567
 
485
- not_batch_endpoint = (
486
- event[ControllerEvent.ENDPOINT_TYPE] != EndpointType.BATCH_EP
487
- )
568
+ if not model_endpoint:
569
+ logger.error(
570
+ "Batch model endpoint not found",
571
+ endpoint_id=endpoint_id,
572
+ project=project_name,
573
+ )
574
+ return
575
+
576
+ endpoint_name = model_endpoint[0].metadata.name
577
+ endpoint_updated = model_endpoint[0].metadata.updated.isoformat()
578
+
579
+ else:
580
+ logger.info("No monitoring functions found", project=self.project)
581
+ return
582
+
583
+ else:
584
+ endpoint_name = event[ControllerEvent.ENDPOINT_NAME]
585
+ applications_names = event[ControllerEvent.ENDPOINT_POLICY][
586
+ ControllerEventEndpointPolicy.MONITORING_APPLICATIONS
587
+ ]
588
+ last_stream_timestamp = datetime.datetime.fromisoformat(
589
+ event[ControllerEvent.TIMESTAMP]
590
+ )
591
+ first_request = datetime.datetime.fromisoformat(
592
+ event[ControllerEvent.FIRST_REQUEST]
593
+ )
594
+
595
+ endpoint_updated = event[ControllerEvent.ENDPOINT_POLICY][
596
+ ControllerEventEndpointPolicy.ENDPOINT_UPDATED
597
+ ]
598
+
599
+ endpoint_mode = mm_constants.EndpointMode.REAL_TIME
600
+
601
+ not_old_batch_endpoint = (
602
+ event[ControllerEvent.ENDPOINT_TYPE] != EndpointType.BATCH_EP
603
+ )
488
604
 
489
605
  logger.info(
490
- "Starting analyzing for", timestamp=event[ControllerEvent.TIMESTAMP]
491
- )
492
- last_stream_timestamp = datetime.datetime.fromisoformat(
493
- event[ControllerEvent.TIMESTAMP]
494
- )
495
- first_request = datetime.datetime.fromisoformat(
496
- event[ControllerEvent.FIRST_REQUEST]
606
+ "Starting to analyze", timestamp=last_stream_timestamp.isoformat()
497
607
  )
608
+
498
609
  with _BatchWindowGenerator(
499
610
  project=project_name,
500
611
  endpoint_id=endpoint_id,
@@ -506,12 +617,13 @@ class MonitoringApplicationController:
506
617
  end_infer_time,
507
618
  ) in batch_window_generator.get_intervals(
508
619
  application=application,
509
- not_batch_endpoint=not_batch_endpoint,
510
620
  first_request=first_request,
511
621
  last_request=last_stream_timestamp,
622
+ endpoint_mode=endpoint_mode,
623
+ not_old_batch_endpoint=not_old_batch_endpoint,
512
624
  ):
513
625
  data_in_window = False
514
- if not_batch_endpoint:
626
+ if not_old_batch_endpoint:
515
627
  # Serving endpoint - get the relevant window data from the TSDB
516
628
  prediction_metric = self.tsdb_connector.read_predictions(
517
629
  start=start_infer_time,
@@ -521,6 +633,16 @@ class MonitoringApplicationController:
521
633
  if prediction_metric.data:
522
634
  data_in_window = True
523
635
  else:
636
+ # Old batch endpoint - get the relevant window data from the parquet target
637
+ warnings.warn(
638
+ "Analyzing batch model endpoints with real time processing events is "
639
+ "deprecated in 1.10.0 and will be removed in 1.12.0. "
640
+ "Instead, use job-based serving to invoke and analyze offline batch model"
641
+ "endpoints.",
642
+ # TODO: Remove this in 1.12.0
643
+ FutureWarning,
644
+ )
645
+
524
646
  if endpoint_id not in self.feature_sets:
525
647
  self.feature_sets[endpoint_id] = fstore.get_feature_set(
526
648
  event[ControllerEvent.FEATURE_SET_URI]
@@ -533,7 +655,6 @@ class MonitoringApplicationController:
533
655
  self.feature_sets.popitem(last=True)
534
656
  m_fs = self.feature_sets.get(endpoint_id)
535
657
 
536
- # Batch endpoint - get the relevant window data from the parquet target
537
658
  df = m_fs.to_dataframe(
538
659
  start_time=start_infer_time,
539
660
  end_time=end_infer_time,
@@ -542,6 +663,7 @@ class MonitoringApplicationController:
542
663
  )
543
664
  if len(df) > 0:
544
665
  data_in_window = True
666
+
545
667
  if not data_in_window:
546
668
  logger.info(
547
669
  "No data found for the given interval",
@@ -557,56 +679,60 @@ class MonitoringApplicationController:
557
679
  endpoint_id=endpoint_id,
558
680
  )
559
681
  self._push_to_applications(
560
- start_infer_time=start_infer_time,
682
+ start_infer_time=start_infer_time
683
+ - datetime.timedelta(
684
+ batch_window_generator.batch_window.TIMESTAMP_RESOLUTION_MICRO
685
+ ), # We subtract a microsecond to ensure that the apps will retrieve start time data.
561
686
  end_infer_time=end_infer_time,
562
687
  endpoint_id=endpoint_id,
563
688
  endpoint_name=endpoint_name,
564
689
  project=project_name,
565
690
  applications_names=[application],
566
691
  model_monitoring_access_key=self.model_monitoring_access_key,
567
- endpoint_updated=event[ControllerEvent.ENDPOINT_POLICY][
568
- ControllerEventEndpointPolicy.ENDPOINT_UPDATED
569
- ],
692
+ endpoint_updated=endpoint_updated,
570
693
  )
571
- base_period = event[ControllerEvent.ENDPOINT_POLICY][
572
- ControllerEventEndpointPolicy.BASE_PERIOD
573
- ]
574
- current_time = mlrun.utils.datetime_now()
694
+
575
695
  if (
576
- self._should_send_nop_event(
696
+ event[ControllerEvent.KIND]
697
+ == mm_constants.ControllerEventKind.REGULAR_EVENT
698
+ ):
699
+ base_period = event[ControllerEvent.ENDPOINT_POLICY][
700
+ ControllerEventEndpointPolicy.BASE_PERIOD
701
+ ]
702
+ current_time = mlrun.utils.datetime_now()
703
+ if self._should_send_nop_event(
577
704
  base_period,
578
705
  batch_window_generator.get_min_last_analyzed(),
579
706
  current_time,
580
- )
581
- and event[ControllerEvent.KIND] != ControllerEventKind.NOP_EVENT
582
- ):
583
- event = {
584
- ControllerEvent.KIND: mm_constants.ControllerEventKind.NOP_EVENT,
585
- ControllerEvent.PROJECT: project_name,
586
- ControllerEvent.ENDPOINT_ID: endpoint_id,
587
- ControllerEvent.ENDPOINT_NAME: endpoint_name,
588
- ControllerEvent.TIMESTAMP: current_time.isoformat(
589
- timespec="microseconds"
590
- ),
591
- ControllerEvent.ENDPOINT_POLICY: event[
592
- ControllerEvent.ENDPOINT_POLICY
593
- ],
594
- ControllerEvent.ENDPOINT_TYPE: event[
595
- ControllerEvent.ENDPOINT_TYPE
596
- ],
597
- ControllerEvent.FEATURE_SET_URI: event[
598
- ControllerEvent.FEATURE_SET_URI
599
- ],
600
- ControllerEvent.FIRST_REQUEST: event[
601
- ControllerEvent.FIRST_REQUEST
602
- ],
603
- }
604
- self._push_to_main_stream(
605
- event=event,
606
- endpoint_id=endpoint_id,
607
- )
707
+ ):
708
+ event = {
709
+ ControllerEvent.KIND: mm_constants.ControllerEventKind.NOP_EVENT,
710
+ ControllerEvent.PROJECT: project_name,
711
+ ControllerEvent.ENDPOINT_ID: endpoint_id,
712
+ ControllerEvent.ENDPOINT_NAME: endpoint_name,
713
+ ControllerEvent.TIMESTAMP: current_time.isoformat(
714
+ timespec="microseconds"
715
+ ),
716
+ ControllerEvent.ENDPOINT_POLICY: event[
717
+ ControllerEvent.ENDPOINT_POLICY
718
+ ],
719
+ ControllerEvent.ENDPOINT_TYPE: event[
720
+ ControllerEvent.ENDPOINT_TYPE
721
+ ],
722
+ ControllerEvent.FEATURE_SET_URI: event[
723
+ ControllerEvent.FEATURE_SET_URI
724
+ ],
725
+ ControllerEvent.FIRST_REQUEST: event[
726
+ ControllerEvent.FIRST_REQUEST
727
+ ],
728
+ }
729
+ self._push_to_main_stream(
730
+ event=event,
731
+ endpoint_id=endpoint_id,
732
+ )
608
733
  logger.info(
609
- "Finish analyze for", timestamp=event[ControllerEvent.TIMESTAMP]
734
+ "Finish analyze for",
735
+ timestamp=last_stream_timestamp,
610
736
  )
611
737
 
612
738
  except Exception:
@@ -674,7 +800,18 @@ class MonitoringApplicationController:
674
800
  """
675
801
  logger.info("Starting monitoring controller chief")
676
802
  applications_names = []
677
- endpoints = self.project_obj.list_model_endpoints(tsdb_metrics=False).endpoints
803
+ endpoints = self.project_obj.list_model_endpoints(
804
+ tsdb_metrics=False,
805
+ modes=[
806
+ mm_constants.EndpointMode.REAL_TIME,
807
+ mm_constants.EndpointMode.BATCH_LEGACY,
808
+ ],
809
+ ).endpoints
810
+
811
+ if not endpoints:
812
+ logger.info("No model endpoints found", project=self.project)
813
+ return
814
+
678
815
  last_request_dict = self.tsdb_connector.get_last_request(
679
816
  endpoint_ids=[mep.metadata.uid for mep in endpoints]
680
817
  )
@@ -683,9 +820,6 @@ class MonitoringApplicationController:
683
820
  mm_constants.EventFieldType.ENDPOINT_ID
684
821
  )[mm_constants.ModelEndpointSchema.LAST_REQUEST].to_dict()
685
822
 
686
- if not endpoints:
687
- logger.info("No model endpoints found", project=self.project)
688
- return
689
823
  monitoring_functions = self.project_obj.list_model_monitoring_functions()
690
824
  if monitoring_functions:
691
825
  # if monitoring_functions: - TODO : ML-7700
@@ -731,7 +865,11 @@ class MonitoringApplicationController:
731
865
  for endpoint in endpoints:
732
866
  last_request = last_request_dict.get(endpoint.metadata.uid, None)
733
867
  if isinstance(last_request, float):
734
- last_request = pd.to_datetime(last_request, unit="s", utc=True)
868
+ last_request = datetime.datetime.fromtimestamp(
869
+ last_request, tz=datetime.timezone.utc
870
+ )
871
+ elif isinstance(last_request, pd.Timestamp):
872
+ last_request = last_request.to_pydatetime()
735
873
  endpoint.status.last_request = (
736
874
  last_request or endpoint.status.last_request
737
875
  )