mlrun 1.10.0rc14__py3-none-any.whl → 1.10.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of mlrun has been flagged as potentially problematic.
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/llm_prompt.py +6 -0
- mlrun/artifacts/manager.py +0 -5
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/functions.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -0
- mlrun/common/schemas/workflow.py +2 -0
- mlrun/config.py +1 -1
- mlrun/datastore/model_provider/model_provider.py +42 -14
- mlrun/datastore/model_provider/openai_provider.py +96 -15
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +64 -9
- mlrun/db/nopdb.py +13 -0
- mlrun/launcher/local.py +13 -0
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/applications/base.py +176 -20
- mlrun/model_monitoring/db/_schedules.py +84 -24
- mlrun/model_monitoring/db/tsdb/base.py +72 -1
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +7 -1
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +25 -0
- mlrun/model_monitoring/helpers.py +26 -4
- mlrun/projects/pipelines.py +44 -24
- mlrun/projects/project.py +26 -7
- mlrun/runtimes/daskjob.py +6 -0
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/application/application.py +2 -0
- mlrun/runtimes/nuclio/function.py +6 -0
- mlrun/runtimes/nuclio/serving.py +12 -11
- mlrun/runtimes/pod.py +21 -0
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +0 -2
- mlrun/serving/server.py +122 -53
- mlrun/serving/states.py +128 -44
- mlrun/serving/system_steps.py +84 -58
- mlrun/utils/helpers.py +82 -12
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/METADATA +2 -7
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/RECORD +48 -48
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc14.dist-info → mlrun-1.10.0rc16.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/applications/base.py

@@ -17,7 +17,7 @@ import socket
 from abc import ABC, abstractmethod
 from collections import defaultdict
 from collections.abc import Iterator
-from contextlib import contextmanager
+from contextlib import contextmanager, nullcontext
 from datetime import datetime, timedelta
 from typing import Any, Literal, Optional, Union, cast
 
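The only import change here is `nullcontext`, which the hunks below use as a no-op stand-in when no schedules file is needed. A minimal sketch of that pattern, with illustrative names that are not from mlrun:

    from contextlib import nullcontext
    from tempfile import TemporaryDirectory

    def maybe_workspace(enabled: bool):
        # Return a real context manager only when enabled; nullcontext() still
        # supports __enter__/__exit__ but does nothing and yields None.
        return TemporaryDirectory() if enabled else nullcontext()

    for enabled in (False, True):
        cm = maybe_workspace(enabled)
        value = cm.__enter__()  # None when disabled, a directory path otherwise
        cm.__exit__(None, None, None)
        print(enabled, value)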
@@ -31,6 +31,7 @@ import mlrun.errors
 import mlrun.model_monitoring.api as mm_api
 import mlrun.model_monitoring.applications.context as mm_context
 import mlrun.model_monitoring.applications.results as mm_results
+import mlrun.model_monitoring.db._schedules as mm_schedules
 import mlrun.model_monitoring.helpers as mm_helpers
 from mlrun.serving.utils import MonitoringApplicationToDict
 from mlrun.utils import logger
@@ -183,14 +184,27 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         cls,
         *,
         write_output: bool,
+        application_name: str,
+        artifact_path: str,
         stream_profile: Optional[ds_profile.DatastoreProfile],
         project: "mlrun.MlrunProject",
-    ) -> Iterator[dict[str, list[tuple]]]:
+    ) -> Iterator[
+        tuple[
+            dict[str, list[tuple]],
+            Optional[mm_schedules.ModelMonitoringSchedulesFileApplication],
+        ]
+    ]:
         endpoints_output: dict[str, list[tuple]] = defaultdict(list)
+        application_schedules = nullcontext()
         if write_output:
             cls._check_writer_is_up(project)
+            application_schedules = (
+                mm_schedules.ModelMonitoringSchedulesFileApplication(
+                    artifact_path, application=application_name
+                )
+            )
         try:
-            yield endpoints_output
+            yield endpoints_output, application_schedules.__enter__()
         finally:
             if write_output:
                 logger.debug(
@@ -218,6 +232,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                     endpoints_output=endpoints_output,
                 )
 
+                logger.debug(
+                    "Saving the application schedules",
+                    application_name=application_name,
+                )
+                application_schedules.__exit__(None, None, None)
+
     def _handler(
         self,
         context: "mlrun.MLClientCtx",
@@ -230,6 +250,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end: Optional[str] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
+        allow_unordered_data: bool = False,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ):
         """
@@ -250,6 +271,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 "working with endpoints, without any custom data-frame input"
             )
 
+        application_name = self.__class__.__name__
+
         feature_stats = (
             mm_api.get_sample_set_statistics(reference_data)
             if reference_data is not None
@@ -257,8 +280,12 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         )
 
         with self._push_to_writer(
-            write_output=write_output, stream_profile=stream_profile, project=project
-        ) as endpoints_output:
+            write_output=write_output,
+            stream_profile=stream_profile,
+            application_name=application_name,
+            artifact_path=context.artifact_path,
+            project=project,
+        ) as (endpoints_output, application_schedules):
 
             def call_do_tracking(event: Optional[dict] = None):
                 nonlocal endpoints_output
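`_push_to_writer` now yields a pair, so the `with` statement unpacks it directly. A small self-contained sketch of a generator-based context manager that yields a tuple (names are illustrative only):

    import io
    from contextlib import contextmanager, nullcontext

    @contextmanager
    def push_scope(write_output: bool):
        outputs: dict[str, list] = {}
        # A real buffer only when output is written; otherwise a no-op stand-in.
        schedules = io.StringIO() if write_output else nullcontext()
        try:
            yield outputs, schedules.__enter__()
        finally:
            schedules.__exit__(None, None, None)

    with push_scope(write_output=False) as (outputs, schedules):
        print(outputs, schedules)  # {} None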
@@ -268,7 +295,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                 monitoring_context = (
                     mm_context.MonitoringApplicationContext._from_ml_ctx(
                         event=event,
-                        application_name=self.__class__.__name__,
+                        application_name=application_name,
                         context=context,
                         project=project,
                         sample_df=sample_data,
@@ -285,10 +312,16 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
             resolved_endpoints = self._handle_endpoints_type_evaluate(
                 project=project, endpoints=endpoints
             )
-            for endpoint_name, endpoint_id in resolved_endpoints:
-                for window_start, window_end in self._window_generator(
-                    start=start, end=end, base_period=base_period
-                ):
+            for endpoint_name, endpoint_id in resolved_endpoints:
+                for window_start, window_end in self._window_generator(
+                    start=start,
+                    end=end,
+                    base_period=base_period,
+                    application_schedules=application_schedules,
+                    endpoint_id=endpoint_id,
+                    application_name=application_name,
+                    allow_unordered_data=allow_unordered_data,
+                ):
                     result = call_do_tracking(
                         event={
                             mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
@@ -370,8 +403,103 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         )
 
     @staticmethod
+    def _validate_and_get_window_length(
+        *, base_period: int, start_dt: datetime, end_dt: datetime
+    ) -> timedelta:
+        if not isinstance(base_period, int) or base_period <= 0:
+            raise mlrun.errors.MLRunValueError(
+                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
+            )
+
+        window_length = timedelta(minutes=base_period)
+
+        full_interval_length = end_dt - start_dt
+        remainder = full_interval_length % window_length
+        if remainder:
+            if full_interval_length < window_length:
+                extra_msg = (
+                    "The `base_period` is longer than the difference between `end` and `start`: "
+                    f"{full_interval_length}. Consider not specifying `base_period`."
+                )
+            else:
+                extra_msg = (
+                    f"Consider changing the `end` time to `end`={end_dt - remainder}"
+                )
+            raise mlrun.errors.MLRunValueError(
+                "The difference between `end` and `start` must be a multiple of `base_period`: "
+                f"`base_period`={window_length}, `start`={start_dt}, `end`={end_dt}. "
+                f"{extra_msg}"
+            )
+        return window_length
+
+    @staticmethod
+    def _validate_monotonically_increasing_data(
+        *,
+        application_schedules: Optional[
+            mm_schedules.ModelMonitoringSchedulesFileApplication
+        ],
+        endpoint_id: str,
+        start_dt: datetime,
+        end_dt: datetime,
+        base_period: Optional[int],
+        application_name: str,
+        allow_unordered_data: bool,
+    ) -> datetime:
+        """Make sure that the (app, endpoint) pair doesn't write output before the last analyzed window"""
+        if application_schedules:
+            last_analyzed = application_schedules.get_endpoint_last_analyzed(
+                endpoint_id
+            )
+            if last_analyzed:
+                if start_dt < last_analyzed:
+                    if allow_unordered_data:
+                        if last_analyzed < end_dt and base_period is None:
+                            logger.warn(
+                                "Setting the start time to last_analyzed since the original start time precedes "
+                                "last_analyzed",
+                                original_start=start_dt,
+                                new_start=last_analyzed,
+                                application_name=application_name,
+                                endpoint_id=endpoint_id,
+                            )
+                            start_dt = last_analyzed
+                        else:
+                            raise mlrun.errors.MLRunValueError(
+                                "The start time for the application and endpoint precedes the last analyzed time: "
+                                f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                                "Writing data out of order is not supported, and the start time could not be "
+                                "dynamically reset, as last_analyzed is later than the given end time or that "
+                                f"base_period was specified ({end_dt=}, {base_period=})."
+                            )
+                    else:
+                        raise mlrun.errors.MLRunValueError(
+                            "The start time for the application and endpoint precedes the last analyzed time: "
+                            f"{start_dt=}, {last_analyzed=}, {application_name=}, {endpoint_id=}. "
+                            "Writing data out of order is not supported. You should change the start time to "
+                            f"'{last_analyzed}' or later."
+                        )
+            else:
+                logger.debug(
+                    "The application is running on the endpoint for the first time",
+                    endpoint_id=endpoint_id,
+                    start_dt=start_dt,
+                    application_name=application_name,
+                )
+        return start_dt
+
+    @classmethod
     def _window_generator(
-        *, start: Optional[str], end: Optional[str], base_period: Optional[int]
+        cls,
+        *,
+        start: Optional[str],
+        end: Optional[str],
+        base_period: Optional[int],
+        application_schedules: Optional[
+            mm_schedules.ModelMonitoringSchedulesFileApplication
+        ],
+        endpoint_id: str,
+        application_name: str,
+        allow_unordered_data: bool,
     ) -> Iterator[tuple[Optional[datetime], Optional[datetime]]]:
         if start is None or end is None:
             # A single window based on the `sample_data` input - see `_handler`.
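The divisibility check in `_validate_and_get_window_length` relies on `timedelta` supporting the modulo operator. The arithmetic, in isolation:

    from datetime import datetime, timedelta, timezone

    start = datetime(2025, 1, 1, tzinfo=timezone.utc)
    end = datetime(2025, 1, 1, 2, 30, tzinfo=timezone.utc)
    window_length = timedelta(minutes=60)

    remainder = (end - start) % window_length  # timedelta % timedelta -> timedelta
    print(remainder)        # 0:30:00 -> truthy, so validation fails
    print(end - remainder)  # 2025-01-01 02:00:00+00:00, the suggested aligned `end`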
@@ -381,20 +509,36 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         start_dt = datetime.fromisoformat(start)
         end_dt = datetime.fromisoformat(end)
 
+        start_dt = cls._validate_monotonically_increasing_data(
+            application_schedules=application_schedules,
+            endpoint_id=endpoint_id,
+            start_dt=start_dt,
+            end_dt=end_dt,
+            base_period=base_period,
+            application_name=application_name,
+            allow_unordered_data=allow_unordered_data,
+        )
+
         if base_period is None:
             yield start_dt, end_dt
+            if application_schedules:
+                application_schedules.update_endpoint_last_analyzed(
+                    endpoint_uid=endpoint_id, last_analyzed=end_dt
+                )
             return
 
-        if not isinstance(base_period, int) or base_period <= 0:
-            raise mlrun.errors.MLRunValueError(
-                "`base_period` must be a nonnegative integer - the number of minutes in a monitoring window"
-            )
+        window_length = cls._validate_and_get_window_length(
+            base_period=base_period, start_dt=start_dt, end_dt=end_dt
+        )
 
-        window_length = timedelta(minutes=base_period)
         current_start_time = start_dt
         while current_start_time < end_dt:
             current_end_time = min(current_start_time + window_length, end_dt)
             yield current_start_time, current_end_time
+            if application_schedules:
+                application_schedules.update_endpoint_last_analyzed(
+                    endpoint_uid=endpoint_id, last_analyzed=current_end_time
+                )
             current_start_time = current_end_time
 
     @classmethod
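Stripped of the schedules bookkeeping, the generator walks fixed-size windows and clamps the last one to `end`. A runnable reduction of that loop:

    from collections.abc import Iterator
    from datetime import datetime, timedelta, timezone

    def windows(
        start: datetime, end: datetime, base_period: int
    ) -> Iterator[tuple[datetime, datetime]]:
        window_length = timedelta(minutes=base_period)
        current = start
        while current < end:
            current_end = min(current + window_length, end)
            yield current, current_end
            current = current_end

    start = datetime(2025, 1, 1, tzinfo=timezone.utc)
    for window in windows(start, start + timedelta(hours=2), base_period=45):
        print(window)  # 45-minute windows; this reduction clamps the last one to 30 minutes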
@@ -484,6 +628,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         * ``end``, ``datetime``
         * ``base_period``, ``int``
         * ``write_output``, ``bool``
+        * ``allow_unordered_data``, ``bool``
 
         For Git sources, add the source archive to the returned job and change the handler:
 
@@ -567,6 +712,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         end: Optional[datetime] = None,
         base_period: Optional[int] = None,
         write_output: bool = False,
+        allow_unordered_data: bool = False,
         stream_profile: Optional[ds_profile.DatastoreProfile] = None,
     ) -> "mlrun.RunObject":
         """
@@ -608,6 +754,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         :param start: The start time of the endpoint's data, not included.
                       If you want the model endpoint's data at ``start`` included, you need to subtract a
                       small ``datetime.timedelta`` from it.
+                      Make sure to include the time zone when constructing `datetime.datetime` objects
+                      manually.
         :param end: The end time of the endpoint's data, included.
                     Please note: when ``start`` and ``end`` are set, they create a left-open time interval
                     ("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
@@ -616,17 +764,24 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
                             taken in the window's data.
         :param base_period: The window length in minutes. If ``None``, the whole window from ``start`` to ``end``
                             is taken. If an integer is specified, the application is run from ``start`` to ``end``
-                            in ``base_period`` length windows, except the last one that ends at ``end`` and
-                            therefore may be shorter:
+                            in ``base_period`` length windows:
                             :math:`(\\operatorname{start}, \\operatorname{start} + \\operatorname{base\\_period}],
                             (\\operatorname{start} + \\operatorname{base\\_period},
                             \\operatorname{start} + 2\\cdot\\operatorname{base\\_period}],
                             ..., (\\operatorname{start} +
-                            m\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
-                            where :math:`m` is a whole number.
+                            (m - 1)\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
+                            where :math:`m` is a positive integer and :math:`\\operatorname{end} =
+                            \\operatorname{start} + m\\cdot\\operatorname{base\\_period}`.
+                            Please note that the difference between ``end`` and ``start`` must be a multiple of
+                            ``base_period``.
         :param write_output: Whether to write the results and metrics to the time-series DB. Can be ``True`` only
                              if ``endpoints`` are passed.
                              Note: the model monitoring infrastructure must be up for the writing to work.
+        :param allow_unordered_data: Relevant only when writing outputs to the database. When ``False``, and the
+                                     requested ``start`` time precedes the ``end`` time of a previous run that also
+                                     wrote to the database - an error is raised.
+                                     If ``True``, when the previously described situation occurs, the relevant time
+                                     window is cut so that it starts at the earliest possible time after ``start``.
         :param stream_profile: The stream datastore profile. It should be provided only when running locally and
                                writing the outputs to the database (i.e., when both ``run_local`` and
                                ``write_output`` are set to ``True``).
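Taken together, the documented parameters suggest a call along these lines. This is a sketch only: the application class and endpoint name are placeholders, arguments not visible in this diff are omitted, and it assumes a configured MLRun project with monitoring infrastructure for `write_output` to take effect:

    from datetime import datetime, timedelta, timezone

    from mlrun.model_monitoring.applications import ModelMonitoringApplicationBase

    class DemoApp(ModelMonitoringApplicationBase):
        def do_tracking(self, monitoring_context):
            return []  # placeholder implementation

    end = datetime(2025, 1, 1, 12, tzinfo=timezone.utc)  # timezone-aware, per the docs
    run = DemoApp.evaluate(
        endpoints=["demo-endpoint"],      # placeholder endpoint name
        start=end - timedelta(hours=2),
        end=end,
        base_period=None,                 # one window over the whole span
        write_output=True,
        allow_unordered_data=True,        # clip the window instead of raising on overlap
    )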
@@ -666,6 +821,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
         params["end"] = end.isoformat() if isinstance(end, datetime) else end
         params["base_period"] = base_period
         params["write_output"] = write_output
+        params["allow_unordered_data"] = allow_unordered_data
         if stream_profile:
             if not run_local:
                 raise mlrun.errors.MLRunValueError(
mlrun/model_monitoring/db/_schedules.py

@@ -13,25 +13,36 @@
 # limitations under the License.
 
 import json
+import sys
 from abc import ABC, abstractmethod
 from contextlib import AbstractContextManager
+from datetime import datetime, timezone
 from types import TracebackType
-from typing import Final, Optional
+from typing import TYPE_CHECKING, Final, Optional
 
 import botocore.exceptions
 
+import mlrun
 import mlrun.common.schemas as schemas
 import mlrun.errors
 import mlrun.model_monitoring.helpers
+import mlrun.utils.helpers
 from mlrun.utils import logger
 
+if TYPE_CHECKING:
+    if sys.version_info >= (3, 11):
+        from typing import Self
+    else:
+        from typing_extensions import Self
+
 
 class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
     DEFAULT_SCHEDULES: Final = {}
     INITIAL_CONTENT = json.dumps(DEFAULT_SCHEDULES)
     ENCODING = "utf-8"
 
-    def __init__(self):
+    def __init__(self) -> None:
+        # `self._item` is the persistent version of the monitoring schedules.
         self._item = self.get_data_item_object()
         if self._item:
            self._path = self._item.url
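The guarded import keeps `typing_extensions` a type-time-only dependency on Python < 3.11; since `Self` is then undefined at runtime, annotations that use it must stay quoted. The pattern in isolation:

    import sys
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:  # never executed at runtime
        if sys.version_info >= (3, 11):
            from typing import Self
        else:
            from typing_extensions import Self

    class Resource:
        def open(self) -> "Self":  # quoted: `Self` only exists for type checkers
            return self

    print(Resource().open())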
@@ -43,9 +54,16 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
         self._open_schedules = False
 
     @abstractmethod
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         pass
 
+    def _exists(self) -> bool:
+        """Return whether the file exists or not"""
+        return (
+            self._fs is None  # In-memory store
+            or self._fs.exists(self._path)
+        )
+
     def create(self) -> None:
         """Create a schedules file with initial content - an empty dictionary"""
         logger.debug("Creating model monitoring schedules file", path=self._item.url)
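`_exists` treats a missing filesystem (`self._fs is None`) as the in-memory case and otherwise defers to an fsspec-style `exists`. A quick illustration with fsspec's memory filesystem (assuming fsspec is installed; mlrun depends on it):

    import fsspec

    fs = fsspec.filesystem("memory")
    path = "memory://schedules/app.json"

    print(fs.exists(path))  # False: nothing written yet
    with fs.open(path, "w") as f:
        f.write("{}")       # the INITIAL_CONTENT equivalent
    print(fs.exists(path))  # True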
@@ -53,10 +71,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
 
     def delete(self) -> None:
         """Delete schedules file if it exists"""
-        if (
-            self._fs is None  # In-memory store
-            or self._fs.exists(self._path)
-        ):
+        if self._exists():
             logger.debug(
                 "Deleting model monitoring schedules file", path=self._item.url
             )
@@ -100,7 +115,7 @@ class ModelMonitoringSchedulesFileBase(AbstractContextManager, ABC):
         self._schedules = self.DEFAULT_SCHEDULES
         self._open_schedules = False
 
-    def __enter__(self) -> "ModelMonitoringSchedulesFileBase":
+    def __enter__(self) -> "Self":
         self._open()
         return super().__enter__()
 
@@ -129,12 +144,11 @@ class ModelMonitoringSchedulesFileEndpoint(ModelMonitoringSchedulesFileBase):
         :param project: The project name.
         :param endpoint_id: The endpoint ID.
         """
-        # `self._item` is the persistent version of the monitoring schedules.
         self._project = project
         self._endpoint_id = endpoint_id
         super().__init__()
 
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         return mlrun.model_monitoring.helpers.get_monitoring_schedules_endpoint_data(
             project=self._project, endpoint_id=self._endpoint_id
         )
@@ -179,7 +193,7 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         self._project = project
         super().__init__()
 
-    def get_data_item_object(self) -> mlrun.DataItem:
+    def get_data_item_object(self) -> "mlrun.DataItem":
         return mlrun.model_monitoring.helpers.get_monitoring_schedules_chief_data(
             project=self._project
         )
@@ -216,22 +230,49 @@ class ModelMonitoringSchedulesFileChief(ModelMonitoringSchedulesFileBase):
         return set(self._schedules.keys())
 
     def get_or_create(self) -> None:
-        try:
-            self._open()
-        except (
-            mlrun.errors.MLRunNotFoundError,
-            # Different errors are raised for S3 or local storage, see ML-8042
-            botocore.exceptions.ClientError,
-            FileNotFoundError,
-        ):
+        if not self._exists():
             self.create()
 
 
-def delete_model_monitoring_schedules_folder(project: str) -> None:
-    """Delete the model monitoring schedules folder of the project"""
-    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_folder_path(
-        project
-    )
+class ModelMonitoringSchedulesFileApplication(ModelMonitoringSchedulesFileBase):
+    def __init__(self, out_path: str, application: str) -> None:
+        self._out_path = out_path
+        self._application = application
+        super().__init__()
+
+    def get_data_item_object(self) -> "mlrun.DataItem":
+        return mlrun.model_monitoring.helpers.get_monitoring_schedules_user_application_data(
+            out_path=self._out_path, application=self._application
+        )
+
+    def _open(self) -> None:
+        if not self._exists():
+            # Create the file when it is needed the first time
+            logger.info(
+                "Creating the application schedules file",
+                application=self._application,
+                path=self._path,
+            )
+            self.create()
+        super()._open()
+
+    def get_endpoint_last_analyzed(self, endpoint_uid: str) -> Optional[datetime]:
+        self._check_open_schedules()
+        if endpoint_uid in self._schedules:
+            return datetime.fromisoformat(self._schedules[endpoint_uid])
+        else:
+            return None
+
+    def update_endpoint_last_analyzed(
+        self, endpoint_uid: str, last_analyzed: datetime
+    ) -> None:
+        self._check_open_schedules()
+        self._schedules[endpoint_uid] = last_analyzed.astimezone(
+            timezone.utc
+        ).isoformat()
+
+
+def _delete_folder(folder: str) -> None:
     fs = mlrun.datastore.store_manager.object(folder).store.filesystem
     if fs and fs.exists(folder):
         logger.debug("Deleting model monitoring schedules folder", folder=folder)
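The new class stores one ISO-8601 string per endpoint and normalizes to UTC on write. The round-trip it performs, reduced to plain dict operations (illustrative only):

    from datetime import datetime, timezone
    from typing import Optional

    schedules: dict[str, str] = {}  # stands in for the JSON schedules file

    def update_last_analyzed(endpoint_uid: str, last_analyzed: datetime) -> None:
        # Normalize to UTC before serializing, as update_endpoint_last_analyzed does.
        schedules[endpoint_uid] = last_analyzed.astimezone(timezone.utc).isoformat()

    def get_last_analyzed(endpoint_uid: str) -> Optional[datetime]:
        raw = schedules.get(endpoint_uid)
        return datetime.fromisoformat(raw) if raw is not None else None

    update_last_analyzed("ep-1", datetime(2025, 1, 1, 14, 30, tzinfo=timezone.utc))
    print(get_last_analyzed("ep-1"))  # 2025-01-01 14:30:00+00:00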
@@ -240,3 +281,22 @@ def delete_model_monitoring_schedules_folder(project: str) -> None:
         raise mlrun.errors.MLRunValueError(
             "Cannot delete a folder without a file-system"
         )
+
+
+def delete_model_monitoring_schedules_folder(project: str) -> None:
+    """Delete the model monitoring schedules folder of the project"""
+    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_folder_path(
+        project
+    )
+    _delete_folder(folder)
+
+
+def delete_model_monitoring_schedules_user_folder(project: str) -> None:
+    """Delete the user created schedules folder (created through `app.evaluate`)"""
+    out_path = mlrun.utils.helpers.template_artifact_path(
+        mlrun.mlconf.artifact_path, project=project
+    )
+    folder = mlrun.model_monitoring.helpers._get_monitoring_schedules_user_folder_path(
+        out_path
+    )
+    _delete_folder(folder)
mlrun/model_monitoring/db/tsdb/base.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from abc import ABC, abstractmethod
-from datetime import datetime
+from datetime import datetime, timedelta
 from typing import Callable, ClassVar, Literal, Optional, Union
 
 import pandas as pd
@@ -80,6 +80,21 @@ class TSDBConnector(ABC):
         :raise mlrun.errors.MLRunRuntimeError: If an error occurred while writing the event.
         """
 
+    @abstractmethod
+    def get_drift_data(
+        self,
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        """
+        Fetches drift counts per interval in the specified time range.
+
+        :param start: The start time of the query.
+        :param end: The end time of the query.
+
+        :return: A ModelEndpointDriftValues object containing drift data.
+        """
+
     @abstractmethod
     def delete_tsdb_records(
         self,
@@ -703,3 +718,59 @@
             )
         )
         return {dict_key: metrics}
+
+    @staticmethod
+    def _prepare_aligned_start_end(
+        start: datetime, end: datetime
+    ) -> tuple[datetime, datetime, str]:
+        delta = end - start
+        if delta <= timedelta(hours=6):
+            interval = "10m"
+            start = start.replace(
+                minute=start.minute // 10 * 10, second=0, microsecond=0
+            )
+        elif delta <= timedelta(hours=72):
+            interval = "1h"
+            start = start.replace(minute=0, second=0, microsecond=0)
+        else:
+            interval = "1d"
+            start = start.replace(hour=0, minute=0, second=0, microsecond=0)
+
+        interval_map = {
+            "10m": timedelta(minutes=10),
+            "1h": timedelta(hours=1),
+            "1d": timedelta(days=1),
+        }
+        delta = end - start
+        interval_td = interval_map[interval]
+        end = start + (delta // interval_td) * interval_td
+        return start, end, interval
+
+    @staticmethod
+    def _df_to_drift_data(df: pd.DataFrame) -> mm_schemas.ModelEndpointDriftValues:
+        suspected_val = mm_schemas.constants.ResultStatusApp.potential_detection.value
+        detected_val = mm_schemas.constants.ResultStatusApp.detected.value
+        aggregated_df = (
+            df.groupby(["_wstart", f"max({mm_schemas.ResultData.RESULT_STATUS})"])
+            .size()  # add size column for each interval x result-status combination
+            .unstack()  # create a size column for each result-status
+            .reindex(
+                columns=[suspected_val, detected_val], fill_value=0
+            )  # ensure both columns exists
+            .fillna(0)
+            .astype(int)
+            .rename(
+                columns={
+                    suspected_val: "count_suspected",
+                    detected_val: "count_detected",
+                }
+            )
+        )
+        values = list(
+            zip(
+                aggregated_df.index,
+                aggregated_df["count_suspected"],
+                aggregated_df["count_detected"],
+            )
+        )
+        return mm_schemas.ModelEndpointDriftValues(values=values)
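The `groupby -> size -> unstack -> reindex` chain in `_df_to_drift_data` turns per-row status records into one row per interval with a count column per status. On a toy frame (column names mirror the diff; the concrete status codes are assumptions):

    import pandas as pd

    df = pd.DataFrame(
        {
            "_wstart": ["10:00", "10:00", "10:10"],
            "max(result_status)": [1, 2, 2],  # assumed: 1 = suspected, 2 = detected
        }
    )

    counts = (
        df.groupby(["_wstart", "max(result_status)"])
        .size()                                 # rows per (interval, status) pair
        .unstack()                              # one column per status value
        .reindex(columns=[1, 2], fill_value=0)  # guarantee both columns exist
        .fillna(0)
        .astype(int)
        .rename(columns={1: "count_suspected", 2: "count_detected"})
    )
    print(counts)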
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py

@@ -165,6 +165,7 @@ class TDEngineSchema:
         preform_agg_funcs_columns: Optional[list[str]] = None,
         order_by: Optional[str] = None,
         desc: Optional[bool] = None,
+        partition_by: Optional[str] = None,
     ) -> str:
         if agg_funcs and not columns_to_filter:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -176,7 +177,10 @@ class TDEngineSchema:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "`agg_funcs` must be provided when using interval"
             )
-
+        if partition_by and not agg_funcs:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "`agg_funcs` must be provided when using partition by"
+            )
         if sliding_window_step and not interval:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "`interval` must be provided when using sliding window"
@@ -232,6 +236,8 @@ class TDEngineSchema:
         if isinstance(group_by, list):
             group_by = ", ".join(group_by)
         query.write(f" GROUP BY {group_by}")
+        if partition_by:
+            query.write(f" PARTITION BY {partition_by}")
         if order_by:
             desc = " DESC" if desc else ""
             query.write(f" ORDER BY {order_by}{desc}")
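With the new argument, the builder appends an optional `PARTITION BY` clause between `GROUP BY` and `ORDER BY`. A reduced sketch of the same buffer-based assembly (the function name and defaults are illustrative, not the mlrun API):

    from io import StringIO
    from typing import Optional

    def build_select(
        table: str,
        columns: str,
        partition_by: Optional[str] = None,
        order_by: Optional[str] = None,
        desc: bool = False,
    ) -> str:
        query = StringIO()
        query.write(f"SELECT {columns} FROM {table}")
        if partition_by:
            query.write(f" PARTITION BY {partition_by}")
        if order_by:
            query.write(f" ORDER BY {order_by}{' DESC' if desc else ''}")
        return query.getvalue()

    print(build_select("results", "max(result_status)", partition_by="endpoint_id"))
    # SELECT max(result_status) FROM results PARTITION BY endpoint_id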