mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
|
@@ -17,25 +17,35 @@ import socket
|
|
|
17
17
|
from abc import ABC, abstractmethod
|
|
18
18
|
from collections import defaultdict
|
|
19
19
|
from collections.abc import Iterator
|
|
20
|
-
from contextlib import contextmanager
|
|
21
|
-
from datetime import datetime, timedelta
|
|
20
|
+
from contextlib import contextmanager, nullcontext
|
|
21
|
+
from datetime import datetime, timedelta, timezone
|
|
22
22
|
from typing import Any, Literal, Optional, Union, cast
|
|
23
23
|
|
|
24
24
|
import pandas as pd
|
|
25
25
|
|
|
26
26
|
import mlrun
|
|
27
27
|
import mlrun.common.constants as mlrun_constants
|
|
28
|
+
import mlrun.common.helpers
|
|
28
29
|
import mlrun.common.schemas.model_monitoring.constants as mm_constants
|
|
30
|
+
import mlrun.common.types
|
|
29
31
|
import mlrun.datastore.datastore_profile as ds_profile
|
|
30
32
|
import mlrun.errors
|
|
31
33
|
import mlrun.model_monitoring.api as mm_api
|
|
32
34
|
import mlrun.model_monitoring.applications.context as mm_context
|
|
33
35
|
import mlrun.model_monitoring.applications.results as mm_results
|
|
36
|
+
import mlrun.model_monitoring.db._schedules as mm_schedules
|
|
34
37
|
import mlrun.model_monitoring.helpers as mm_helpers
|
|
38
|
+
import mlrun.utils
|
|
35
39
|
from mlrun.serving.utils import MonitoringApplicationToDict
|
|
36
40
|
from mlrun.utils import logger
|
|
37
41
|
|
|
38
42
|
|
|
43
|
+
class ExistingDataHandling(mlrun.common.types.StrEnum):
    """
    How a job run of a monitoring application treats a requested time range that
    relates to data already analyzed for the same (application, endpoint) pair.
    """

    # Raise an error when the requested start time precedes the last analyzed time.
    fail_on_overlap = "fail_on_overlap"
    # Move the start time up to the last analyzed time when no `base_period` is given
    # and the last analyzed time precedes the end time; otherwise raise an error.
    skip_overlap = "skip_overlap"
    # Delete the application's existing data for the endpoints before running.
    delete_all = "delete_all"
|
|
47
|
+
|
|
48
|
+
|
|
39
49
|
def _serialize_context_and_result(
|
|
40
50
|
*,
|
|
41
51
|
context: mm_context.MonitoringApplicationContext,
|
|
@@ -183,16 +193,47 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
183
193
|
cls,
|
|
184
194
|
*,
|
|
185
195
|
write_output: bool,
|
|
196
|
+
application_name: str,
|
|
197
|
+
artifact_path: str,
|
|
186
198
|
stream_profile: Optional[ds_profile.DatastoreProfile],
|
|
187
199
|
project: "mlrun.MlrunProject",
|
|
188
|
-
) -> Iterator[
|
|
189
|
-
|
|
200
|
+
) -> Iterator[
|
|
201
|
+
tuple[
|
|
202
|
+
dict[str, list[tuple]],
|
|
203
|
+
Optional[mm_schedules.ModelMonitoringSchedulesFileApplication],
|
|
204
|
+
]
|
|
205
|
+
]:
|
|
206
|
+
endpoints_output: dict[
|
|
207
|
+
str,
|
|
208
|
+
list[
|
|
209
|
+
tuple[
|
|
210
|
+
mm_context.MonitoringApplicationContext,
|
|
211
|
+
Union[
|
|
212
|
+
mm_results.ModelMonitoringApplicationResult,
|
|
213
|
+
mm_results.ModelMonitoringApplicationMetric,
|
|
214
|
+
list[
|
|
215
|
+
Union[
|
|
216
|
+
mm_results.ModelMonitoringApplicationResult,
|
|
217
|
+
mm_results.ModelMonitoringApplicationMetric,
|
|
218
|
+
mm_results._ModelMonitoringApplicationStats,
|
|
219
|
+
]
|
|
220
|
+
],
|
|
221
|
+
],
|
|
222
|
+
]
|
|
223
|
+
],
|
|
224
|
+
] = defaultdict(list)
|
|
225
|
+
application_schedules = nullcontext()
|
|
190
226
|
if write_output:
|
|
191
227
|
cls._check_writer_is_up(project)
|
|
228
|
+
application_schedules = (
|
|
229
|
+
mm_schedules.ModelMonitoringSchedulesFileApplication(
|
|
230
|
+
artifact_path, application=application_name
|
|
231
|
+
)
|
|
232
|
+
)
|
|
192
233
|
try:
|
|
193
|
-
yield endpoints_output
|
|
234
|
+
yield endpoints_output, application_schedules.__enter__()
|
|
194
235
|
finally:
|
|
195
|
-
if write_output:
|
|
236
|
+
if write_output and any(endpoints_output.values()):
|
|
196
237
|
logger.debug(
|
|
197
238
|
"Pushing model monitoring application job data to the writer stream",
|
|
198
239
|
passed_stream_profile=str(stream_profile),
|
|
@@ -206,11 +247,21 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
206
247
|
profile=stream_profile,
|
|
207
248
|
)
|
|
208
249
|
for endpoint_id, outputs in endpoints_output.items():
|
|
250
|
+
writer_events = []
|
|
251
|
+
for ctx, res in outputs:
|
|
252
|
+
if isinstance(res, list):
|
|
253
|
+
writer_events.extend(
|
|
254
|
+
_serialize_context_and_result(
|
|
255
|
+
context=ctx, result=sub_res
|
|
256
|
+
)
|
|
257
|
+
for sub_res in res
|
|
258
|
+
)
|
|
259
|
+
else:
|
|
260
|
+
writer_events.append(
|
|
261
|
+
_serialize_context_and_result(context=ctx, result=res)
|
|
262
|
+
)
|
|
209
263
|
writer_stream.push(
|
|
210
|
-
|
|
211
|
-
_serialize_context_and_result(context=ctx, result=res)
|
|
212
|
-
for ctx, res in outputs
|
|
213
|
-
],
|
|
264
|
+
writer_events,
|
|
214
265
|
partition_key=endpoint_id,
|
|
215
266
|
)
|
|
216
267
|
logger.debug(
|
|
@@ -218,6 +269,20 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
218
269
|
endpoints_output=endpoints_output,
|
|
219
270
|
)
|
|
220
271
|
|
|
272
|
+
logger.debug(
|
|
273
|
+
"Saving the application schedules",
|
|
274
|
+
application_name=application_name,
|
|
275
|
+
)
|
|
276
|
+
application_schedules.__exit__(None, None, None)
|
|
277
|
+
|
|
278
|
+
@classmethod
def _get_application_name(cls, context: "mlrun.MLClientCtx") -> str:
    """
    Extract the application name from the function URI stored in the run context.

    :param context: The MLRun context of the running job.

    :returns: The name component of the parsed function URI.
    """
    run_spec = context.to_dict().get("spec", {})
    function_uri = run_spec.get("function", "")
    parsed_uri = mlrun.common.helpers.parse_versioned_object_uri(function_uri)
    # The parsed URI unpacks into four parts; the application name is the second one.
    _, application_name, _, _ = parsed_uri
    return application_name
|
|
285
|
+
|
|
221
286
|
def _handler(
|
|
222
287
|
self,
|
|
223
288
|
context: "mlrun.MLClientCtx",
|
|
@@ -230,6 +295,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
230
295
|
end: Optional[str] = None,
|
|
231
296
|
base_period: Optional[int] = None,
|
|
232
297
|
write_output: bool = False,
|
|
298
|
+
existing_data_handling: ExistingDataHandling = ExistingDataHandling.fail_on_overlap,
|
|
233
299
|
stream_profile: Optional[ds_profile.DatastoreProfile] = None,
|
|
234
300
|
):
|
|
235
301
|
"""
|
|
@@ -250,6 +316,8 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
250
316
|
"working with endpoints, without any custom data-frame input"
|
|
251
317
|
)
|
|
252
318
|
|
|
319
|
+
application_name = self._get_application_name(context)
|
|
320
|
+
|
|
253
321
|
feature_stats = (
|
|
254
322
|
mm_api.get_sample_set_statistics(reference_data)
|
|
255
323
|
if reference_data is not None
|
|
@@ -257,24 +325,18 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
257
325
|
)
|
|
258
326
|
|
|
259
327
|
with self._push_to_writer(
|
|
260
|
-
write_output=write_output,
|
|
261
|
-
|
|
328
|
+
write_output=write_output,
|
|
329
|
+
stream_profile=stream_profile,
|
|
330
|
+
application_name=application_name,
|
|
331
|
+
artifact_path=context.artifact_path,
|
|
332
|
+
project=project,
|
|
333
|
+
) as (endpoints_output, application_schedules):
|
|
262
334
|
|
|
263
|
-
def call_do_tracking(
|
|
335
|
+
def call_do_tracking(
|
|
336
|
+
monitoring_context: mm_context.MonitoringApplicationContext,
|
|
337
|
+
):
|
|
264
338
|
nonlocal endpoints_output
|
|
265
339
|
|
|
266
|
-
if event is None:
|
|
267
|
-
event = {}
|
|
268
|
-
monitoring_context = (
|
|
269
|
-
mm_context.MonitoringApplicationContext._from_ml_ctx(
|
|
270
|
-
event=event,
|
|
271
|
-
application_name=self.__class__.__name__,
|
|
272
|
-
context=context,
|
|
273
|
-
project=project,
|
|
274
|
-
sample_df=sample_data,
|
|
275
|
-
feature_stats=feature_stats,
|
|
276
|
-
)
|
|
277
|
-
)
|
|
278
340
|
result = self.do_tracking(monitoring_context)
|
|
279
341
|
endpoints_output[monitoring_context.endpoint_id].append(
|
|
280
342
|
(monitoring_context, result)
|
|
@@ -282,119 +344,383 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
282
344
|
return result
|
|
283
345
|
|
|
284
346
|
if endpoints is not None:
|
|
285
|
-
resolved_endpoints = self.
|
|
347
|
+
resolved_endpoints = self._normalize_and_validate_endpoints(
|
|
286
348
|
project=project, endpoints=endpoints
|
|
287
349
|
)
|
|
288
|
-
|
|
289
|
-
|
|
350
|
+
if (
|
|
351
|
+
write_output
|
|
352
|
+
and existing_data_handling == ExistingDataHandling.delete_all
|
|
290
353
|
):
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
354
|
+
endpoint_ids = [
|
|
355
|
+
endpoint_id for _, endpoint_id in resolved_endpoints
|
|
356
|
+
]
|
|
357
|
+
context.logger.info(
|
|
358
|
+
"Deleting all the application data before running the application",
|
|
359
|
+
application_name=application_name,
|
|
360
|
+
endpoint_ids=endpoint_ids,
|
|
361
|
+
)
|
|
362
|
+
self._delete_application_data(
|
|
363
|
+
project_name=project.name,
|
|
364
|
+
application_name=application_name,
|
|
365
|
+
endpoint_ids=endpoint_ids,
|
|
366
|
+
application_schedules=application_schedules,
|
|
367
|
+
)
|
|
368
|
+
for endpoint_name, endpoint_id in resolved_endpoints:
|
|
369
|
+
for monitoring_ctx in self._window_generator(
|
|
370
|
+
start=start,
|
|
371
|
+
end=end,
|
|
372
|
+
base_period=base_period,
|
|
373
|
+
application_schedules=application_schedules,
|
|
374
|
+
endpoint_id=endpoint_id,
|
|
375
|
+
endpoint_name=endpoint_name,
|
|
376
|
+
application_name=application_name,
|
|
377
|
+
existing_data_handling=existing_data_handling,
|
|
378
|
+
sample_data=sample_data,
|
|
379
|
+
context=context,
|
|
380
|
+
project=project,
|
|
381
|
+
):
|
|
382
|
+
result = call_do_tracking(monitoring_ctx)
|
|
300
383
|
result_key = (
|
|
301
|
-
f"{endpoint_name}-{endpoint_id}_{
|
|
302
|
-
if
|
|
384
|
+
f"{endpoint_name}-{endpoint_id}_{monitoring_ctx.start_infer_time.isoformat()}_{monitoring_ctx.end_infer_time.isoformat()}"
|
|
385
|
+
if monitoring_ctx.start_infer_time
|
|
386
|
+
and monitoring_ctx.end_infer_time
|
|
303
387
|
else f"{endpoint_name}-{endpoint_id}"
|
|
304
388
|
)
|
|
305
389
|
|
|
306
390
|
context.log_result(
|
|
307
391
|
result_key, self._flatten_data_result(result)
|
|
308
392
|
)
|
|
393
|
+
# Check if no result was produced for any endpoint (e.g., due to no data in all windows)
|
|
394
|
+
if not any(endpoints_output.values()):
|
|
395
|
+
context.logger.warning(
|
|
396
|
+
"No data was found for any of the specified endpoints. "
|
|
397
|
+
"No results were produced",
|
|
398
|
+
application_name=application_name,
|
|
399
|
+
endpoints=endpoints,
|
|
400
|
+
start=start,
|
|
401
|
+
end=end,
|
|
402
|
+
)
|
|
309
403
|
else:
|
|
310
|
-
|
|
404
|
+
result = call_do_tracking(
|
|
405
|
+
mm_context.MonitoringApplicationContext._from_ml_ctx(
|
|
406
|
+
context=context,
|
|
407
|
+
project=project,
|
|
408
|
+
application_name=application_name,
|
|
409
|
+
event={},
|
|
410
|
+
sample_df=sample_data,
|
|
411
|
+
feature_stats=feature_stats,
|
|
412
|
+
)
|
|
413
|
+
)
|
|
414
|
+
return self._flatten_data_result(result)
|
|
311
415
|
|
|
312
416
|
@staticmethod
def _check_endpoints_first_request(
    endpoints: list[mlrun.common.schemas.ModelEndpoint],
) -> None:
    """
    Verify that every given model endpoint has a recorded first request.

    :param endpoints: The model endpoints to check.

    :raises mlrun.errors.MLRunValueError: If at least one endpoint has no first request.
    """
    endpoints_no_requests = []
    for model_endpoint in endpoints:
        if model_endpoint.status.first_request:
            continue
        # Collect (name, uid) so the error message identifies each offending endpoint
        endpoints_no_requests.append(
            (model_endpoint.metadata.name, model_endpoint.metadata.uid)
        )

    if not endpoints_no_requests:
        return

    raise mlrun.errors.MLRunValueError(
        "The following model endpoints have not had any requests yet and "
        "have no data, cannot run the model monitoring application on them: "
        f"{endpoints_no_requests}"
    )
|
|
432
|
+
|
|
433
|
+
@classmethod
def _normalize_and_validate_endpoints(
    cls,
    project: "mlrun.MlrunProject",
    endpoints: Union[
        list[tuple[str, str]], list[list[str]], list[str], Literal["all"]
    ],
) -> list[tuple[str, str]]:
    """
    Resolve the `endpoints` input into a validated list of (name, uid) pairs.

    :param project:   The project whose model endpoints are listed.
    :param endpoints: One of:

                      * a list of (name, uid) tuples/lists
                      * a list of endpoint names
                      * the string ``"all"`` for all the model endpoints in the project

    :returns: A list of (name, uid) tuples of the resolved model endpoints.

    :raises mlrun.errors.MLRunValueError:    If `endpoints` has an unsupported form, or if one of the
                                             resolved endpoints has not had any request yet.
    :raises mlrun.errors.MLRunNotFoundError: If a requested uid is missing, a (name, uid) pair does not
                                             match the listed endpoint, or no endpoint is found at all.
    """
    if isinstance(endpoints, list):
        if not endpoints:
            # `all()` is vacuously true for an empty list, so without this guard an empty
            # list would be handled as a list of (name, uid) pairs and queried with an
            # empty `uids` filter. There is nothing to resolve, so fail the same way as
            # when no endpoints are found.
            raise mlrun.errors.MLRunNotFoundError(
                f"Did not find any model endpoints {endpoints=}"
            )

        if all(
            isinstance(endpoint, (tuple, list)) and len(endpoint) == 2
            for endpoint in endpoints
        ):
            # A list of [(name, uid), ...] / [[name, uid], ...] tuples/lists
            endpoint_uids_to_names = {
                endpoint[1]: endpoint[0] for endpoint in endpoints
            }
            endpoints_list = project.list_model_endpoints(
                uids=list(endpoint_uids_to_names.keys()), latest_only=True
            ).endpoints

            # Check for name/uid mismatches
            for endpoint in endpoints_list:
                requested_name = endpoint_uids_to_names[
                    cast(str, endpoint.metadata.uid)
                ]
                if requested_name != endpoint.metadata.name:
                    raise mlrun.errors.MLRunNotFoundError(
                        "Could not find model endpoint with name "
                        f"'{requested_name}' "
                        f"and uid '{endpoint.metadata.uid}'"
                    )

            # Check for missing endpoint uids
            missing = set(endpoint_uids_to_names.keys()) - {
                cast(str, endpoint.metadata.uid) for endpoint in endpoints_list
            }
            if missing:
                raise mlrun.errors.MLRunNotFoundError(
                    "Could not find model endpoints with the following uids: "
                    f"{missing}"
                )

        elif all(isinstance(endpoint, str) for endpoint in endpoints):
            # A list of [name, ...] strings
            endpoint_names = cast(list[str], endpoints)
            endpoints_list = project.list_model_endpoints(
                names=endpoint_names, latest_only=True
            ).endpoints

            # Missing names are only reported - the run continues with the endpoints found
            missing = set(endpoints) - {
                endpoint.metadata.name for endpoint in endpoints_list
            }
            if missing:
                logger.warning(
                    "Could not list all the required endpoints",
                    missing_endpoints=missing,
                    endpoints_list=endpoints_list,
                )
        else:
            raise mlrun.errors.MLRunValueError(
                "Could not resolve the following list as a list of endpoints:\n"
                f"{endpoints}\n"
                "The list must be either a list of (name, uid) tuples/lists or a list of names."
            )
    elif endpoints == "all":
        endpoints_list = project.list_model_endpoints(latest_only=True).endpoints
    elif isinstance(endpoints, str):
        raise mlrun.errors.MLRunValueError(
            'A string input for `endpoints` can only be "all" for all the model endpoints in '
            "the project. If you want to select a single model endpoint with the given name, "
            f'use a list: `endpoints=["{endpoints}"]`.'
        )
    else:
        raise mlrun.errors.MLRunValueError(
            "Could not resolve the `endpoints` parameter. The parameter must be either:\n"
            "- a list of (name, uid) tuples/lists\n"
            "- a list of names\n"
            '- the string "all" for all the model endpoints in the project.'
        )

    if not endpoints_list:
        raise mlrun.errors.MLRunNotFoundError(
            f"Did not find any model endpoints {endpoints=}"
        )

    cls._check_endpoints_first_request(endpoints_list)

    return [
        (endpoint.metadata.name, cast(str, endpoint.metadata.uid))
        for endpoint in endpoints_list
    ]
|
|
524
|
+
|
|
372
525
|
@staticmethod
def _validate_and_get_window_length(
    *, base_period: int, start_dt: datetime, end_dt: datetime
) -> timedelta:
    """
    Validate `base_period` against the requested interval and return the window length.

    :param base_period: The number of minutes in a monitoring window. Must be a positive integer.
    :param start_dt:    The start of the full interval.
    :param end_dt:      The end of the full interval.

    :returns: The window length as a ``timedelta`` of `base_period` minutes.

    :raises mlrun.errors.MLRunValueError: If `base_period` is not a positive integer, or if the
                                          interval between `start_dt` and `end_dt` is not a whole
                                          multiple of the window length.
    """
    if not isinstance(base_period, int) or base_period <= 0:
        # Zero is rejected as well, hence "positive" rather than "nonnegative"
        raise mlrun.errors.MLRunValueError(
            "`base_period` must be a positive integer - the number of minutes in a monitoring window"
        )

    window_length = timedelta(minutes=base_period)

    full_interval_length = end_dt - start_dt
    remainder = full_interval_length % window_length
    if remainder:
        # Tailor the suggestion to the reason the interval does not divide evenly
        if full_interval_length < window_length:
            extra_msg = (
                "The `base_period` is longer than the difference between `end` and `start`: "
                f"{full_interval_length}. Consider not specifying `base_period`."
            )
        else:
            extra_msg = (
                f"Consider changing the `end` time to `end`={end_dt - remainder}"
            )
        raise mlrun.errors.MLRunValueError(
            "The difference between `end` and `start` must be a multiple of `base_period`: "
            f"`base_period`={window_length}, `start`={start_dt}, `end`={end_dt}. "
            f"{extra_msg}"
        )
    return window_length
|
|
554
|
+
|
|
555
|
+
@staticmethod
def _validate_monotonically_increasing_data(
    *,
    application_schedules: Optional[
        mm_schedules.ModelMonitoringSchedulesFileApplication
    ],
    endpoint_id: str,
    start_dt: datetime,
    end_dt: datetime,
    base_period: Optional[int],
    application_name: str,
    existing_data_handling: ExistingDataHandling,
) -> datetime:
    """
    Make sure that the (app, endpoint) pair doesn't write output before the last analyzed window.

    :param application_schedules:  The application schedules object, or a falsy value when
                                   schedules are not tracked - in which case nothing is validated.
    :param endpoint_id:            The model endpoint uid.
    :param start_dt:               The requested start time.
    :param end_dt:                 The requested end time.
    :param base_period:            The window length in minutes, if windows were requested.
    :param application_name:       The application name, used in logs and error messages.
    :param existing_data_handling: How to treat a start time that precedes the last analyzed time.

    :returns: The start time to use - the given `start_dt`, or the last analyzed time when the
              overlap is skipped.

    :raises mlrun.errors.MLRunValueError: If the start time precedes the last analyzed time and
                                          the overlap cannot be (or should not be) skipped.
    """
    if not application_schedules:
        return start_dt

    last_analyzed = application_schedules.get_endpoint_last_analyzed(endpoint_id)
    if not last_analyzed:
        logger.debug(
            "The application is running on the endpoint for the first time",
            endpoint_id=endpoint_id,
            start_dt=start_dt,
            application_name=application_name,
        )
        return start_dt

    if start_dt >= last_analyzed:
        # No overlap with the already analyzed data
        return start_dt

    if existing_data_handling != ExistingDataHandling.skip_overlap:
        raise mlrun.errors.MLRunValueError(
            "The start time for the application and endpoint precedes the last analyzed time: "
            f"start_dt='{start_dt}', last_analyzed='{last_analyzed}', {application_name=}, "
            f"{endpoint_id=}. "
            "Writing data out of order is not supported. You should change the start time to "
            f"'{last_analyzed}' or later."
        )

    # The start time is reset only when a single window is requested (no `base_period`)
    # and some of the requested range is still left after the last analyzed time
    if last_analyzed < end_dt and base_period is None:
        logger.warning(
            "Setting the start time to last_analyzed since the original start time precedes "
            "last_analyzed",
            original_start=start_dt,
            new_start=last_analyzed,
            application_name=application_name,
            endpoint_id=endpoint_id,
        )
        return last_analyzed

    raise mlrun.errors.MLRunValueError(
        "The start time for the application and endpoint precedes the last analyzed time: "
        f"start_dt='{start_dt}', last_analyzed='{last_analyzed}', {application_name=}, "
        f"{endpoint_id=}. "
        "Writing data out of order is not supported, and the start time could not be "
        "dynamically reset, as last_analyzed is later than the given end time or that "
        f"base_period was specified (end_dt='{end_dt}', {base_period=})."
    )
|
|
611
|
+
|
|
612
|
+
@staticmethod
def _delete_application_data(
    project_name: str,
    application_name: str,
    endpoint_ids: list[str],
    application_schedules: Optional[
        mm_schedules.ModelMonitoringSchedulesFileApplication
    ],
) -> None:
    """
    Delete the application's monitoring metrics for the given endpoints, and the
    endpoints' last analyzed records when an application schedules object is given.

    :param project_name:          The project name.
    :param application_name:      The application whose data is deleted.
    :param endpoint_ids:          The uids of the model endpoints to delete the data for.
    :param application_schedules: The application schedules object, or a falsy value to
                                  leave the schedules untouched.
    """
    run_db = mlrun.get_run_db()
    run_db.delete_model_monitoring_metrics(
        project=project_name,
        application_name=application_name,
        endpoint_ids=endpoint_ids,
    )

    if not application_schedules:
        return

    application_schedules.delete_endpoints_last_analyzed(endpoint_uids=endpoint_ids)
|
|
630
|
+
|
|
631
|
+
@classmethod
def _window_generator(
    cls,
    *,
    start: Optional[str],
    end: Optional[str],
    base_period: Optional[int],
    application_schedules: Optional[
        mm_schedules.ModelMonitoringSchedulesFileApplication
    ],
    endpoint_name: str,
    endpoint_id: str,
    application_name: str,
    existing_data_handling: ExistingDataHandling,
    context: "mlrun.MLClientCtx",
    project: "mlrun.MlrunProject",
    sample_data: Optional[pd.DataFrame],
) -> Iterator[mm_context.MonitoringApplicationContext]:
    """
    Generate the monitoring contexts of the windows to analyze for a single endpoint.

    Windows whose sample data-frame is empty are skipped. After the consumer finishes
    with a yielded window that has an end time, the endpoint's last analyzed time is
    updated in `application_schedules` (when given).

    :param start:                  ISO-format start time, or ``None``.
    :param end:                    ISO-format end time, or ``None``.
    :param base_period:            The window length in minutes, or ``None`` for a single window.
    :param application_schedules:  The application schedules object, if schedules are tracked.
    :param endpoint_name:          The model endpoint name.
    :param endpoint_id:            The model endpoint uid.
    :param application_name:       The application name.
    :param existing_data_handling: How to treat overlap with already analyzed data.
    :param context:                The MLRun context of the running job.
    :param project:                The project object.
    :param sample_data:            Custom sample data-frame, if given.

    :returns: An iterator over the monitoring application contexts, one per non-empty window.
    """

    def _emit_window(
        window_start: Optional[datetime], window_end: Optional[datetime]
    ) -> Iterator[mm_context.MonitoringApplicationContext]:
        window_event = {
            mm_constants.ApplicationEvent.ENDPOINT_NAME: endpoint_name,
            mm_constants.ApplicationEvent.ENDPOINT_ID: endpoint_id,
            mm_constants.ApplicationEvent.START_INFER_TIME: window_start,
            mm_constants.ApplicationEvent.END_INFER_TIME: window_end,
        }
        monitoring_ctx = mm_context.MonitoringApplicationContext._from_ml_ctx(
            event=window_event,
            application_name=application_name,
            context=context,
            project=project,
            sample_df=sample_data,
        )

        if monitoring_ctx.sample_df.empty:
            # Nothing to analyze in this window - skip it without touching the schedules
            context.logger.debug(
                "No sample data available for tracking",
                application_name=application_name,
                endpoint_id=monitoring_ctx.endpoint_id,
                start_time=monitoring_ctx.start_infer_time,
                end_time=monitoring_ctx.end_infer_time,
            )
            return

        yield monitoring_ctx

        # Reached only after the consumer resumes the generator for this window
        if application_schedules and window_end:
            application_schedules.update_endpoint_last_analyzed(
                endpoint_uid=endpoint_id, last_analyzed=window_end
            )

    if start is None or end is None:
        # Without both bounds there is one window, based on the `sample_data` input
        yield from _emit_window(None, None)
        return

    start_dt = datetime.fromisoformat(start)
    end_dt = datetime.fromisoformat(end)

    # Naive `start_dt` and `end_dt` are interpreted as UTC; mixing naive and aware is an error
    start_is_naive = start_dt.tzinfo is None
    end_is_naive = end_dt.tzinfo is None
    if start_is_naive != end_is_naive:
        raise mlrun.errors.MLRunValueError(
            "The start and end times must either both include time zone information or both be naive (no time "
            f"zone). Asserting the above failed, aborting the evaluate request: start={start}, end={end}."
        )
    if start_is_naive:
        start_dt = start_dt.replace(tzinfo=timezone.utc)
        end_dt = end_dt.replace(tzinfo=timezone.utc)

    if existing_data_handling != ExistingDataHandling.delete_all:
        start_dt = cls._validate_monotonically_increasing_data(
            application_schedules=application_schedules,
            endpoint_id=endpoint_id,
            start_dt=start_dt,
            end_dt=end_dt,
            base_period=base_period,
            application_name=application_name,
            existing_data_handling=existing_data_handling,
        )

    if base_period is None:
        # A single window covering the whole requested range
        yield from _emit_window(start_dt, end_dt)
        return

    window_length = cls._validate_and_get_window_length(
        base_period=base_period, start_dt=start_dt, end_dt=end_dt
    )

    # Walk the range in consecutive windows of `window_length`
    cursor = start_dt
    while cursor < end_dt:
        window_stop = min(cursor + window_length, end_dt)
        yield from _emit_window(cursor, window_stop)
        cursor = window_stop
|
|
399
725
|
|
|
400
726
|
@classmethod
|
|
@@ -445,6 +771,45 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
445
771
|
"""
|
|
446
772
|
return f"{handler_to_class}::{cls._handler.__name__}"
|
|
447
773
|
|
|
774
|
+
@classmethod
def _determine_job_name(
    cls,
    *,
    func_name: Optional[str],
    class_handler: Optional[str],
    handler_to_class: str,
) -> str:
    """
    Determine the batch app's job name. This name is also used as the application name,
    which is retrieved in `_get_application_name`.

    :param func_name:        An explicit function name. When empty, the normalized class name is used.
    :param class_handler:    The class handler given by the user, if any.
    :param handler_to_class: The handler path to the class, used to derive the class name
                             when `class_handler` is given.

    :returns: The job name, with the batch job suffix ensured.

    :raises mlrun.errors.MLRunValueError: If the name does not fully match ``APP_NAME_REGEX``.
    """
    if func_name:
        job_name = func_name
    else:
        # Take the class name out of a "module.Class::handler" style path when a class
        # handler was given, otherwise fall back to this class's own name
        class_name = (
            handler_to_class.split(".")[-1].split("::")[0]
            if class_handler
            else cls.__name__
        )
        job_name = mlrun.utils.normalize_name(class_name)

    name_match = mm_constants.APP_NAME_REGEX.fullmatch(job_name)
    if not name_match:
        raise mlrun.errors.MLRunValueError(
            "The function name does not comply with the required pattern "
            f"`{mm_constants.APP_NAME_REGEX.pattern}`. "
            "Please choose another `func_name`."
        )

    suffix_result = mlrun.utils.helpers.ensure_batch_job_suffix(job_name)
    job_name, was_renamed, suffix = suffix_result
    if was_renamed:
        mlrun.utils.logger.info(
            f'Changing function name - adding `"{suffix}"` suffix',
            func_name=job_name,
        )

    return job_name
|
|
812
|
+
|
|
448
813
|
@classmethod
|
|
449
814
|
def to_job(
|
|
450
815
|
cls,
|
|
@@ -484,6 +849,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
484
849
|
* ``end``, ``datetime``
|
|
485
850
|
* ``base_period``, ``int``
|
|
486
851
|
* ``write_output``, ``bool``
|
|
852
|
+
* ``existing_data_handling``, ``str``
|
|
487
853
|
|
|
488
854
|
For Git sources, add the source archive to the returned job and change the handler:
|
|
489
855
|
|
|
@@ -502,7 +868,10 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
502
868
|
:py:class:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase`,
|
|
503
869
|
is used.
|
|
504
870
|
:param func_path: The path to the function. If ``None``, the current notebook is used.
|
|
505
|
-
:param func_name: The name of the function. If
|
|
871
|
+
:param func_name: The name of the function. If ``None``, the normalized class name is used
|
|
872
|
+
(:py:meth:`mlrun.utils.helpers.normalize_name`).
|
|
873
|
+
A ``"-batch"`` suffix is guaranteed to be added if not already there.
|
|
874
|
+
The function name is also used as the application name to use for the results.
|
|
506
875
|
:param tag: Tag for the function.
|
|
507
876
|
:param image: Docker image to run the job on (when running remotely).
|
|
508
877
|
:param with_repo: Whether to clone the current repo to the build source.
|
|
@@ -523,12 +892,11 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
523
892
|
handler_to_class = class_handler or cls.__name__
|
|
524
893
|
handler = cls.get_job_handler(handler_to_class)
|
|
525
894
|
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
job_name = func_name if func_name else class_name
|
|
895
|
+
job_name = cls._determine_job_name(
|
|
896
|
+
func_name=func_name,
|
|
897
|
+
class_handler=class_handler,
|
|
898
|
+
handler_to_class=handler_to_class,
|
|
899
|
+
)
|
|
532
900
|
|
|
533
901
|
job = cast(
|
|
534
902
|
mlrun.runtimes.KubejobRuntime,
|
|
@@ -567,6 +935,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
567
935
|
end: Optional[datetime] = None,
|
|
568
936
|
base_period: Optional[int] = None,
|
|
569
937
|
write_output: bool = False,
|
|
938
|
+
existing_data_handling: ExistingDataHandling = ExistingDataHandling.fail_on_overlap,
|
|
570
939
|
stream_profile: Optional[ds_profile.DatastoreProfile] = None,
|
|
571
940
|
) -> "mlrun.RunObject":
|
|
572
941
|
"""
|
|
@@ -574,11 +943,14 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
574
943
|
:py:meth:`~mlrun.model_monitoring.applications.ModelMonitoringApplicationBase.do_tracking`
|
|
575
944
|
model monitoring logic as a :py:class:`~mlrun.runtimes.KubejobRuntime`, which is an MLRun function.
|
|
576
945
|
|
|
577
|
-
This function has default values for all of its arguments. You should
|
|
946
|
+
This function has default values for all of its arguments. You should change them when you want to pass
|
|
578
947
|
data to the application.
|
|
579
948
|
|
|
580
949
|
:param func_path: The path to the function. If ``None``, the current notebook is used.
|
|
581
|
-
:param func_name: The name of the function. If
|
|
950
|
+
:param func_name: The name of the function. If ``None``, the normalized class name is used
|
|
951
|
+
(:py:meth:`mlrun.utils.helpers.normalize_name`).
|
|
952
|
+
A ``"-batch"`` suffix is guaranteed to be added if not already there.
|
|
953
|
+
The function name is also used as the application name to use for the results.
|
|
582
954
|
:param tag: Tag for the function.
|
|
583
955
|
:param run_local: Whether to run the function locally or remotely.
|
|
584
956
|
:param auto_build: Whether to auto build the function.
|
|
@@ -588,6 +960,7 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
588
960
|
:param reference_data: Pandas data-frame or :py:class:`~mlrun.artifacts.dataset.DatasetArtifact` URI as
|
|
589
961
|
the reference dataset.
|
|
590
962
|
When set, its statistics override the model endpoint's feature statistics.
|
|
963
|
+
You do not need to have a model endpoint to use this option.
|
|
591
964
|
:param image: Docker image to run the job on (when running remotely).
|
|
592
965
|
:param with_repo: Whether to clone the current repo to the build source.
|
|
593
966
|
:param class_handler: The relative path to the class, useful when using Git sources or code from images.
|
|
@@ -608,6 +981,9 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
608
981
|
:param start: The start time of the endpoint's data, not included.
|
|
609
982
|
If you want the model endpoint's data at ``start`` included, you need to subtract a
|
|
610
983
|
small ``datetime.timedelta`` from it.
|
|
984
|
+
Make sure to include the time zone when constructing ``datetime.datetime`` objects
|
|
985
|
+
manually. When both ``start`` and ``end`` times do not include a time zone, they will
|
|
986
|
+
be treated as UTC.
|
|
611
987
|
:param end: The end time of the endpoint's data, included.
|
|
612
988
|
Please note: when ``start`` and ``end`` are set, they create a left-open time interval
|
|
613
989
|
("window") :math:`(\\operatorname{start}, \\operatorname{end}]` that excludes the
|
|
@@ -616,17 +992,31 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
616
992
|
taken in the window's data.
|
|
617
993
|
:param base_period: The window length in minutes. If ``None``, the whole window from ``start`` to ``end``
|
|
618
994
|
is taken. If an integer is specified, the application is run from ``start`` to ``end``
|
|
619
|
-
in ``base_period`` length windows
|
|
620
|
-
therefore may be shorter:
|
|
995
|
+
in ``base_period`` length windows:
|
|
621
996
|
:math:`(\\operatorname{start}, \\operatorname{start} + \\operatorname{base\\_period}],
|
|
622
997
|
(\\operatorname{start} + \\operatorname{base\\_period},
|
|
623
998
|
\\operatorname{start} + 2\\cdot\\operatorname{base\\_period}],
|
|
624
999
|
..., (\\operatorname{start} +
|
|
625
|
-
m\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
|
|
626
|
-
where :math:`m` is
|
|
1000
|
+
(m - 1)\\cdot\\operatorname{base\\_period}, \\operatorname{end}]`,
|
|
1001
|
+
where :math:`m` is a positive integer and :math:`\\operatorname{end} =
|
|
1002
|
+
\\operatorname{start} + m\\cdot\\operatorname{base\\_period}`.
|
|
1003
|
+
Please note that the difference between ``end`` and ``start`` must be a multiple of
|
|
1004
|
+
``base_period``.
|
|
627
1005
|
:param write_output: Whether to write the results and metrics to the time-series DB. Can be ``True`` only
|
|
628
1006
|
if ``endpoints`` are passed.
|
|
629
1007
|
Note: the model monitoring infrastructure must be up for the writing to work.
|
|
1008
|
+
:param existing_data_handling:
|
|
1009
|
+
How to handle the existing application data for the model endpoints when writing
|
|
1010
|
+
new data whose requested ``start`` time precedes the ``end`` time of a previous run
|
|
1011
|
+
that also wrote to the database. Relevant only when ``write_output=True``.
|
|
1012
|
+
The options are:
|
|
1013
|
+
|
|
1014
|
+
- ``"fail_on_overlap"``: Default. An error is raised.
|
|
1015
|
+
- ``"skip_overlap"``: the overlapping data is ignored and the
|
|
1016
|
+
time window is cut so that it starts at the earliest possible time after ``start``.
|
|
1017
|
+
- ``"delete_all"``: delete all the data that was written by the application to the
|
|
1018
|
+
model endpoints, regardless of the time window, and write the new data.
|
|
1019
|
+
|
|
630
1020
|
:param stream_profile: The stream datastore profile. It should be provided only when running locally and
|
|
631
1021
|
writing the outputs to the database (i.e., when both ``run_local`` and
|
|
632
1022
|
``write_output`` are set to ``True``).
|
|
@@ -665,17 +1055,6 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
665
1055
|
)
|
|
666
1056
|
params["end"] = end.isoformat() if isinstance(end, datetime) else end
|
|
667
1057
|
params["base_period"] = base_period
|
|
668
|
-
params["write_output"] = write_output
|
|
669
|
-
if stream_profile:
|
|
670
|
-
if not run_local:
|
|
671
|
-
raise mlrun.errors.MLRunValueError(
|
|
672
|
-
"Passing a `stream_profile` is relevant only when running locally"
|
|
673
|
-
)
|
|
674
|
-
if not write_output:
|
|
675
|
-
raise mlrun.errors.MLRunValueError(
|
|
676
|
-
"Passing a `stream_profile` is relevant only when writing the outputs"
|
|
677
|
-
)
|
|
678
|
-
params["stream_profile"] = stream_profile
|
|
679
1058
|
elif start or end or base_period:
|
|
680
1059
|
raise mlrun.errors.MLRunValueError(
|
|
681
1060
|
"Custom `start` and `end` times or base_period are supported only with endpoints data"
|
|
@@ -685,6 +1064,19 @@ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
|
|
|
685
1064
|
"Writing the application output or passing `stream_profile` are supported only with endpoints data"
|
|
686
1065
|
)
|
|
687
1066
|
|
|
1067
|
+
params["write_output"] = write_output
|
|
1068
|
+
params["existing_data_handling"] = existing_data_handling
|
|
1069
|
+
if stream_profile:
|
|
1070
|
+
if not run_local:
|
|
1071
|
+
raise mlrun.errors.MLRunValueError(
|
|
1072
|
+
"Passing a `stream_profile` is relevant only when running locally"
|
|
1073
|
+
)
|
|
1074
|
+
if not write_output:
|
|
1075
|
+
raise mlrun.errors.MLRunValueError(
|
|
1076
|
+
"Passing a `stream_profile` is relevant only when writing the outputs"
|
|
1077
|
+
)
|
|
1078
|
+
params["stream_profile"] = stream_profile
|
|
1079
|
+
|
|
688
1080
|
inputs: dict[str, str] = {}
|
|
689
1081
|
for data, identifier in [
|
|
690
1082
|
(sample_data, "sample_data"),
|