mlrun 1.10.0rc16__py3-none-any.whl → 1.10.1rc4__py3-none-any.whl

This diff shows the changes between publicly released versions of the mlrun package as published to their public registry, and is provided for informational purposes only.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (101)
  1. mlrun/__init__.py +22 -2
  2. mlrun/artifacts/document.py +6 -1
  3. mlrun/artifacts/llm_prompt.py +21 -15
  4. mlrun/artifacts/model.py +3 -3
  5. mlrun/common/constants.py +9 -0
  6. mlrun/common/formatters/artifact.py +1 -0
  7. mlrun/common/model_monitoring/helpers.py +86 -0
  8. mlrun/common/schemas/__init__.py +2 -0
  9. mlrun/common/schemas/auth.py +2 -0
  10. mlrun/common/schemas/function.py +10 -0
  11. mlrun/common/schemas/hub.py +30 -18
  12. mlrun/common/schemas/model_monitoring/__init__.py +2 -0
  13. mlrun/common/schemas/model_monitoring/constants.py +30 -6
  14. mlrun/common/schemas/model_monitoring/functions.py +13 -4
  15. mlrun/common/schemas/model_monitoring/model_endpoints.py +11 -0
  16. mlrun/common/schemas/pipeline.py +1 -1
  17. mlrun/common/schemas/serving.py +3 -0
  18. mlrun/common/schemas/workflow.py +1 -0
  19. mlrun/common/secrets.py +22 -1
  20. mlrun/config.py +34 -21
  21. mlrun/datastore/__init__.py +11 -3
  22. mlrun/datastore/azure_blob.py +162 -47
  23. mlrun/datastore/base.py +265 -7
  24. mlrun/datastore/datastore.py +10 -5
  25. mlrun/datastore/datastore_profile.py +61 -5
  26. mlrun/datastore/model_provider/huggingface_provider.py +367 -0
  27. mlrun/datastore/model_provider/mock_model_provider.py +87 -0
  28. mlrun/datastore/model_provider/model_provider.py +211 -74
  29. mlrun/datastore/model_provider/openai_provider.py +243 -71
  30. mlrun/datastore/s3.py +24 -2
  31. mlrun/datastore/store_resources.py +4 -4
  32. mlrun/datastore/storeytargets.py +2 -3
  33. mlrun/datastore/utils.py +15 -3
  34. mlrun/db/base.py +27 -19
  35. mlrun/db/httpdb.py +57 -48
  36. mlrun/db/nopdb.py +25 -10
  37. mlrun/execution.py +55 -13
  38. mlrun/hub/__init__.py +15 -0
  39. mlrun/hub/module.py +181 -0
  40. mlrun/k8s_utils.py +105 -16
  41. mlrun/launcher/base.py +13 -6
  42. mlrun/launcher/local.py +2 -0
  43. mlrun/model.py +9 -3
  44. mlrun/model_monitoring/api.py +66 -27
  45. mlrun/model_monitoring/applications/__init__.py +1 -1
  46. mlrun/model_monitoring/applications/base.py +388 -138
  47. mlrun/model_monitoring/applications/context.py +2 -4
  48. mlrun/model_monitoring/applications/results.py +4 -7
  49. mlrun/model_monitoring/controller.py +239 -101
  50. mlrun/model_monitoring/db/_schedules.py +36 -13
  51. mlrun/model_monitoring/db/_stats.py +4 -3
  52. mlrun/model_monitoring/db/tsdb/base.py +29 -9
  53. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +4 -5
  54. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +154 -50
  55. mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
  56. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
  57. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +245 -51
  58. mlrun/model_monitoring/helpers.py +28 -5
  59. mlrun/model_monitoring/stream_processing.py +45 -14
  60. mlrun/model_monitoring/writer.py +220 -1
  61. mlrun/platforms/__init__.py +3 -2
  62. mlrun/platforms/iguazio.py +7 -3
  63. mlrun/projects/operations.py +16 -11
  64. mlrun/projects/pipelines.py +2 -2
  65. mlrun/projects/project.py +157 -69
  66. mlrun/run.py +97 -20
  67. mlrun/runtimes/__init__.py +18 -0
  68. mlrun/runtimes/base.py +14 -6
  69. mlrun/runtimes/daskjob.py +1 -0
  70. mlrun/runtimes/local.py +5 -2
  71. mlrun/runtimes/mounts.py +20 -2
  72. mlrun/runtimes/nuclio/__init__.py +1 -0
  73. mlrun/runtimes/nuclio/application/application.py +147 -17
  74. mlrun/runtimes/nuclio/function.py +72 -27
  75. mlrun/runtimes/nuclio/serving.py +102 -20
  76. mlrun/runtimes/pod.py +213 -21
  77. mlrun/runtimes/utils.py +49 -9
  78. mlrun/secrets.py +54 -13
  79. mlrun/serving/remote.py +79 -6
  80. mlrun/serving/routers.py +23 -41
  81. mlrun/serving/server.py +230 -40
  82. mlrun/serving/states.py +605 -232
  83. mlrun/serving/steps.py +62 -0
  84. mlrun/serving/system_steps.py +136 -81
  85. mlrun/serving/v2_serving.py +9 -10
  86. mlrun/utils/helpers.py +215 -83
  87. mlrun/utils/logger.py +3 -1
  88. mlrun/utils/notifications/notification/base.py +18 -0
  89. mlrun/utils/notifications/notification/git.py +2 -4
  90. mlrun/utils/notifications/notification/mail.py +38 -15
  91. mlrun/utils/notifications/notification/slack.py +2 -4
  92. mlrun/utils/notifications/notification/webhook.py +2 -5
  93. mlrun/utils/notifications/notification_pusher.py +1 -1
  94. mlrun/utils/version/version.json +2 -2
  95. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/METADATA +51 -50
  96. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/RECORD +100 -95
  97. mlrun/api/schemas/__init__.py +0 -259
  98. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/WHEEL +0 -0
  99. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/entry_points.txt +0 -0
  100. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/licenses/LICENSE +0 -0
  101. {mlrun-1.10.0rc16.dist-info → mlrun-1.10.1rc4.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/writer.py
@@ -13,9 +13,12 @@
  # limitations under the License.

  import json
+ import typing
  from datetime import datetime, timezone
  from typing import Any, Callable, NewType, Optional

+ import storey
+
  import mlrun.common.model_monitoring
  import mlrun.common.schemas
  import mlrun.common.schemas.alert as alert_objects
@@ -31,6 +34,8 @@ from mlrun.common.schemas.model_monitoring.constants import (
      WriterEvent,
      WriterEventKind,
  )
+ from mlrun.config import config
+ from mlrun.model_monitoring.db import TSDBConnector
  from mlrun.model_monitoring.db._stats import (
      ModelMonitoringCurrentStatsFile,
      ModelMonitoringDriftMeasuresFile,
@@ -73,7 +78,6 @@ class ModelMonitoringWriter(StepToDict):
          self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
              project=self.project, secret_provider=secret_provider
          )
-         self._endpoints_records = {}

      def _generate_event_on_drift(
          self,
@@ -226,3 +230,218 @@ class ModelMonitoringWriter(StepToDict):
          )

          logger.info("Model monitoring writer finished handling event")
+
+
+ class WriterGraphFactory:
+     def __init__(
+         self,
+         parquet_path: str,
+     ):
+         self.parquet_path = parquet_path
+         self.parquet_batching_max_events = (
+             config.model_endpoint_monitoring.writer_graph.max_events
+         )
+         self.parquet_batching_timeout_secs = (
+             config.model_endpoint_monitoring.writer_graph.parquet_batching_timeout_secs
+         )
+
+     def apply_writer_graph(
+         self,
+         fn: mlrun.runtimes.ServingRuntime,
+         tsdb_connector: TSDBConnector,
+     ):
+         graph = typing.cast(
+             mlrun.serving.states.RootFlowStep,
+             fn.set_topology(mlrun.serving.states.StepKinds.flow, engine="async"),
+         )
+
+         graph.to("ReconstructWriterEvent", "event_reconstructor")
+         step = tsdb_connector.add_pre_writer_steps(
+             graph=graph, after="event_reconstructor"
+         )
+         before_choice = step.name if step else "event_reconstructor"
+         graph.add_step("KindChoice", "kind_choice_step", after=before_choice)
+         tsdb_connector.apply_writer_steps(
+             graph=graph,
+             after="kind_choice_step",
+         )
+         graph.add_step(
+             "AlertGenerator",
+             "alert_generator",
+             after="kind_choice_step",
+             project=fn.metadata.project,
+         )
+         graph.add_step(
+             "storey.Filter",
+             name="filter_none",
+             _fn="(event is not None)",
+             after="alert_generator",
+         )
+         graph.add_step(
+             "mlrun.serving.remote.MLRunAPIRemoteStep",
+             name="alert_generator_api_call",
+             after="filter_none",
+             method="POST",
+             path=f"projects/{fn.metadata.project}/events/{{kind}}",
+             fill_placeholders=True,
+         )
+
+         graph.add_step(
+             "mlrun.datastore.storeytargets.ParquetStoreyTarget",
+             alternative_v3io_access_key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ACCESS_KEY,
+             name="stats_writer",
+             after="kind_choice_step",
+             graph_shape="cylinder",
+             path=self.parquet_path
+             if self.parquet_path.endswith("/")
+             else self.parquet_path + "/",
+             max_events=self.parquet_batching_max_events,
+             flush_after_seconds=self.parquet_batching_timeout_secs,
+             columns=[
+                 StatsData.TIMESTAMP,
+                 StatsData.STATS,
+                 WriterEvent.ENDPOINT_ID,
+                 StatsData.STATS_NAME,
+             ],
+             partition_cols=[WriterEvent.ENDPOINT_ID, StatsData.STATS_NAME],
+             single_file=True,
+         )
+
+
+ class ReconstructWriterEvent(storey.MapClass):
+     def __init__(self):
+         super().__init__()
+
+     def do(self, event: dict) -> dict[str, Any]:
+         logger.info("Reconstructing the event", event=event)
+         kind = event.pop(WriterEvent.EVENT_KIND, WriterEventKind.RESULT)
+         result_event = _AppResultEvent(json.loads(event.pop(WriterEvent.DATA, "{}")))
+         result_event.update(_AppResultEvent(event))
+
+         expected_keys = list(
+             set(WriterEvent.list()).difference(
+                 [WriterEvent.EVENT_KIND, WriterEvent.DATA]
+             )
+         )
+         if kind == WriterEventKind.METRIC:
+             expected_keys.extend(MetricData.list())
+         elif kind == WriterEventKind.RESULT:
+             expected_keys.extend(ResultData.list())
+         elif kind == WriterEventKind.STATS:
+             expected_keys.extend(StatsData.list())
+         else:
+             raise _WriterEventValueError(
+                 f"Unknown event kind: {kind}, expected one of: {WriterEventKind.list()}"
+             )
+         missing_keys = [key for key in expected_keys if key not in result_event]
+         if missing_keys:
+             raise _WriterEventValueError(
+                 f"The received event misses some keys compared to the expected "
+                 f"monitoring application event schema: {missing_keys} for event kind {kind}"
+             )
+         result_event["kind"] = kind
+         if kind in WriterEventKind.user_app_outputs():
+             result_event[WriterEvent.END_INFER_TIME] = datetime.fromisoformat(
+                 event[WriterEvent.END_INFER_TIME]
+             )
+         if kind == WriterEventKind.STATS:
+             result_event[StatsData.STATS] = json.dumps(result_event[StatsData.STATS])
+         return result_event
+
+
+ class KindChoice(storey.Choice):
+     def select_outlets(self, event):
+         kind = event.get("kind")
+         logger.info("Selecting the outlet for the event", kind=kind)
+         if kind == WriterEventKind.METRIC:
+             outlets = ["tsdb_metrics"]
+         elif kind == WriterEventKind.RESULT:
+             outlets = ["tsdb_app_results", "alert_generator"]
+         elif kind == WriterEventKind.STATS:
+             outlets = ["stats_writer"]
+         else:
+             raise _WriterEventValueError(
+                 f"Unknown event kind: {kind}, expected one of: {WriterEventKind.list()}"
+             )
+         return outlets
+
+
+ class AlertGenerator(storey.MapClass):
+     def __init__(self, project: str, **kwargs):
+         self.project = project
+         super().__init__(**kwargs)
+
+     def do(self, event: dict) -> Optional[dict[str, Any]]:
+         kind = event.pop(WriterEvent.EVENT_KIND, WriterEventKind.RESULT)
+         if (
+             mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.enabled
+             and kind == WriterEventKind.RESULT
+             and (
+                 event[ResultData.RESULT_STATUS] == ResultStatusApp.detected.value
+                 or event[ResultData.RESULT_STATUS]
+                 == ResultStatusApp.potential_detection.value
+             )
+         ):
+             event_value = {
+                 "app_name": event[WriterEvent.APPLICATION_NAME],
+                 "model": event[WriterEvent.ENDPOINT_NAME],
+                 "model_endpoint_id": event[WriterEvent.ENDPOINT_ID],
+                 "result_name": event[ResultData.RESULT_NAME],
+                 "result_value": event[ResultData.RESULT_VALUE],
+             }
+             data = self._generate_event_data(
+                 entity_id=get_result_instance_fqn(
+                     event[WriterEvent.ENDPOINT_ID],
+                     event[WriterEvent.APPLICATION_NAME],
+                     event[ResultData.RESULT_NAME],
+                 ),
+                 result_status=event[ResultData.RESULT_STATUS],
+                 event_value=event_value,
+                 project_name=self.project,
+                 result_kind=event[ResultData.RESULT_KIND],
+             )
+             event = data.dict()
+             logger.info("Generated alert event", event=event)
+             return event
+         return None
+
+     @staticmethod
+     def _generate_alert_event_kind(
+         result_kind: int, result_status: int
+     ) -> alert_objects.EventKind:
+         """Generate the required Event Kind format for the alerting system"""
+         event_kind = ResultKindApp(value=result_kind).name
+
+         if result_status == ResultStatusApp.detected.value:
+             event_kind = f"{event_kind}_detected"
+         else:
+             event_kind = f"{event_kind}_suspected"
+         return alert_objects.EventKind(
+             value=mlrun.utils.helpers.normalize_name(event_kind)
+         )
+
+     def _generate_event_data(
+         self,
+         entity_id: str,
+         result_status: int,
+         event_value: dict,
+         project_name: str,
+         result_kind: int,
+     ) -> mlrun.common.schemas.Event:
+         entity = mlrun.common.schemas.alert.EventEntities(
+             kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
+             project=project_name,
+             ids=[entity_id],
+         )
+
+         event_kind = self._generate_alert_event_kind(
+             result_status=result_status, result_kind=result_kind
+         )
+
+         event_data = mlrun.common.schemas.Event(
+             kind=alert_objects.EventKind(value=event_kind),
+             entity=entity,
+             value_dict=event_value,
+         )
+
+         return event_data
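
Taken together, these additions give the writer a declarative async graph: ReconstructWriterEvent normalizes raw writer events, KindChoice routes each event by kind to the TSDB metric/result writers, the alert generator, or the parquet stats writer, and AlertGenerator turns detected or suspected results into alert events posted via MLRunAPIRemoteStep. A minimal wiring sketch, with a hypothetical project name and parquet path (get_tsdb_connector is the same helper the writer's __init__ uses above)::

    import mlrun
    import mlrun.model_monitoring
    from mlrun.model_monitoring.writer import WriterGraphFactory

    # hypothetical serving function to host the writer graph
    fn = mlrun.new_function(
        "monitoring-writer", project="my-project", kind="serving", image="mlrun/mlrun"
    )

    # resolve the TSDB backend the same way ModelMonitoringWriter does
    tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
        project="my-project", secret_provider=None
    )

    factory = WriterGraphFactory(parquet_path="v3io:///projects/my-project/monitoring-stats")
    factory.apply_writer_graph(fn=fn, tsdb_connector=tsdb_connector)
    # apply_writer_graph appends a trailing "/" to the parquet path if missing,
    # and batches stats to parquet per the writer_graph config limits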
mlrun/platforms/__init__.py
@@ -25,6 +25,7 @@ from .iguazio import (
  )


+ # TODO: Remove in 1.11.0
  class _DeprecationHelper:
      """A helper class to deprecate old schemas"""

@@ -48,12 +49,12 @@
      def _warn(self):
          warnings.warn(
              f"mlrun.platforms.{self._new_target} is deprecated since version {self._version}, "
-             f"and will be removed in 1.10. Use mlrun.runtimes.mounts.{self._new_target} instead.",
+             f"and will be removed in 1.11.0. Use mlrun.runtimes.mounts.{self._new_target} instead.",
              FutureWarning,
          )


- # TODO: Remove in 1.10
+ # TODO: Remove in 1.11.0
  # For backwards compatibility
  VolumeMount = _DeprecationHelper("VolumeMount")
  auto_mount = _DeprecationHelper("auto_mount")
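
The bumped strings simply push the removal target from 1.10 to 1.11.0 now that 1.10 has shipped with the aliases intact. A hypothetical illustration of the shim at work, assuming the helper forwards calls to mlrun.runtimes.mounts::

    import warnings

    import mlrun.platforms

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        mount = mlrun.platforms.auto_mount()  # deprecated alias
    assert any(issubclass(w.category, FutureWarning) for w in caught)

    from mlrun.runtimes.mounts import auto_mount  # supported replacement
    mount = auto_mount()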
mlrun/platforms/iguazio.py
@@ -96,7 +96,11 @@ class OutputStream:
          if access_key:
              v3io_client_kwargs["access_key"] = access_key

-         self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
+         if not mock:
+             self._v3io_client = v3io.dataplane.Client(**v3io_client_kwargs)
+         else:
+             self._v3io_client = None
+
          self._container, self._stream_path = split_path(stream_path)
          self._shards = shards
          self._retention_in_hours = retention_in_hours
@@ -105,7 +109,7 @@
          self._mock = mock
          self._mock_queue = []

-     def create_stream(self):
+     def create_stream(self) -> None:
          # this import creates an import loop via the utils module, so putting it in execution path
          from mlrun.utils.helpers import logger

@@ -210,7 +214,7 @@ class KafkaOutputStream:
          self._initialized = False

      def _lazy_init(self):
-         if self._initialized:
+         if self._initialized or self._mock:
              return

          import kafka
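
All three hunks serve the mock path: with mock=True, neither the V3IO dataplane client nor the Kafka producer is ever constructed, so stream targets can be exercised offline. A rough sketch of the intent, with a hypothetical stream path (_mock_queue is the internal buffer visible in the context above)::

    from mlrun.platforms.iguazio import OutputStream

    stream = OutputStream("v3io:///projects/demo/stream", mock=True)
    stream.push({"hello": "world"})     # buffered in _mock_queue, no network I/O
    assert stream._v3io_client is None  # client creation is now skipped entirely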
mlrun/projects/operations.py
@@ -85,17 +85,17 @@ def run_function(
  ) -> Union[mlrun.model.RunObject, mlrun_pipelines.models.PipelineNodeWrapper]:
      """Run a local or remote task as part of a local/kubeflow pipeline

-     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
-     function can be specified as an object or by name (str), when the function is specified by name it is looked up
-     in the current project eliminating the need to redefine/edit functions.
+     run_function() allows you to execute a function locally, on a remote cluster, or as part of an automated workflow.
+     The function can be specified as an object or by name (str). When the function is specified by name it is looked up
+     in the current project, eliminating the need to redefine/edit functions.

-     when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
+     When functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
      e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
-     project runs provide additional notifications/reporting and exception handling.
-     inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
-     some behavior may differ between regular runs and deferred KFP runs.
+     Project runs provide additional notifications/reporting and exception handling.
+     Inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG.
+     Some behavior may differ between regular runs and deferred KFP runs.

-     example (use with function object)::
+     Example (use with function object)::

          LABELS = "is_error"
          MODEL_CLASS = "sklearn.ensemble.RandomForestClassifier"
@@ -107,7 +107,7 @@ def run_function(
              inputs={"dataset": DATA_PATH},
          )

-     example (use with project)::
+     Example (use with project)::

          # create a project with two functions (local and from hub)
          project = mlrun.new_project(project_name, "./proj)
@@ -119,7 +119,7 @@
          run2 = run_function("train", params={"label_columns": LABELS, "model_class": MODEL_CLASS},
                              inputs={"dataset": run1.outputs["data"]})

-     example (use in pipeline)::
+     Example (use in pipeline)::

          @dsl.pipeline(name="test pipeline", description="test")
          def my_pipe(url=""):
@@ -177,7 +177,12 @@
                        This ensures latest code changes are executed. This argument must be used in
                        conjunction with the local=True argument.
      :param output_path: path to store artifacts, when running in a workflow this will be set automatically
-     :param retry: Retry configuration for the run, can be a dict or an instance of mlrun.model.Retry.
+     :param retry: Retry configuration for the run, can be a dict or an instance of
+                   :py:class:`~mlrun.model.Retry`.
+                   The `count` field in the `Retry` object specifies the number of retry attempts.
+                   If `count=0`, the run will not be retried.
+                   The `backoff` field specifies the retry backoff strategy between retry attempts.
+                   If not provided, the default backoff delay is 30 seconds.
      :return: MLRun RunObject or PipelineNodeWrapper
      """
      if artifact_path:
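
The expanded retry description makes the semantics concrete; a hypothetical call in both accepted forms (function name, params, and the Retry(count=...) construction are illustrative)::

    import mlrun
    from mlrun.model import Retry

    # object form: up to 3 retry attempts; with no backoff given, the
    # documented default is a 30-second delay between attempts
    run = mlrun.run_function(
        "train", params={"label_columns": "is_error"}, retry=Retry(count=3)
    )

    # dict form; count=0 would disable retries
    run = mlrun.run_function("train", retry={"count": 3})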
mlrun/projects/pipelines.py
@@ -228,11 +228,11 @@ class _PipelineContext:
          force_run_local = mlrun.mlconf.force_run_local
          if force_run_local is None or force_run_local == "auto":
              force_run_local = not mlrun.mlconf.is_api_running_on_k8s()
+
+         if self.workflow:
              if not mlrun.mlconf.kfp_url:
                  logger.debug("Kubeflow pipeline URL is not set, running locally")
                  force_run_local = True
-
-         if self.workflow:
              force_run_local = force_run_local or self.workflow.run_local

          return force_run_local
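
Moving the kfp_url check under the if self.workflow: guard narrows the fallback: a missing Kubeflow Pipelines URL now forces local execution only for workflow runs, not for every run resolved from the "auto" default. A sketch of the resulting behavior, assuming the API is running on Kubernetes but no KFP endpoint is configured::

    import mlrun

    mlrun.mlconf.force_run_local = "auto"
    mlrun.mlconf.kfp_url = ""

    # ad-hoc runs: "auto" resolves from is_api_running_on_k8s() alone,
    # so they are no longer forced local just because kfp_url is empty
    # workflow runs (project.run(...)): still fall back to local execution,
    # matching the moved `if self.workflow:` block above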