mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (59)
  1. mlrun/alerts/alert.py +30 -27
  2. mlrun/common/constants.py +3 -0
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/alert.py +3 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  6. mlrun/common/schemas/notification.py +1 -0
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +9 -9
  9. mlrun/datastore/alibaba_oss.py +3 -2
  10. mlrun/datastore/azure_blob.py +7 -9
  11. mlrun/datastore/base.py +13 -1
  12. mlrun/datastore/dbfs_store.py +3 -7
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +84 -29
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +3 -2
  17. mlrun/datastore/sources.py +54 -0
  18. mlrun/datastore/storeytargets.py +147 -0
  19. mlrun/datastore/targets.py +76 -122
  20. mlrun/datastore/v3io.py +1 -0
  21. mlrun/db/httpdb.py +6 -1
  22. mlrun/errors.py +8 -0
  23. mlrun/execution.py +7 -0
  24. mlrun/feature_store/api.py +5 -0
  25. mlrun/feature_store/retrieval/job.py +1 -0
  26. mlrun/model.py +24 -3
  27. mlrun/model_monitoring/api.py +10 -2
  28. mlrun/model_monitoring/applications/_application_steps.py +52 -34
  29. mlrun/model_monitoring/applications/context.py +206 -70
  30. mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
  31. mlrun/model_monitoring/controller.py +15 -12
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  33. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -9
  34. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +46 -10
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -24
  37. mlrun/model_monitoring/helpers.py +54 -18
  38. mlrun/model_monitoring/stream_processing.py +10 -29
  39. mlrun/projects/pipelines.py +19 -30
  40. mlrun/projects/project.py +86 -67
  41. mlrun/run.py +8 -6
  42. mlrun/runtimes/__init__.py +4 -0
  43. mlrun/runtimes/nuclio/api_gateway.py +18 -0
  44. mlrun/runtimes/nuclio/application/application.py +150 -59
  45. mlrun/runtimes/nuclio/function.py +5 -11
  46. mlrun/runtimes/nuclio/serving.py +2 -2
  47. mlrun/runtimes/utils.py +16 -0
  48. mlrun/serving/routers.py +1 -1
  49. mlrun/serving/server.py +19 -5
  50. mlrun/serving/states.py +8 -0
  51. mlrun/serving/v2_serving.py +34 -26
  52. mlrun/utils/helpers.py +33 -2
  53. mlrun/utils/version/version.json +2 -2
  54. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +9 -12
  55. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +59 -58
  56. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
  57. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
  58. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
  59. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
@@ -19,11 +19,11 @@ import numpy as np
 import pandas as pd
 
 import mlrun
+import mlrun.artifacts
 import mlrun.common.model_monitoring.helpers
-import mlrun.common.schemas
-from mlrun.common.schemas.model_monitoring import (
-    EventFieldType,
-)
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
+import mlrun.data_types.infer
+import mlrun.model_monitoring
 from mlrun.common.schemas.model_monitoring.model_endpoints import (
     ModelEndpointMonitoringMetric,
     ModelEndpointMonitoringMetricType,
@@ -35,7 +35,6 @@ from mlrun.utils import logger
 if typing.TYPE_CHECKING:
     from mlrun.db.base import RunDBInterface
     from mlrun.projects import MlrunProject
-import mlrun.common.schemas.model_monitoring.constants as mm_constants
 
 
 class _BatchDict(typing.TypedDict):
@@ -45,26 +44,29 @@ class _BatchDict(typing.TypedDict):
 
 
 def get_stream_path(
-    project: str, function_name: str = mm_constants.MonitoringFunctionNames.STREAM
+    project: str,
+    function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
+    stream_uri: typing.Optional[str] = None,
 ) -> str:
     """
     Get stream path from the project secret. If wasn't set, take it from the system configurations
 
     :param project: Project name.
-    :param function_name: Application name. Default is model_monitoring_stream.
+    :param function_name: Application name. Default is model_monitoring_stream.
+    :param stream_uri: Stream URI. If provided, it will be used instead of the one from the project secret.
 
     :return: Monitoring stream path to the relevant application.
     """
 
-    stream_uri = mlrun.get_secret_or_env(
-        mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
+    stream_uri = stream_uri or mlrun.get_secret_or_env(
+        mm_constants.ProjectSecretKeys.STREAM_PATH
     )
 
     if not stream_uri or stream_uri == "v3io":
         # TODO : remove the first part of this condition in 1.9.0
         stream_uri = mlrun.mlconf.get_model_monitoring_file_target_path(
             project=project,
-            kind=mlrun.common.schemas.model_monitoring.FileTargetKind.STREAM,
+            kind=mm_constants.FileTargetKind.STREAM,
             target="online",
             function_name=function_name,
         )
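Note: judging by the functions involved, this and the following hunks come from mlrun/model_monitoring/helpers.py. The new stream_uri parameter lets a caller bypass the project-secret lookup. A minimal usage sketch; the project name and Kafka URI are illustrative, not taken from the diff:

    from mlrun.model_monitoring.helpers import get_stream_path

    # Resolved from the project secret or system configuration, as before:
    path = get_stream_path(project="demo")

    # New in rc41: an explicit URI wins over the secret lookup, unless it is
    # empty or the literal "v3io" (the legacy fallback slated for removal in 1.9.0):
    path = get_stream_path(
        project="demo",
        stream_uri="kafka://kafka-broker:9092?topic=monitoring-stream",  # illustrative
    )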
@@ -78,7 +80,7 @@ def get_stream_path(
 
 def get_monitoring_parquet_path(
     project: "MlrunProject",
-    kind: str = mlrun.common.schemas.model_monitoring.FileTargetKind.PARQUET,
+    kind: str = mm_constants.FileTargetKind.PARQUET,
 ) -> str:
     """Get model monitoring parquet target for the current project and kind. The parquet target path is based on the
     project artifact path. If project artifact path is not defined, the parquet target path will be based on MLRun
@@ -111,7 +113,7 @@ def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -> str:
     """
 
     return mlrun.get_secret_or_env(
-        key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
+        key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
         secret_provider=secret_provider,
     )
 
@@ -126,7 +128,7 @@ def get_tsdb_connection_string(
     """
 
     return mlrun.get_secret_or_env(
-        key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
+        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION,
         secret_provider=secret_provider,
     )
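Both getters above are thin wrappers around the same secret-or-env lookup. A sketch of the equivalent direct calls, using only names that appear in this diff:

    import mlrun
    import mlrun.common.schemas.model_monitoring.constants as mm_constants

    # Model endpoint store (KV/SQL) connection string:
    store_conn = mlrun.get_secret_or_env(
        key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION
    )
    # Time series database connection string:
    tsdb_conn = mlrun.get_secret_or_env(
        key=mm_constants.ProjectSecretKeys.TSDB_CONNECTION
    )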
@@ -200,7 +202,7 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: current_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
         )
     else:
         try:
@@ -229,7 +231,7 @@ def update_model_endpoint_last_request(
         db.patch_model_endpoint(
             project=project,
             endpoint_id=model_endpoint.metadata.uid,
-            attributes={EventFieldType.LAST_REQUEST: bumped_last_request},
+            attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
         )
 
@@ -249,12 +251,11 @@ def calculate_inputs_statistics(
 
     # Use `DFDataInfer` to calculate the statistics over the inputs:
     inputs_statistics = mlrun.data_types.infer.DFDataInfer.get_stats(
-        df=inputs,
-        options=mlrun.data_types.infer.InferOptions.Histogram,
+        df=inputs, options=mlrun.data_types.infer.InferOptions.Histogram
     )
 
     # Recalculate the histograms over the bins that are set in the sample-set of the end point:
-    for feature in inputs_statistics.keys():
+    for feature in list(inputs_statistics):
         if feature in sample_set_statistics:
             counts, bins = np.histogram(
                 inputs[feature].to_numpy(),
@@ -271,6 +272,9 @@ def calculate_inputs_statistics(
                     inputs_statistics[feature]["hist"]
                 )
             )
+        else:
+            # If the feature is not in the sample set and doesn't have a histogram, remove it from the statistics:
+            inputs_statistics.pop(feature)
 
     return inputs_statistics
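The switch from inputs_statistics.keys() to list(inputs_statistics) is what makes the new else branch safe: the loop now pops entries from the dict it iterates over, and removing keys while iterating a live dict view raises RuntimeError. A standalone illustration of the idiom (plain Python, not mlrun code):

    stats = {"f1": {"hist": [1, 2]}, "f2": {}}
    sample_set_statistics = {"f1": {"hist": [3, 4]}}

    for feature in list(stats):  # iterate over a snapshot of the keys
        if feature not in sample_set_statistics:
            stats.pop(feature)  # safe: the snapshot is unaffected by the pop

    # Looping over stats.keys() directly while popping would raise
    # "RuntimeError: dictionary changed size during iteration".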
@@ -323,3 +327,35 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
         name=mm_constants.PredictionsQueryConstants.INVOCATIONS,
         full_name=get_invocations_fqn(project),
     )
+
+
+def enrich_model_endpoint_with_model_uri(
+    model_endpoint: ModelEndpoint,
+    model_obj: mlrun.artifacts.ModelArtifact,
+):
+    """
+    Enrich the model endpoint object with the model uri from the model object. We will use a unique reference
+    to the model object that includes the project, db_key, iter, and tree.
+    In addition, we verify that the model object is of type `ModelArtifact`.
+
+    :param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
+    :param model_obj:      An object representing the model artifact.
+
+    :raise: `MLRunInvalidArgumentError` if the model object is not of type `ModelArtifact`.
+    """
+    mlrun.utils.helpers.verify_field_of_type(
+        field_name="model_endpoint.spec.model_uri",
+        field_value=model_obj,
+        expected_type=mlrun.artifacts.ModelArtifact,
+    )
+
+    # Update model_uri with a unique reference to handle future changes
+    model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
+        project=model_endpoint.metadata.project,
+        key=model_obj.db_key,
+        iter=model_obj.iter,
+        tree=model_obj.tree,
+    )
+    model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
+        kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
+    )
@@ -37,6 +37,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
     ModelEndpointTarget,
     ProjectSecretKeys,
 )
+from mlrun.model_monitoring.db import StoreBase, TSDBConnector
 from mlrun.utils import logger
 
 
@@ -48,14 +49,12 @@ class EventStreamProcessor:
         parquet_batching_max_events: int,
         parquet_batching_timeout_secs: int,
         parquet_target: str,
-        sample_window: int = 10,
         aggregate_windows: typing.Optional[list[str]] = None,
-        aggregate_period: str = "30s",
+        aggregate_period: str = "5m",
         model_monitoring_access_key: str = None,
     ):
         # General configurations, mainly used for the storey steps in the future serving graph
         self.project = project
-        self.sample_window = sample_window
         self.aggregate_windows = aggregate_windows or ["5m", "1h"]
         self.aggregate_period = aggregate_period
@@ -133,7 +132,8 @@ class EventStreamProcessor:
     def apply_monitoring_serving_graph(
         self,
         fn: mlrun.runtimes.ServingRuntime,
-        secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
+        tsdb_connector: TSDBConnector,
+        endpoint_store: StoreBase,
     ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
@@ -161,8 +161,8 @@ class EventStreamProcessor:
         using CE, the parquet target path is based on the defined MLRun artifact path.
 
         :param fn: A serving function.
-        :param secret_provider: An optional callable function that provides the connection string from the project
-                                secret.
+        :param tsdb_connector: Time series database connector.
+        :param endpoint_store: KV/SQL store used for endpoint data.
         """
 
         graph = typing.cast(
@@ -190,10 +190,6 @@ class EventStreamProcessor:
             _fn="(event.get('error') is not None)",
         )
 
-        tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-            project=self.project, secret_provider=secret_provider
-        )
-
         tsdb_connector.handle_model_error(
             graph,
         )
@@ -202,7 +198,7 @@ class EventStreamProcessor:
         def apply_process_endpoint_event():
             graph.add_step(
                 "ProcessEndpointEvent",
-                after="FilterError",
+                after="extract_endpoint",  # TODO: change this to FilterError in ML-7456
                 full_event=True,
                 project=self.project,
             )
@@ -306,24 +302,9 @@ class EventStreamProcessor:
                 table=self.kv_path,
             )
 
-        store_object = mlrun.model_monitoring.get_store_object(
-            project=self.project, secret_provider=secret_provider
-        )
-        if store_object.type == ModelEndpointTarget.V3IO_NOSQL:
+        if endpoint_store.type == ModelEndpointTarget.V3IO_NOSQL:
             apply_infer_schema()
 
-        # Emits the event in window size of events based on sample_window size (10 by default)
-        def apply_storey_sample_window():
-            graph.add_step(
-                "storey.steps.SampleWindow",
-                name="sample",
-                after="Rename",
-                window_size=self.sample_window,
-                key=EventFieldType.ENDPOINT_ID,
-            )
-
-        apply_storey_sample_window()
-
         tsdb_connector.apply_monitoring_stream_steps(graph=graph)
 
         # Parquet branch
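apply_monitoring_serving_graph no longer builds its own connectors from a secret_provider; the caller injects them. A sketch of the new call shape, reusing the two factory calls that the removed lines used to make internally (processor, serving_fn, and secret_provider are assumed to exist):

    import mlrun.model_monitoring

    tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
        project="demo", secret_provider=secret_provider
    )
    endpoint_store = mlrun.model_monitoring.get_store_object(
        project="demo", secret_provider=secret_provider
    )
    processor.apply_monitoring_serving_graph(
        fn=serving_fn,  # a ServingRuntime
        tsdb_connector=tsdb_connector,
        endpoint_store=endpoint_store,
    )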
@@ -353,6 +334,7 @@ class EventStreamProcessor:
             index_cols=[EventFieldType.ENDPOINT_ID],
             key_bucketing_number=0,
             time_partitioning_granularity="hour",
+            time_field=EventFieldType.TIMESTAMP,
             partition_cols=["$key", "$year", "$month", "$day", "$hour"],
         )
 
@@ -527,9 +509,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # If error key has been found in the current event,
         # increase the error counter by 1 and raise the error description
         error = event.get("error")
-        if error:
+        if error:  # TODO: delete this in ML-7456
             self.error_count[endpoint_id] += 1
-            # TODO: write to tsdb / kv once in a while
             raise mlrun.errors.MLRunInvalidArgumentError(str(error))
 
         # Validate event fields
@@ -447,7 +447,6 @@ class _PipelineRunner(abc.ABC):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pass
 
@@ -567,7 +566,6 @@ class _KFPRunner(_PipelineRunner):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(
@@ -585,7 +583,8 @@ class _KFPRunner(_PipelineRunner):
                 "Notifications will only be sent if you wait for pipeline completion. "
                 "To use the new notification behavior, use the remote pipeline runner."
             )
-            for notification in notifications:
+            # for start message, fallback to old notification behavior
+            for notification in notifications or []:
                 project.notifiers.add_notification(
                     notification.kind, notification.params
                 )
@@ -616,13 +615,12 @@ class _KFPRunner(_PipelineRunner):
                     func_name=func.metadata.name,
                     exc_info=err_to_str(exc),
                 )
-        if send_start_notification:
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-                project.get_param("commit_id", None),
-                run_id,
-                True,
-            )
+        project.notifiers.push_pipeline_start_message(
+            project.metadata.name,
+            project.get_param("commit_id", None),
+            run_id,
+            True,
+        )
         pipeline_context.clear()
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
 
@@ -670,7 +668,6 @@ class _LocalRunner(_PipelineRunner):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(
@@ -692,10 +689,9 @@ class _LocalRunner(_PipelineRunner):
             project.set_source(source=source)
         pipeline_context.workflow_artifact_path = artifact_path
 
-        if send_start_notification:
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name, pipeline_id=workflow_id
-            )
+        project.notifiers.push_pipeline_start_message(
+            project.metadata.name, pipeline_id=workflow_id
+        )
         err = None
         try:
             workflow_handler(**workflow_spec.args)
@@ -755,22 +751,10 @@ class _RemoteRunner(_PipelineRunner):
         namespace: str = None,
         source: str = None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> typing.Optional[_PipelineRunStatus]:
         workflow_name = normalize_workflow_name(name=name, project_name=project.name)
         workflow_id = None
 
-        # for start message, fallback to old notification behavior
-        if send_start_notification:
-            for notification in notifications or []:
-                project.notifiers.add_notification(
-                    notification.kind, notification.params
-                )
-                # if a notification with `when=running` is provided, it will be used explicitly and others
-                # will be ignored
-                if "running" in notification.when:
-                    break
-
         # The returned engine for this runner is the engine of the workflow.
         # In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
         inner_engine = get_workflow_engine(workflow_spec.engine)
@@ -870,9 +854,6 @@ class _RemoteRunner(_PipelineRunner):
             state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
             state = mlrun_pipelines.common.models.RunStatuses.running
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-            )
         pipeline_context.clear()
         return _PipelineRunStatus(
             run_id=workflow_id,
@@ -1078,6 +1059,13 @@ def load_and_run(
     if load_only:
         return
 
+    # extract "start" notification if exists
+    start_notifications = [
+        notification
+        for notification in context.get_notifications()
+        if "running" in notification.when
+    ]
+
     workflow_log_message = workflow_name or workflow_path
     context.logger.info(f"Running workflow {workflow_log_message} from remote")
     run = project.run(
@@ -1093,6 +1081,7 @@ def load_and_run(
         cleanup_ttl=cleanup_ttl,
         engine=engine,
         local=local,
+        notifications=start_notifications,
     )
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)
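Taken together, the pipelines.py hunks drop the send_start_notification flag: instead, load_and_run pre-filters the run's notifications for those whose when list contains "running" and forwards only those to project.run(). A standalone sketch of that filter, with plain dicts standing in for the Notification objects returned by context.get_notifications():

    notifications = [
        {"kind": "slack", "when": ["running", "completed"]},
        {"kind": "git", "when": ["completed", "error"]},
    ]
    start_notifications = [n for n in notifications if "running" in n["when"]]
    # -> only the slack notification is forwarded as a "start" notification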