mlrun 1.7.0rc22__py3-none-any.whl → 1.7.0rc28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (81)
  1. mlrun/__main__.py +10 -8
  2. mlrun/alerts/alert.py +13 -1
  3. mlrun/artifacts/manager.py +5 -0
  4. mlrun/common/constants.py +2 -2
  5. mlrun/common/formatters/__init__.py +1 -0
  6. mlrun/common/formatters/artifact.py +26 -3
  7. mlrun/common/formatters/base.py +9 -9
  8. mlrun/common/formatters/run.py +26 -0
  9. mlrun/common/helpers.py +11 -0
  10. mlrun/common/schemas/__init__.py +4 -0
  11. mlrun/common/schemas/alert.py +5 -9
  12. mlrun/common/schemas/api_gateway.py +64 -16
  13. mlrun/common/schemas/artifact.py +11 -0
  14. mlrun/common/schemas/constants.py +3 -0
  15. mlrun/common/schemas/feature_store.py +58 -28
  16. mlrun/common/schemas/model_monitoring/constants.py +21 -12
  17. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
  18. mlrun/common/schemas/pipeline.py +16 -0
  19. mlrun/common/schemas/project.py +17 -0
  20. mlrun/common/schemas/runs.py +17 -0
  21. mlrun/common/schemas/schedule.py +1 -1
  22. mlrun/common/types.py +5 -0
  23. mlrun/config.py +10 -25
  24. mlrun/datastore/azure_blob.py +2 -1
  25. mlrun/datastore/datastore.py +3 -3
  26. mlrun/datastore/google_cloud_storage.py +6 -2
  27. mlrun/datastore/snowflake_utils.py +3 -1
  28. mlrun/datastore/sources.py +26 -11
  29. mlrun/datastore/store_resources.py +2 -0
  30. mlrun/datastore/targets.py +68 -16
  31. mlrun/db/base.py +64 -2
  32. mlrun/db/httpdb.py +129 -41
  33. mlrun/db/nopdb.py +44 -3
  34. mlrun/errors.py +5 -3
  35. mlrun/execution.py +18 -10
  36. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  37. mlrun/frameworks/__init__.py +0 -6
  38. mlrun/model.py +23 -0
  39. mlrun/model_monitoring/api.py +6 -52
  40. mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
  41. mlrun/model_monitoring/db/stores/__init__.py +37 -24
  42. mlrun/model_monitoring/db/stores/base/store.py +40 -1
  43. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +42 -87
  44. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +27 -35
  45. mlrun/model_monitoring/db/tsdb/__init__.py +15 -15
  46. mlrun/model_monitoring/db/tsdb/base.py +1 -1
  47. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +6 -4
  48. mlrun/model_monitoring/helpers.py +17 -9
  49. mlrun/model_monitoring/stream_processing.py +9 -11
  50. mlrun/model_monitoring/writer.py +11 -11
  51. mlrun/package/__init__.py +1 -13
  52. mlrun/package/packagers/__init__.py +1 -6
  53. mlrun/projects/pipelines.py +10 -9
  54. mlrun/projects/project.py +95 -81
  55. mlrun/render.py +10 -5
  56. mlrun/run.py +13 -8
  57. mlrun/runtimes/base.py +11 -4
  58. mlrun/runtimes/daskjob.py +7 -1
  59. mlrun/runtimes/local.py +16 -3
  60. mlrun/runtimes/nuclio/application/application.py +0 -2
  61. mlrun/runtimes/nuclio/function.py +20 -0
  62. mlrun/runtimes/nuclio/serving.py +9 -6
  63. mlrun/runtimes/pod.py +5 -29
  64. mlrun/serving/routers.py +75 -59
  65. mlrun/serving/server.py +11 -0
  66. mlrun/serving/states.py +29 -0
  67. mlrun/serving/v2_serving.py +62 -39
  68. mlrun/utils/helpers.py +39 -1
  69. mlrun/utils/logger.py +36 -2
  70. mlrun/utils/notifications/notification/base.py +43 -7
  71. mlrun/utils/notifications/notification/git.py +21 -0
  72. mlrun/utils/notifications/notification/slack.py +9 -14
  73. mlrun/utils/notifications/notification/webhook.py +41 -1
  74. mlrun/utils/notifications/notification_pusher.py +3 -9
  75. mlrun/utils/version/version.json +2 -2
  76. {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc28.dist-info}/METADATA +12 -7
  77. {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc28.dist-info}/RECORD +81 -80
  78. {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc28.dist-info}/WHEEL +1 -1
  79. {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc28.dist-info}/LICENSE +0 -0
  80. {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc28.dist-info}/entry_points.txt +0 -0
  81. {mlrun-1.7.0rc22.dist-info → mlrun-1.7.0rc28.dist-info}/top_level.txt +0 -0
mlrun/model.py CHANGED
@@ -732,6 +732,25 @@ class Notification(ModelObj):
732
732
  "Notification params size exceeds max size of 1 MB"
733
733
  )
734
734
 
735
+ def validate_notification_params(self):
736
+ notification_class = mlrun.utils.notifications.NotificationTypes(
737
+ self.kind
738
+ ).get_notification()
739
+
740
+ secret_params = self.secret_params
741
+ params = self.params
742
+
743
+ if not secret_params and not params:
744
+ raise mlrun.errors.MLRunInvalidArgumentError(
745
+ "Both 'secret_params' and 'params' are empty, at least one must be defined."
746
+ )
747
+ if secret_params and params and secret_params != params:
748
+ raise mlrun.errors.MLRunInvalidArgumentError(
749
+ "Both 'secret_params' and 'params' are defined but they contain different values"
750
+ )
751
+
752
+ notification_class.validate_params(secret_params or params)
753
+
735
754
  @staticmethod
736
755
  def validate_notification_uniqueness(notifications: list["Notification"]):
737
756
  """Validate that all notifications in the list are unique by name"""
@@ -873,6 +892,7 @@ class RunSpec(ModelObj):
873
892
  notifications=None,
874
893
  state_thresholds=None,
875
894
  reset_on_run=None,
895
+ node_selector=None,
876
896
  ):
877
897
  # A dictionary of parsing configurations that will be read from the inputs the user set. The keys are the inputs
878
898
  # keys (parameter names) and the values are the type hint given in the input keys after the colon.
@@ -910,6 +930,7 @@ class RunSpec(ModelObj):
910
930
  self._notifications = notifications or []
911
931
  self.state_thresholds = state_thresholds or {}
912
932
  self.reset_on_run = reset_on_run
933
+ self.node_selector = node_selector or {}
913
934
 
914
935
  def _serialize_field(
915
936
  self, struct: dict, field_name: str = None, strip: bool = False
@@ -1989,6 +2010,7 @@ class DataTarget(DataTargetBase):
1989
2010
  "name",
1990
2011
  "kind",
1991
2012
  "path",
2013
+ "attributes",
1992
2014
  "start_time",
1993
2015
  "online",
1994
2016
  "status",
@@ -2020,6 +2042,7 @@ class DataTarget(DataTargetBase):
2020
2042
  self.last_written = None
2021
2043
  self._producer = None
2022
2044
  self.producer = {}
2045
+ self.attributes = {}
2023
2046
 
2024
2047
  @property
2025
2048
  def producer(self) -> FeatureSetProducer:
mlrun/model_monitoring/api.py CHANGED
@@ -47,8 +47,8 @@ def get_or_create_model_endpoint(
47
47
  function_name: str = "",
48
48
  context: mlrun.MLClientCtx = None,
49
49
  sample_set_statistics: dict[str, typing.Any] = None,
50
- drift_threshold: float = None,
51
- possible_drift_threshold: float = None,
50
+ drift_threshold: typing.Optional[float] = None,
51
+ possible_drift_threshold: typing.Optional[float] = None,
52
52
  monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
53
53
  db_session=None,
54
54
  ) -> ModelEndpoint:
@@ -69,14 +69,14 @@ def get_or_create_model_endpoint(
69
69
  full function hash.
70
70
  :param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
71
71
  the new model endpoint (applicable only to new endpoint_id).
72
- :param drift_threshold: The threshold of which to mark drifts (applicable only to new endpoint_id).
73
- :param possible_drift_threshold: The threshold of which to mark possible drifts (applicable only to new
72
+ :param drift_threshold: (deprecated) The threshold of which to mark drifts (applicable only to new
73
+ endpoint_id).
74
+ :param possible_drift_threshold: (deprecated) The threshold of which to mark possible drifts (applicable only to new
74
75
  endpoint_id).
75
76
  :param monitoring_mode: If enabled, apply model monitoring features on the provided endpoint id
76
77
  (applicable only to new endpoint_id).
77
78
  :param db_session: A runtime session that manages the current dialog with the database.
78
79
 
79
-
80
80
  :return: A ModelEndpoint object
81
81
  """
82
82
 
@@ -98,8 +98,6 @@ def get_or_create_model_endpoint(
98
98
  model_endpoint=model_endpoint,
99
99
  model_path=model_path,
100
100
  sample_set_statistics=sample_set_statistics,
101
- drift_threshold=drift_threshold,
102
- possible_drift_threshold=possible_drift_threshold,
103
101
  )
104
102
 
105
103
  except mlrun.errors.MLRunNotFoundError:
@@ -113,8 +111,6 @@ def get_or_create_model_endpoint(
113
111
  function_name=function_name,
114
112
  context=context,
115
113
  sample_set_statistics=sample_set_statistics,
116
- drift_threshold=drift_threshold,
117
- possible_drift_threshold=possible_drift_threshold,
118
114
  monitoring_mode=monitoring_mode,
119
115
  )
120
116
  return model_endpoint
@@ -241,9 +237,7 @@ def _model_endpoint_validations(
241
237
  model_endpoint: ModelEndpoint,
242
238
  model_path: str = "",
243
239
  sample_set_statistics: dict[str, typing.Any] = None,
244
- drift_threshold: float = None,
245
- possible_drift_threshold: float = None,
246
- ):
240
+ ) -> None:
247
241
  """
248
242
  Validate that provided model endpoint configurations match the stored fields of the provided `ModelEndpoint`
249
243
  object. Usually, this method is called by `get_or_create_model_endpoint()` in cases that the model endpoint
@@ -257,11 +251,6 @@ def _model_endpoint_validations(
257
251
  is forbidden to provide a different reference data to that model endpoint.
258
252
  In case of discrepancy between the provided `sample_set_statistics` and the
259
253
  `model_endpoints.spec.feature_stats`, a warning will be presented to the user.
260
- :param drift_threshold: The threshold of which to mark drifts. Should be similar to the drift threshold
261
- that has already assigned to the current model endpoint.
262
- :param possible_drift_threshold: The threshold of which to mark possible drifts. Should be similar to the possible
263
- drift threshold that has already assigned to the current model endpoint.
264
-
265
254
  """
266
255
  # Model path
267
256
  if model_path and model_endpoint.spec.model_uri != model_path:
@@ -280,28 +269,6 @@ def _model_endpoint_validations(
280
269
  "Provided sample set statistics is different from the registered statistics. "
281
270
  "If new sample set statistics is to be used, new model endpoint should be created"
282
271
  )
283
- # drift and possible drift thresholds
284
- if drift_threshold:
285
- current_drift_threshold = model_endpoint.spec.monitor_configuration.get(
286
- mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD,
287
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected,
288
- )
289
- if current_drift_threshold != drift_threshold:
290
- raise mlrun.errors.MLRunInvalidArgumentError(
291
- f"Cannot change existing drift threshold. Expected {current_drift_threshold}, got {drift_threshold} "
292
- f"Please update drift threshold or generate a new model endpoint record"
293
- )
294
-
295
- if possible_drift_threshold:
296
- current_possible_drift_threshold = model_endpoint.spec.monitor_configuration.get(
297
- mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD,
298
- mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift,
299
- )
300
- if current_possible_drift_threshold != possible_drift_threshold:
301
- raise mlrun.errors.MLRunInvalidArgumentError(
302
- f"Cannot change existing possible drift threshold. Expected {current_possible_drift_threshold}, "
303
- f"got {possible_drift_threshold}. Please update drift threshold or generate a new model endpoint record"
304
- )
305
272
 
306
273
 
307
274
  def write_monitoring_df(
@@ -354,8 +321,6 @@ def _generate_model_endpoint(
354
321
  function_name: str,
355
322
  context: mlrun.MLClientCtx,
356
323
  sample_set_statistics: dict[str, typing.Any],
357
- drift_threshold: float,
358
- possible_drift_threshold: float,
359
324
  monitoring_mode: mm_constants.ModelMonitoringMode = mm_constants.ModelMonitoringMode.disabled,
360
325
  ) -> ModelEndpoint:
361
326
  """
@@ -374,8 +339,6 @@ def _generate_model_endpoint(
374
339
  :param sample_set_statistics: Dictionary of sample set statistics that will be used as a reference data for
375
340
  the current model endpoint. Will be stored under
376
341
  `model_endpoint.status.feature_stats`.
377
- :param drift_threshold: The threshold of which to mark drifts.
378
- :param possible_drift_threshold: The threshold of which to mark possible drifts.
379
342
 
380
343
  :return `mlrun.model_monitoring.model_endpoint.ModelEndpoint` object.
381
344
  """
@@ -393,15 +356,6 @@ def _generate_model_endpoint(
393
356
  model_endpoint.spec.model_uri = model_path
394
357
  model_endpoint.spec.model = model_endpoint_name
395
358
  model_endpoint.spec.model_class = "drift-analysis"
396
- if drift_threshold:
397
- model_endpoint.spec.monitor_configuration[
398
- mm_constants.EventFieldType.DRIFT_DETECTED_THRESHOLD
399
- ] = drift_threshold
400
- if possible_drift_threshold:
401
- model_endpoint.spec.monitor_configuration[
402
- mm_constants.EventFieldType.POSSIBLE_DRIFT_THRESHOLD
403
- ] = possible_drift_threshold
404
-
405
359
  model_endpoint.spec.monitoring_mode = monitoring_mode
406
360
  model_endpoint.status.first_request = model_endpoint.status.last_request = (
407
361
  datetime_now().isoformat()
mlrun/model_monitoring/applications/histogram_data_drift.py CHANGED
@@ -193,7 +193,7 @@ class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
193
193
  status=status,
194
194
  extra_data={
195
195
  EventFieldType.CURRENT_STATS: json.dumps(
196
- monitoring_context.feature_stats
196
+ monitoring_context.sample_df_stats
197
197
  ),
198
198
  EventFieldType.DRIFT_MEASURES: metrics_per_feature.T.to_json(),
199
199
  EventFieldType.DRIFT_STATUS: status.value,
mlrun/model_monitoring/db/stores/__init__.py CHANGED
@@ -31,17 +31,12 @@ class ObjectStoreFactory(enum.Enum):
31
31
  def to_object_store(
32
32
  self,
33
33
  project: str,
34
- access_key: str = None,
35
- secret_provider: typing.Callable = None,
34
+ **kwargs,
36
35
  ) -> StoreBase:
37
36
  """
38
37
  Return a StoreBase object based on the provided enum value.
39
38
 
40
39
  :param project: The name of the project.
41
- :param access_key: Access key with permission to the DB table. Note that if access key is None
42
- and the endpoint target is from type KV then the access key will be
43
- retrieved from the environment variable.
44
- :param secret_provider: An optional secret provider to get the connection string secret.
45
40
 
46
41
  :return: `StoreBase` object.
47
42
 
@@ -50,10 +45,7 @@ class ObjectStoreFactory(enum.Enum):
50
45
  if self == self.v3io_nosql:
51
46
  from mlrun.model_monitoring.db.stores.v3io_kv.kv_store import KVStoreBase
52
47
 
53
- # Get V3IO access key from env
54
- access_key = access_key or mlrun.mlconf.get_v3io_access_key()
55
-
56
- return KVStoreBase(project=project, access_key=access_key)
48
+ return KVStoreBase(project=project)
57
49
 
58
50
  # Assuming SQL store target if store type is not KV.
59
51
  # Update these lines once there are more than two store target types.
@@ -62,7 +54,7 @@ class ObjectStoreFactory(enum.Enum):
62
54
 
63
55
  return SQLStoreBase(
64
56
  project=project,
65
- secret_provider=secret_provider,
57
+ **kwargs,
66
58
  )
67
59
 
68
60
  @classmethod
@@ -71,7 +63,7 @@ class ObjectStoreFactory(enum.Enum):
71
63
  :param value: Provided enum (invalid) value.
72
64
  """
73
65
  valid_values = list(cls.__members__.keys())
74
- raise mlrun.errors.MLRunInvalidArgumentError(
66
+ raise mlrun.errors.MLRunInvalidMMStoreType(
75
67
  f"{value} is not a valid endpoint store, please choose a valid value: %{valid_values}."
76
68
  )
77
69
 
@@ -79,7 +71,7 @@ class ObjectStoreFactory(enum.Enum):
79
71
  def get_model_endpoint_store(
80
72
  project: str,
81
73
  access_key: str = None,
82
- secret_provider: typing.Callable = None,
74
+ secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
83
75
  ) -> StoreBase:
84
76
  # Leaving here for backwards compatibility
85
77
  warnings.warn(
@@ -95,24 +87,45 @@ def get_model_endpoint_store(
95
87
 
96
88
  def get_store_object(
97
89
  project: str,
98
- access_key: str = None,
99
- secret_provider: typing.Callable = None,
90
+ secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
91
+ store_connection_string: typing.Optional[str] = None,
92
+ **kwargs,
100
93
  ) -> StoreBase:
101
94
  """
102
- Getting the DB target type based on mlrun.config.model_endpoint_monitoring.store_type.
95
+ Generate a store object. If a connection string is provided, the store type will be updated according to the
96
+ connection string. Currently, the supported store types are SQL and v3io-nosql.
103
97
 
104
- :param project: The name of the project.
105
- :param access_key: Access key with permission to the DB table.
106
- :param secret_provider: An optional secret provider to get the connection string secret.
98
+ :param project: The name of the project.
99
+ :param secret_provider: An optional secret provider to get the connection string secret.
100
+ :param store_connection_string: Optional explicit connection string of the store.
107
101
 
108
- :return: `StoreBase` object. Using this object, the user can apply different operations on the
109
- model monitoring record such as write, update, get and delete a model endpoint.
102
+ :return: `StoreBase` object. Using this object, the user can apply different operations such as write, update, get
103
+ and delete a model endpoint record.
110
104
  """
111
105
 
106
+ store_connection_string = (
107
+ store_connection_string
108
+ or mlrun.model_monitoring.helpers.get_connection_string(
109
+ secret_provider=secret_provider
110
+ )
111
+ )
112
+
113
+ if store_connection_string and (
114
+ store_connection_string.startswith("mysql")
115
+ or store_connection_string.startswith("sqlite")
116
+ ):
117
+ store_type = mlrun.common.schemas.model_monitoring.ModelEndpointTarget.SQL
118
+ kwargs["store_connection_string"] = store_connection_string
119
+ elif store_connection_string and store_connection_string == "v3io":
120
+ store_type = (
121
+ mlrun.common.schemas.model_monitoring.ModelEndpointTarget.V3IO_NOSQL
122
+ )
123
+ else:
124
+ store_type = None
112
125
  # Get store type value from ObjectStoreFactory enum class
113
- store_type = ObjectStoreFactory(mlrun.mlconf.model_endpoint_monitoring.store_type)
126
+ store_type_fact = ObjectStoreFactory(store_type)
114
127
 
115
128
  # Convert into store target object
116
- return store_type.to_object_store(
117
- project=project, access_key=access_key, secret_provider=secret_provider
129
+ return store_type_fact.to_object_store(
130
+ project=project, secret_provider=secret_provider, **kwargs
118
131
  )
mlrun/model_monitoring/db/stores/base/store.py CHANGED
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import json
15
15
  import typing
16
16
  from abc import ABC, abstractmethod
17
17
 
@@ -19,6 +19,7 @@ import mlrun.common.schemas.model_monitoring as mm_schemas
19
19
 
20
20
 
21
21
  class StoreBase(ABC):
22
+ type: typing.ClassVar[str]
22
23
  """
23
24
  An abstract class to handle the store object in the DB target.
24
25
  """
@@ -169,3 +170,41 @@ class StoreBase(ABC):
169
170
 
170
171
  :return: A list of the available metrics.
171
172
  """
173
+
174
+ @staticmethod
175
+ def _validate_labels(
176
+ endpoint_dict: dict,
177
+ labels: list,
178
+ ) -> bool:
179
+ """Validate that the model endpoint dictionary has the provided labels. There are 2 possible cases:
180
+ 1 - Labels were provided as a list of key-values pairs (e.g. ['label_1=value_1', 'label_2=value_2']): Validate
181
+ that each pair exist in the endpoint dictionary.
182
+ 2 - Labels were provided as a list of key labels (e.g. ['label_1', 'label_2']): Validate that each key exist in
183
+ the endpoint labels dictionary.
184
+
185
+ :param endpoint_dict: Dictionary of the model endpoint records.
186
+ :param labels: List of dictionary of required labels.
187
+
188
+ :return: True if the labels exist in the endpoint labels dictionary, otherwise False.
189
+ """
190
+
191
+ # Convert endpoint labels into dictionary
192
+ endpoint_labels = json.loads(
193
+ endpoint_dict.get(mm_schemas.EventFieldType.LABELS)
194
+ )
195
+
196
+ for label in labels:
197
+ # Case 1 - label is a key=value pair
198
+ if "=" in label:
199
+ lbl, value = list(map(lambda x: x.strip(), label.split("=")))
200
+ if lbl not in endpoint_labels or str(endpoint_labels[lbl]) != value:
201
+ return False
202
+ # Case 2 - label is just a key
203
+ else:
204
+ if label not in endpoint_labels:
205
+ return False
206
+
207
+ return True
208
+
209
+ def create_tables(self):
210
+ pass