mlrun 1.7.0rc43__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (68)
  1. mlrun/__main__.py +4 -2
  2. mlrun/artifacts/manager.py +3 -1
  3. mlrun/common/formatters/__init__.py +1 -0
  4. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  5. mlrun/common/formatters/run.py +3 -0
  6. mlrun/common/schemas/__init__.py +1 -0
  7. mlrun/common/schemas/alert.py +11 -11
  8. mlrun/common/schemas/auth.py +5 -0
  9. mlrun/common/schemas/client_spec.py +0 -1
  10. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  11. mlrun/common/schemas/model_monitoring/constants.py +23 -9
  12. mlrun/common/schemas/model_monitoring/model_endpoints.py +24 -47
  13. mlrun/common/schemas/notification.py +12 -2
  14. mlrun/common/schemas/workflow.py +10 -2
  15. mlrun/config.py +28 -21
  16. mlrun/data_types/data_types.py +6 -1
  17. mlrun/datastore/base.py +4 -4
  18. mlrun/datastore/s3.py +12 -9
  19. mlrun/datastore/storeytargets.py +9 -6
  20. mlrun/db/base.py +3 -0
  21. mlrun/db/httpdb.py +28 -16
  22. mlrun/db/nopdb.py +24 -4
  23. mlrun/errors.py +7 -1
  24. mlrun/execution.py +40 -7
  25. mlrun/feature_store/api.py +1 -0
  26. mlrun/feature_store/retrieval/spark_merger.py +7 -7
  27. mlrun/frameworks/_common/plan.py +3 -3
  28. mlrun/frameworks/_ml_common/plan.py +1 -1
  29. mlrun/frameworks/parallel_coordinates.py +2 -3
  30. mlrun/launcher/client.py +6 -6
  31. mlrun/model.py +29 -0
  32. mlrun/model_monitoring/api.py +1 -12
  33. mlrun/model_monitoring/applications/__init__.py +1 -2
  34. mlrun/model_monitoring/applications/_application_steps.py +5 -1
  35. mlrun/model_monitoring/applications/base.py +2 -182
  36. mlrun/model_monitoring/applications/context.py +2 -9
  37. mlrun/model_monitoring/applications/evidently_base.py +0 -74
  38. mlrun/model_monitoring/applications/histogram_data_drift.py +2 -2
  39. mlrun/model_monitoring/applications/results.py +4 -4
  40. mlrun/model_monitoring/controller.py +46 -209
  41. mlrun/model_monitoring/db/stores/base/store.py +1 -0
  42. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +15 -1
  43. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +12 -0
  44. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +17 -16
  45. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +49 -39
  46. mlrun/model_monitoring/helpers.py +13 -15
  47. mlrun/model_monitoring/writer.py +3 -1
  48. mlrun/projects/operations.py +11 -8
  49. mlrun/projects/pipelines.py +35 -16
  50. mlrun/projects/project.py +52 -24
  51. mlrun/render.py +3 -3
  52. mlrun/runtimes/daskjob.py +1 -1
  53. mlrun/runtimes/kubejob.py +6 -6
  54. mlrun/runtimes/nuclio/api_gateway.py +12 -0
  55. mlrun/runtimes/nuclio/application/application.py +3 -3
  56. mlrun/runtimes/nuclio/function.py +41 -0
  57. mlrun/runtimes/nuclio/serving.py +2 -2
  58. mlrun/runtimes/pod.py +19 -13
  59. mlrun/serving/server.py +2 -0
  60. mlrun/utils/helpers.py +62 -16
  61. mlrun/utils/version/version.json +2 -2
  62. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +126 -44
  63. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +67 -68
  64. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  65. mlrun/model_monitoring/evidently_application.py +0 -20
  66. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  67. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  68. {mlrun-1.7.0rc43.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
@@ -13,19 +13,14 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from abc import ABC, abstractmethod
16
- from typing import Any, Union, cast
16
+ from typing import Any, Union
17
17
 
18
- import numpy as np
19
- import pandas as pd
20
- from deprecated import deprecated
21
-
22
- import mlrun
23
18
  import mlrun.model_monitoring.applications.context as mm_context
24
19
  import mlrun.model_monitoring.applications.results as mm_results
25
20
  from mlrun.serving.utils import MonitoringApplicationToDict
26
21
 
27
22
 
28
- class ModelMonitoringApplicationBaseV2(MonitoringApplicationToDict, ABC):
23
+ class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
29
24
  """
30
25
  A base class for a model monitoring application.
31
26
  Inherit from this class to create a custom model monitoring application.
@@ -111,178 +106,3 @@ class ModelMonitoringApplicationBaseV2(MonitoringApplicationToDict, ABC):
111
106
  each metric name is the key and the metric value is the corresponding value).
112
107
  """
113
108
  raise NotImplementedError
114
-
115
-
116
- # TODO: Remove in 1.9.0
117
- @deprecated(
118
- version="1.7.0",
119
- reason="The `ModelMonitoringApplicationBase` class is deprecated from "
120
- "version 1.7.0 and will be removed in version 1.9.0. "
121
- "Use `ModelMonitoringApplicationBaseV2` as your application's base class.",
122
- )
123
- class ModelMonitoringApplicationBase(MonitoringApplicationToDict, ABC):
124
- """
125
- A base class for a model monitoring application.
126
- Inherit from this class to create a custom model monitoring application.
127
-
128
- example for very simple custom application::
129
-
130
- class MyApp(ApplicationBase):
131
- def do_tracking(
132
- self,
133
- sample_df_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
134
- feature_stats: mlrun.common.model_monitoring.helpers.FeatureStats,
135
- start_infer_time: pd.Timestamp,
136
- end_infer_time: pd.Timestamp,
137
- schedule_time: pd.Timestamp,
138
- latest_request: pd.Timestamp,
139
- endpoint_id: str,
140
- output_stream_uri: str,
141
- ) -> ModelMonitoringApplicationResult:
142
- self.context.log_artifact(
143
- TableArtifact(
144
- "sample_df_stats", df=self.dict_to_histogram(sample_df_stats)
145
- )
146
- )
147
- return ModelMonitoringApplicationResult(
148
- name="data_drift_test",
149
- value=0.5,
150
- kind=mm_constant.ResultKindApp.data_drift,
151
- status=mm_constant.ResultStatusApp.detected,
152
- )
153
-
154
-
155
- """
156
-
157
- kind = "monitoring_application"
158
-
159
- def do(
160
- self, monitoring_context: mm_context.MonitoringApplicationContext
161
- ) -> tuple[
162
- list[mm_results.ModelMonitoringApplicationResult],
163
- mm_context.MonitoringApplicationContext,
164
- ]:
165
- """
166
- Process the monitoring event and return application results.
167
-
168
- :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
169
- :returns: A tuple of:
170
- [0] = list of application results that can be either from type
171
- `ModelMonitoringApplicationResult` or from type
172
- `ModelMonitoringApplicationResult`.
173
- [1] = the original application event, wrapped in `MonitoringApplicationContext`
174
- object
175
- """
176
- resolved_event = self._resolve_event(monitoring_context)
177
- if not (
178
- hasattr(self, "context") and isinstance(self.context, mlrun.MLClientCtx)
179
- ):
180
- self._lazy_init(monitoring_context)
181
- results = self.do_tracking(*resolved_event)
182
- results = results if isinstance(results, list) else [results]
183
- return results, monitoring_context
184
-
185
- def _lazy_init(self, monitoring_context: mm_context.MonitoringApplicationContext):
186
- self.context = cast(mlrun.MLClientCtx, monitoring_context)
187
-
188
- @abstractmethod
189
- def do_tracking(
190
- self,
191
- application_name: str,
192
- sample_df_stats: pd.DataFrame,
193
- feature_stats: pd.DataFrame,
194
- sample_df: pd.DataFrame,
195
- start_infer_time: pd.Timestamp,
196
- end_infer_time: pd.Timestamp,
197
- latest_request: pd.Timestamp,
198
- endpoint_id: str,
199
- output_stream_uri: str,
200
- ) -> Union[
201
- mm_results.ModelMonitoringApplicationResult,
202
- list[mm_results.ModelMonitoringApplicationResult],
203
- ]:
204
- """
205
- Implement this method with your custom monitoring logic.
206
-
207
- :param application_name: (str) the app name
208
- :param sample_df_stats: (pd.DataFrame) The new sample distribution.
209
- :param feature_stats: (pd.DataFrame) The train sample distribution.
210
- :param sample_df: (pd.DataFrame) The new sample DataFrame.
211
- :param start_infer_time: (pd.Timestamp) Start time of the monitoring schedule.
212
- :param end_infer_time: (pd.Timestamp) End time of the monitoring schedule.
213
- :param latest_request: (pd.Timestamp) Timestamp of the latest request on this endpoint_id.
214
- :param endpoint_id: (str) ID of the monitored model endpoint
215
- :param output_stream_uri: (str) URI of the output stream for results
216
-
217
- :returns: (ModelMonitoringApplicationResult) or
218
- (list[ModelMonitoringApplicationResult]) of the application results.
219
- """
220
- raise NotImplementedError
221
-
222
- @classmethod
223
- def _resolve_event(
224
- cls,
225
- monitoring_context: mm_context.MonitoringApplicationContext,
226
- ) -> tuple[
227
- str,
228
- pd.DataFrame,
229
- pd.DataFrame,
230
- pd.DataFrame,
231
- pd.Timestamp,
232
- pd.Timestamp,
233
- pd.Timestamp,
234
- str,
235
- str,
236
- ]:
237
- """
238
- Converting the event into a single tuple that will be used for passing the event arguments to the running
239
- application
240
-
241
- :param monitoring_context: (MonitoringApplicationContext) The monitoring context to process.
242
-
243
- :return: A tuple of:
244
- [0] = (str) application name
245
- [1] = (pd.DataFrame) current input statistics
246
- [2] = (pd.DataFrame) train statistics
247
- [3] = (pd.DataFrame) current input data
248
- [4] = (pd.Timestamp) start time of the monitoring schedule
249
- [5] = (pd.Timestamp) end time of the monitoring schedule
250
- [6] = (pd.Timestamp) timestamp of the latest request
251
- [7] = (str) endpoint id
252
- [8] = (str) output stream uri
253
- """
254
- return (
255
- monitoring_context.application_name,
256
- cls.dict_to_histogram(monitoring_context.sample_df_stats),
257
- cls.dict_to_histogram(monitoring_context.feature_stats),
258
- monitoring_context.sample_df,
259
- monitoring_context.start_infer_time,
260
- monitoring_context.end_infer_time,
261
- monitoring_context.latest_request,
262
- monitoring_context.endpoint_id,
263
- monitoring_context.output_stream_uri,
264
- )
265
-
266
- @staticmethod
267
- def dict_to_histogram(
268
- histogram_dict: mlrun.common.model_monitoring.helpers.FeatureStats,
269
- ) -> pd.DataFrame:
270
- """
271
- Convert histogram dictionary to pandas DataFrame with feature histograms as columns
272
-
273
- :param histogram_dict: Histogram dictionary
274
-
275
- :returns: Histogram dataframe
276
- """
277
-
278
- # Create a dictionary with feature histograms as values
279
- histograms = {}
280
- for feature, stats in histogram_dict.items():
281
- if "hist" in stats:
282
- # Normalize to probability distribution of each feature
283
- histograms[feature] = np.array(stats["hist"][0]) / stats["count"]
284
-
285
- # Convert the dictionary to pandas DataFrame
286
- histograms = pd.DataFrame(histograms)
287
-
288
- return histograms
@@ -98,9 +98,6 @@ class MonitoringApplicationContext:
98
98
  self.end_infer_time = pd.Timestamp(
99
99
  cast(str, event.get(mm_constants.ApplicationEvent.END_INFER_TIME))
100
100
  )
101
- self.latest_request = pd.Timestamp(
102
- cast(str, event.get(mm_constants.ApplicationEvent.LAST_REQUEST))
103
- )
104
101
  self.endpoint_id = cast(
105
102
  str, event.get(mm_constants.ApplicationEvent.ENDPOINT_ID)
106
103
  )
@@ -108,12 +105,8 @@ class MonitoringApplicationContext:
108
105
  str, event.get(mm_constants.ApplicationEvent.OUTPUT_STREAM_URI)
109
106
  )
110
107
 
111
- self._feature_stats: Optional[FeatureStats] = json.loads(
112
- event.get(mm_constants.ApplicationEvent.FEATURE_STATS, "{}")
113
- )
114
- self._sample_df_stats: Optional[FeatureStats] = json.loads(
115
- event.get(mm_constants.ApplicationEvent.CURRENT_STATS, "{}")
116
- )
108
+ self._feature_stats: Optional[FeatureStats] = None
109
+ self._sample_df_stats: Optional[FeatureStats] = None
117
110
 
118
111
  # Default labels for the artifacts
119
112
  self._default_labels = self._get_default_labels()
@@ -18,7 +18,6 @@ from abc import ABC
18
18
 
19
19
  import pandas as pd
20
20
  import semver
21
- from deprecated import deprecated
22
21
 
23
22
  import mlrun.model_monitoring.applications.base as mm_base
24
23
  import mlrun.model_monitoring.applications.context as mm_context
@@ -64,13 +63,6 @@ if _HAS_EVIDENTLY:
64
63
  from evidently.utils.dashboard import TemplateParams, file_html_template
65
64
 
66
65
 
67
- # TODO: Remove in 1.9.0
68
- @deprecated(
69
- version="1.7.0",
70
- reason="The `EvidentlyModelMonitoringApplicationBase` class is deprecated from "
71
- "version 1.7.0 and will be removed in version 1.9.0. "
72
- "Use `EvidentlyModelMonitoringApplicationBaseV2` as your application's base class.",
73
- )
74
66
  class EvidentlyModelMonitoringApplicationBase(
75
67
  mm_base.ModelMonitoringApplicationBase, ABC
76
68
  ):
@@ -85,72 +77,6 @@ class EvidentlyModelMonitoringApplicationBase(
85
77
  :param evidently_workspace_path: (str) The path to the Evidently workspace.
86
78
  :param evidently_project_id: (str) The ID of the Evidently project.
87
79
 
88
- """
89
- if not _HAS_EVIDENTLY:
90
- raise ModuleNotFoundError("Evidently is not installed - the app cannot run")
91
- self.evidently_workspace = Workspace.create(evidently_workspace_path)
92
- self.evidently_project_id = evidently_project_id
93
- self.evidently_project = self.evidently_workspace.get_project(
94
- evidently_project_id
95
- )
96
-
97
- def log_evidently_object(
98
- self, evidently_object: "Display", artifact_name: str
99
- ) -> None:
100
- """
101
- Logs an Evidently report or suite as an artifact.
102
-
103
- :param evidently_object: (Display) The Evidently display to log, e.g. a report or a test suite object.
104
- :param artifact_name: (str) The name for the logged artifact.
105
- """
106
- evidently_object_html = evidently_object.get_html()
107
- self.context.log_artifact(
108
- artifact_name, body=evidently_object_html.encode("utf-8"), format="html"
109
- )
110
-
111
- def log_project_dashboard(
112
- self,
113
- timestamp_start: pd.Timestamp,
114
- timestamp_end: pd.Timestamp,
115
- artifact_name: str = "dashboard",
116
- ):
117
- """
118
- Logs an Evidently project dashboard.
119
-
120
- :param timestamp_start: (pd.Timestamp) The start timestamp for the dashboard data.
121
- :param timestamp_end: (pd.Timestamp) The end timestamp for the dashboard data.
122
- :param artifact_name: (str) The name for the logged artifact.
123
- """
124
-
125
- dashboard_info = self.evidently_project.build_dashboard_info(
126
- timestamp_start, timestamp_end
127
- )
128
- template_params = TemplateParams(
129
- dashboard_id="pd_" + str(uuid.uuid4()).replace("-", ""),
130
- dashboard_info=dashboard_info,
131
- additional_graphs={},
132
- )
133
-
134
- dashboard_html = file_html_template(params=template_params)
135
- self.context.log_artifact(
136
- artifact_name, body=dashboard_html.encode("utf-8"), format="html"
137
- )
138
-
139
-
140
- class EvidentlyModelMonitoringApplicationBaseV2(
141
- mm_base.ModelMonitoringApplicationBaseV2, ABC
142
- ):
143
- def __init__(
144
- self, evidently_workspace_path: str, evidently_project_id: "STR_UUID"
145
- ) -> None:
146
- """
147
- A class for integrating Evidently for mlrun model monitoring within a monitoring application.
148
- Note: evidently is not installed by default in the mlrun/mlrun image.
149
- It must be installed separately to use this class.
150
-
151
- :param evidently_workspace_path: (str) The path to the Evidently workspace.
152
- :param evidently_project_id: (str) The ID of the Evidently project.
153
-
154
80
  """
155
81
 
156
82
  # TODO : more then one project (mep -> project)
@@ -31,7 +31,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
31
31
  ResultStatusApp,
32
32
  )
33
33
  from mlrun.model_monitoring.applications import (
34
- ModelMonitoringApplicationBaseV2,
34
+ ModelMonitoringApplicationBase,
35
35
  )
36
36
  from mlrun.model_monitoring.metrics.histogram_distance import (
37
37
  HellingerDistance,
@@ -87,7 +87,7 @@ class DataDriftClassifier:
87
87
  return ResultStatusApp.no_detection
88
88
 
89
89
 
90
- class HistogramDataDriftApplication(ModelMonitoringApplicationBaseV2):
90
+ class HistogramDataDriftApplication(ModelMonitoringApplicationBase):
91
91
  """
92
92
  MLRun's default data drift application for model monitoring.
93
93
 
@@ -29,8 +29,8 @@ class _ModelMonitoringApplicationDataRes(ABC):
29
29
  def __post_init__(self):
30
30
  pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
31
31
  if not re.fullmatch(pat, self.name):
32
- raise mlrun.errors.MLRunInvalidArgumentError(
33
- "Attribute name must be of the format [a-zA-Z_][a-zA-Z0-9_]*"
32
+ raise mlrun.errors.MLRunValueError(
33
+ "Attribute name must comply with the regex `[a-zA-Z_][a-zA-Z0-9_]*`"
34
34
  )
35
35
 
36
36
  @abstractmethod
@@ -45,7 +45,7 @@ class ModelMonitoringApplicationResult(_ModelMonitoringApplicationDataRes):
45
45
 
46
46
  :param name: (str) Name of the application result. This name must be
47
47
  unique for each metric in a single application
48
- (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
48
+ (name must be of the format :code:`[a-zA-Z_][a-zA-Z0-9_]*`).
49
49
  :param value: (float) Value of the application result.
50
50
  :param kind: (ResultKindApp) Kind of application result.
51
51
  :param status: (ResultStatusApp) Status of the application result.
@@ -80,7 +80,7 @@ class ModelMonitoringApplicationMetric(_ModelMonitoringApplicationDataRes):
80
80
 
81
81
  :param name: (str) Name of the application metric. This name must be
82
82
  unique for each metric in a single application
83
- (name must be of the format [a-zA-Z_][a-zA-Z0-9_]*).
83
+ (name must be of the format :code:`[a-zA-Z_][a-zA-Z0-9_]*`).
84
84
  :param value: (float) Value of the application metric.
85
85
  """
86
86