mlrun-1.7.1rc4-py3-none-any.whl → mlrun-1.8.0rc8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (257)
  1. mlrun/__init__.py +23 -21
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +148 -14
  4. mlrun/artifacts/__init__.py +1 -2
  5. mlrun/artifacts/base.py +46 -12
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/document.py +334 -0
  8. mlrun/artifacts/manager.py +15 -13
  9. mlrun/artifacts/model.py +66 -53
  10. mlrun/common/constants.py +7 -0
  11. mlrun/common/formatters/__init__.py +1 -0
  12. mlrun/common/formatters/feature_set.py +1 -0
  13. mlrun/common/formatters/function.py +1 -0
  14. mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
  15. mlrun/common/formatters/pipeline.py +1 -2
  16. mlrun/common/formatters/project.py +9 -0
  17. mlrun/common/model_monitoring/__init__.py +0 -5
  18. mlrun/common/model_monitoring/helpers.py +1 -29
  19. mlrun/common/runtimes/constants.py +1 -2
  20. mlrun/common/schemas/__init__.py +6 -2
  21. mlrun/common/schemas/alert.py +111 -19
  22. mlrun/common/schemas/api_gateway.py +3 -3
  23. mlrun/common/schemas/artifact.py +11 -7
  24. mlrun/common/schemas/auth.py +6 -4
  25. mlrun/common/schemas/background_task.py +7 -7
  26. mlrun/common/schemas/client_spec.py +2 -3
  27. mlrun/common/schemas/clusterization_spec.py +2 -2
  28. mlrun/common/schemas/common.py +53 -3
  29. mlrun/common/schemas/constants.py +15 -0
  30. mlrun/common/schemas/datastore_profile.py +1 -1
  31. mlrun/common/schemas/feature_store.py +9 -9
  32. mlrun/common/schemas/frontend_spec.py +4 -4
  33. mlrun/common/schemas/function.py +10 -10
  34. mlrun/common/schemas/hub.py +1 -1
  35. mlrun/common/schemas/k8s.py +3 -3
  36. mlrun/common/schemas/memory_reports.py +3 -3
  37. mlrun/common/schemas/model_monitoring/__init__.py +2 -1
  38. mlrun/common/schemas/model_monitoring/constants.py +66 -14
  39. mlrun/common/schemas/model_monitoring/grafana.py +1 -1
  40. mlrun/common/schemas/model_monitoring/model_endpoints.py +91 -147
  41. mlrun/common/schemas/notification.py +24 -3
  42. mlrun/common/schemas/object.py +1 -1
  43. mlrun/common/schemas/pagination.py +4 -4
  44. mlrun/common/schemas/partition.py +137 -0
  45. mlrun/common/schemas/pipeline.py +2 -2
  46. mlrun/common/schemas/project.py +25 -17
  47. mlrun/common/schemas/runs.py +2 -2
  48. mlrun/common/schemas/runtime_resource.py +5 -5
  49. mlrun/common/schemas/schedule.py +1 -1
  50. mlrun/common/schemas/secret.py +1 -1
  51. mlrun/common/schemas/tag.py +3 -3
  52. mlrun/common/schemas/workflow.py +5 -5
  53. mlrun/config.py +67 -10
  54. mlrun/data_types/__init__.py +0 -2
  55. mlrun/data_types/infer.py +3 -1
  56. mlrun/data_types/spark.py +2 -1
  57. mlrun/datastore/__init__.py +0 -2
  58. mlrun/datastore/alibaba_oss.py +4 -1
  59. mlrun/datastore/azure_blob.py +4 -1
  60. mlrun/datastore/base.py +12 -4
  61. mlrun/datastore/datastore.py +9 -3
  62. mlrun/datastore/datastore_profile.py +79 -20
  63. mlrun/datastore/dbfs_store.py +4 -1
  64. mlrun/datastore/filestore.py +4 -1
  65. mlrun/datastore/google_cloud_storage.py +4 -1
  66. mlrun/datastore/hdfs.py +4 -1
  67. mlrun/datastore/inmem.py +4 -1
  68. mlrun/datastore/redis.py +4 -1
  69. mlrun/datastore/s3.py +4 -1
  70. mlrun/datastore/sources.py +52 -51
  71. mlrun/datastore/store_resources.py +0 -2
  72. mlrun/datastore/targets.py +21 -21
  73. mlrun/datastore/utils.py +2 -2
  74. mlrun/datastore/v3io.py +4 -1
  75. mlrun/datastore/vectorstore.py +194 -0
  76. mlrun/datastore/wasbfs/fs.py +13 -12
  77. mlrun/db/base.py +208 -82
  78. mlrun/db/factory.py +0 -3
  79. mlrun/db/httpdb.py +1237 -386
  80. mlrun/db/nopdb.py +201 -74
  81. mlrun/errors.py +2 -2
  82. mlrun/execution.py +136 -50
  83. mlrun/feature_store/__init__.py +0 -2
  84. mlrun/feature_store/api.py +41 -40
  85. mlrun/feature_store/common.py +9 -9
  86. mlrun/feature_store/feature_set.py +20 -18
  87. mlrun/feature_store/feature_vector.py +27 -24
  88. mlrun/feature_store/retrieval/base.py +14 -9
  89. mlrun/feature_store/retrieval/job.py +2 -1
  90. mlrun/feature_store/steps.py +2 -2
  91. mlrun/features.py +30 -13
  92. mlrun/frameworks/__init__.py +1 -2
  93. mlrun/frameworks/_common/__init__.py +1 -2
  94. mlrun/frameworks/_common/artifacts_library.py +2 -2
  95. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  96. mlrun/frameworks/_common/model_handler.py +29 -27
  97. mlrun/frameworks/_common/producer.py +3 -1
  98. mlrun/frameworks/_dl_common/__init__.py +1 -2
  99. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  100. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  101. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  102. mlrun/frameworks/_ml_common/__init__.py +1 -2
  103. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  104. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  105. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  109. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  110. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  111. mlrun/frameworks/huggingface/__init__.py +1 -2
  112. mlrun/frameworks/huggingface/model_server.py +9 -9
  113. mlrun/frameworks/lgbm/__init__.py +47 -44
  114. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  117. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  119. mlrun/frameworks/lgbm/model_handler.py +15 -11
  120. mlrun/frameworks/lgbm/model_server.py +11 -7
  121. mlrun/frameworks/lgbm/utils.py +2 -2
  122. mlrun/frameworks/onnx/__init__.py +1 -2
  123. mlrun/frameworks/onnx/dataset.py +3 -3
  124. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  125. mlrun/frameworks/onnx/model_handler.py +7 -5
  126. mlrun/frameworks/onnx/model_server.py +8 -6
  127. mlrun/frameworks/parallel_coordinates.py +11 -11
  128. mlrun/frameworks/pytorch/__init__.py +22 -23
  129. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  130. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  131. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  132. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  133. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  134. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  135. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  136. mlrun/frameworks/pytorch/model_handler.py +21 -17
  137. mlrun/frameworks/pytorch/model_server.py +13 -9
  138. mlrun/frameworks/sklearn/__init__.py +19 -18
  139. mlrun/frameworks/sklearn/estimator.py +2 -2
  140. mlrun/frameworks/sklearn/metric.py +3 -3
  141. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  142. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  143. mlrun/frameworks/sklearn/model_handler.py +4 -3
  144. mlrun/frameworks/tf_keras/__init__.py +11 -12
  145. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  146. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  147. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  148. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  149. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  150. mlrun/frameworks/tf_keras/model_server.py +12 -8
  151. mlrun/frameworks/xgboost/__init__.py +19 -18
  152. mlrun/frameworks/xgboost/model_handler.py +13 -9
  153. mlrun/launcher/base.py +3 -4
  154. mlrun/launcher/local.py +1 -1
  155. mlrun/launcher/remote.py +1 -1
  156. mlrun/lists.py +4 -3
  157. mlrun/model.py +117 -46
  158. mlrun/model_monitoring/__init__.py +4 -4
  159. mlrun/model_monitoring/api.py +61 -59
  160. mlrun/model_monitoring/applications/_application_steps.py +17 -17
  161. mlrun/model_monitoring/applications/base.py +165 -6
  162. mlrun/model_monitoring/applications/context.py +88 -37
  163. mlrun/model_monitoring/applications/evidently_base.py +1 -2
  164. mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
  165. mlrun/model_monitoring/applications/results.py +55 -3
  166. mlrun/model_monitoring/controller.py +207 -239
  167. mlrun/model_monitoring/db/__init__.py +0 -2
  168. mlrun/model_monitoring/db/_schedules.py +156 -0
  169. mlrun/model_monitoring/db/_stats.py +189 -0
  170. mlrun/model_monitoring/db/tsdb/base.py +78 -25
  171. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +90 -16
  172. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  173. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +279 -59
  174. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  175. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
  176. mlrun/model_monitoring/helpers.py +152 -49
  177. mlrun/model_monitoring/stream_processing.py +99 -283
  178. mlrun/model_monitoring/tracking_policy.py +10 -3
  179. mlrun/model_monitoring/writer.py +48 -36
  180. mlrun/package/__init__.py +3 -6
  181. mlrun/package/context_handler.py +1 -1
  182. mlrun/package/packager.py +12 -9
  183. mlrun/package/packagers/__init__.py +0 -2
  184. mlrun/package/packagers/default_packager.py +14 -11
  185. mlrun/package/packagers/numpy_packagers.py +16 -7
  186. mlrun/package/packagers/pandas_packagers.py +18 -18
  187. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  188. mlrun/package/packagers_manager.py +31 -14
  189. mlrun/package/utils/__init__.py +0 -3
  190. mlrun/package/utils/_pickler.py +6 -6
  191. mlrun/platforms/__init__.py +47 -16
  192. mlrun/platforms/iguazio.py +4 -1
  193. mlrun/projects/operations.py +27 -27
  194. mlrun/projects/pipelines.py +75 -38
  195. mlrun/projects/project.py +865 -206
  196. mlrun/run.py +53 -10
  197. mlrun/runtimes/__init__.py +1 -3
  198. mlrun/runtimes/base.py +15 -11
  199. mlrun/runtimes/daskjob.py +9 -9
  200. mlrun/runtimes/generators.py +2 -1
  201. mlrun/runtimes/kubejob.py +4 -5
  202. mlrun/runtimes/mounts.py +572 -0
  203. mlrun/runtimes/mpijob/__init__.py +0 -2
  204. mlrun/runtimes/mpijob/abstract.py +7 -6
  205. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  206. mlrun/runtimes/nuclio/application/application.py +11 -11
  207. mlrun/runtimes/nuclio/function.py +19 -17
  208. mlrun/runtimes/nuclio/serving.py +18 -11
  209. mlrun/runtimes/pod.py +154 -45
  210. mlrun/runtimes/remotesparkjob.py +3 -2
  211. mlrun/runtimes/sparkjob/__init__.py +0 -2
  212. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  213. mlrun/runtimes/utils.py +6 -5
  214. mlrun/serving/merger.py +6 -4
  215. mlrun/serving/remote.py +18 -17
  216. mlrun/serving/routers.py +185 -172
  217. mlrun/serving/server.py +7 -1
  218. mlrun/serving/states.py +97 -78
  219. mlrun/serving/utils.py +13 -2
  220. mlrun/serving/v1_serving.py +3 -2
  221. mlrun/serving/v2_serving.py +74 -65
  222. mlrun/track/__init__.py +1 -1
  223. mlrun/track/tracker.py +2 -2
  224. mlrun/track/trackers/mlflow_tracker.py +6 -5
  225. mlrun/utils/async_http.py +1 -1
  226. mlrun/utils/clones.py +1 -1
  227. mlrun/utils/helpers.py +66 -18
  228. mlrun/utils/logger.py +106 -4
  229. mlrun/utils/notifications/notification/__init__.py +22 -19
  230. mlrun/utils/notifications/notification/base.py +33 -14
  231. mlrun/utils/notifications/notification/console.py +6 -6
  232. mlrun/utils/notifications/notification/git.py +11 -11
  233. mlrun/utils/notifications/notification/ipython.py +10 -9
  234. mlrun/utils/notifications/notification/mail.py +176 -0
  235. mlrun/utils/notifications/notification/slack.py +6 -6
  236. mlrun/utils/notifications/notification/webhook.py +6 -6
  237. mlrun/utils/notifications/notification_pusher.py +86 -44
  238. mlrun/utils/regex.py +3 -1
  239. mlrun/utils/version/version.json +2 -2
  240. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/METADATA +191 -186
  241. mlrun-1.8.0rc8.dist-info/RECORD +347 -0
  242. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/WHEEL +1 -1
  243. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  244. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  245. mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
  246. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  247. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  248. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  249. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  250. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  251. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
  252. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  253. mlrun/model_monitoring/model_endpoint.py +0 -118
  254. mlrun-1.7.1rc4.dist-info/RECORD +0 -351
  255. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/LICENSE +0 -0
  256. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/entry_points.txt +0 -0
  257. {mlrun-1.7.1rc4.dist-info → mlrun-1.8.0rc8.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from datetime import datetime, timezone
15
+ from datetime import datetime, timedelta, timezone
16
16
  from io import StringIO
17
17
  from typing import Literal, Optional, Union
18
18
 
@@ -168,6 +168,9 @@ class V3IOTSDBConnector(TSDBConnector):
168
168
  tsdb_batching_max_events: int = 1000,
169
169
  tsdb_batching_timeout_secs: int = 30,
170
170
  sample_window: int = 10,
171
+ aggregate_windows: Optional[list[str]] = None,
172
+ aggregate_period: str = "1m",
173
+ **kwarg,
171
174
  ):
172
175
  """
173
176
  Apply TSDB steps on the provided monitoring graph. Throughout these steps, the graph stores live data of
@@ -178,7 +181,40 @@ class V3IOTSDBConnector(TSDBConnector):
178
181
  - endpoint_features (Prediction and feature names and values)
179
182
  - custom_metrics (user-defined metrics)
180
183
  """
184
+ aggregate_windows = aggregate_windows or ["5m", "1h"]
181
185
 
186
+ # Calculate number of predictions and average latency
187
+ def apply_storey_aggregations():
188
+ # Calculate number of predictions for each window (5 min and 1 hour by default)
189
+ graph.add_step(
190
+ class_name="storey.AggregateByKey",
191
+ aggregates=[
192
+ {
193
+ "name": EventFieldType.LATENCY,
194
+ "column": EventFieldType.LATENCY,
195
+ "operations": ["count", "avg"],
196
+ "windows": aggregate_windows,
197
+ "period": aggregate_period,
198
+ }
199
+ ],
200
+ name=EventFieldType.LATENCY,
201
+ after="MapFeatureNames",
202
+ step_name="Aggregates",
203
+ table=".",
204
+ key_field=EventFieldType.ENDPOINT_ID,
205
+ )
206
+ # Calculate average latency time for each window (5 min and 1 hour by default)
207
+ graph.add_step(
208
+ class_name="storey.Rename",
209
+ mapping={
210
+ "latency_count_5m": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_5M,
211
+ "latency_count_1h": mm_schemas.EventLiveStats.PREDICTIONS_COUNT_1H,
212
+ },
213
+ name="Rename",
214
+ after=EventFieldType.LATENCY,
215
+ )
216
+
217
+ apply_storey_aggregations()
182
218
  # Write latency per prediction, labeled by endpoint ID only
183
219
  graph.add_step(
184
220
  "storey.TSDBTarget",
@@ -310,6 +346,7 @@ class V3IOTSDBConnector(TSDBConnector):
310
346
  ],
311
347
  index_cols=[
312
348
  mm_schemas.EventFieldType.ENDPOINT_ID,
349
+ mm_schemas.EventFieldType.ERROR_TYPE,
313
350
  ],
314
351
  max_events=tsdb_batching_max_events,
315
352
  flush_after_seconds=tsdb_batching_timeout_secs,
@@ -338,9 +375,6 @@ class V3IOTSDBConnector(TSDBConnector):
338
375
  elif kind == mm_schemas.WriterEventKind.RESULT:
339
376
  table = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
340
377
  index_cols = index_cols_base + [mm_schemas.ResultData.RESULT_NAME]
341
- event.pop(mm_schemas.ResultData.CURRENT_STATS, None)
342
- # TODO: remove this when extra data is supported (ML-7460)
343
- event.pop(mm_schemas.ResultData.RESULT_EXTRA_DATA, None)
344
378
  else:
345
379
  raise ValueError(f"Invalid {kind = }")
346
380
 
@@ -544,6 +578,7 @@ class V3IOTSDBConnector(TSDBConnector):
544
578
  end: datetime,
545
579
  metrics: list[mm_schemas.ModelEndpointMonitoringMetric],
546
580
  type: Literal["metrics", "results"] = "results",
581
+ with_result_extra_data: bool = False,
547
582
  ) -> Union[
548
583
  list[
549
584
  Union[
@@ -565,6 +600,12 @@ class V3IOTSDBConnector(TSDBConnector):
565
600
  """
566
601
 
567
602
  if type == "metrics":
603
+ if with_result_extra_data:
604
+ logger.warning(
605
+ "The 'with_result_extra_data' parameter is not supported for metrics, just for results",
606
+ project=self.project,
607
+ endpoint_id=endpoint_id,
608
+ )
568
609
  table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
569
610
  name = mm_schemas.MetricData.METRIC_NAME
570
611
  columns = [mm_schemas.MetricData.METRIC_VALUE]
@@ -577,6 +618,8 @@ class V3IOTSDBConnector(TSDBConnector):
577
618
  mm_schemas.ResultData.RESULT_STATUS,
578
619
  mm_schemas.ResultData.RESULT_KIND,
579
620
  ]
621
+ if with_result_extra_data:
622
+ columns.append(mm_schemas.ResultData.RESULT_EXTRA_DATA)
580
623
  df_handler = self.df_to_results_values
581
624
  else:
582
625
  raise ValueError(f"Invalid {type = }")
@@ -605,6 +648,9 @@ class V3IOTSDBConnector(TSDBConnector):
605
648
  endpoint_id=endpoint_id,
606
649
  is_empty=df.empty,
607
650
  )
651
+ if not with_result_extra_data and type == "results":
652
+ # Set the extra data to an empty string if it's not requested
653
+ df[mm_schemas.ResultData.RESULT_EXTRA_DATA] = ""
608
654
 
609
655
  return df_handler(df=df, metrics=metrics, project=self.project)
610
656
 
@@ -700,12 +746,13 @@ class V3IOTSDBConnector(TSDBConnector):
700
746
  def get_last_request(
701
747
  self,
702
748
  endpoint_ids: Union[str, list[str]],
703
- start: Union[datetime, str] = "0",
704
- end: Union[datetime, str] = "now",
749
+ start: Optional[datetime] = None,
750
+ end: Optional[datetime] = None,
705
751
  ) -> pd.DataFrame:
706
752
  endpoint_ids = (
707
753
  endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
708
754
  )
755
+ start, end = self._get_start_end(start, end)
709
756
  df = self._get_records(
710
757
  table=mm_schemas.FileTargetKind.PREDICTIONS,
711
758
  start=start,
@@ -734,12 +781,14 @@ class V3IOTSDBConnector(TSDBConnector):
734
781
  def get_drift_status(
735
782
  self,
736
783
  endpoint_ids: Union[str, list[str]],
737
- start: Union[datetime, str] = "now-24h",
738
- end: Union[datetime, str] = "now",
784
+ start: Optional[datetime] = None,
785
+ end: Optional[datetime] = None,
739
786
  ) -> pd.DataFrame:
740
787
  endpoint_ids = (
741
788
  endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
742
789
  )
790
+ start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
791
+ start, end = self._get_start_end(start, end)
743
792
  df = self._get_records(
744
793
  table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
745
794
  start=start,
@@ -758,9 +807,10 @@ class V3IOTSDBConnector(TSDBConnector):
758
807
  def get_metrics_metadata(
759
808
  self,
760
809
  endpoint_id: str,
761
- start: Union[datetime, str] = "0",
762
- end: Union[datetime, str] = "now",
810
+ start: Optional[datetime] = None,
811
+ end: Optional[datetime] = None,
763
812
  ) -> pd.DataFrame:
813
+ start, end = self._get_start_end(start, end)
764
814
  df = self._get_records(
765
815
  table=mm_schemas.V3IOTSDBTables.METRICS,
766
816
  start=start,
@@ -778,9 +828,10 @@ class V3IOTSDBConnector(TSDBConnector):
778
828
  def get_results_metadata(
779
829
  self,
780
830
  endpoint_id: str,
781
- start: Union[datetime, str] = "0",
782
- end: Union[datetime, str] = "now",
831
+ start: Optional[datetime] = None,
832
+ end: Optional[datetime] = None,
783
833
  ) -> pd.DataFrame:
834
+ start, end = self._get_start_end(start, end)
784
835
  df = self._get_records(
785
836
  table=mm_schemas.V3IOTSDBTables.APP_RESULTS,
786
837
  start=start,
@@ -803,18 +854,20 @@ class V3IOTSDBConnector(TSDBConnector):
803
854
  def get_error_count(
804
855
  self,
805
856
  endpoint_ids: Union[str, list[str]],
806
- start: Union[datetime, str] = "0",
807
- end: Union[datetime, str] = "now",
857
+ start: Optional[datetime] = None,
858
+ end: Optional[datetime] = None,
808
859
  ) -> pd.DataFrame:
809
860
  endpoint_ids = (
810
861
  endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
811
862
  )
863
+ start, end = self._get_start_end(start, end)
812
864
  df = self._get_records(
813
865
  table=mm_schemas.FileTargetKind.ERRORS,
814
866
  start=start,
815
867
  end=end,
816
868
  columns=[mm_schemas.EventFieldType.ERROR_COUNT],
817
- filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]})",
869
+ filter_query=f"endpoint_id IN({str(endpoint_ids)[1:-1]}) "
870
+ f"AND {mm_schemas.EventFieldType.ERROR_TYPE} == '{mm_schemas.EventFieldType.INFER_ERROR}'",
818
871
  agg_funcs=["count"],
819
872
  )
820
873
  if not df.empty:
@@ -830,12 +883,14 @@ class V3IOTSDBConnector(TSDBConnector):
830
883
  def get_avg_latency(
831
884
  self,
832
885
  endpoint_ids: Union[str, list[str]],
833
- start: Union[datetime, str] = "0",
834
- end: Union[datetime, str] = "now",
886
+ start: Optional[datetime] = None,
887
+ end: Optional[datetime] = None,
835
888
  ) -> pd.DataFrame:
836
889
  endpoint_ids = (
837
890
  endpoint_ids if isinstance(endpoint_ids, list) else [endpoint_ids]
838
891
  )
892
+ start = start or (mlrun.utils.datetime_now() - timedelta(hours=24))
893
+ start, end = self._get_start_end(start, end)
839
894
  df = self._get_records(
840
895
  table=mm_schemas.FileTargetKind.PREDICTIONS,
841
896
  start=start,
@@ -846,4 +901,10 @@ class V3IOTSDBConnector(TSDBConnector):
846
901
  )
847
902
  if not df.empty:
848
903
  df.dropna(inplace=True)
904
+ df.rename(
905
+ columns={
906
+ f"avg({mm_schemas.EventFieldType.LATENCY})": f"avg_{mm_schemas.EventFieldType.LATENCY}"
907
+ },
908
+ inplace=True,
909
+ )
849
910
  return df.reset_index(drop=True)
@@ -13,26 +13,32 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import datetime
16
+ import os
16
17
  import typing
17
18
 
18
19
  import numpy as np
19
20
  import pandas as pd
20
21
 
21
22
  if typing.TYPE_CHECKING:
23
+ from mlrun.datastore import DataItem
22
24
  from mlrun.db.base import RunDBInterface
23
25
  from mlrun.projects import MlrunProject
24
26
 
27
+ from fnmatch import fnmatchcase
28
+ from typing import Optional
29
+
25
30
  import mlrun
26
31
  import mlrun.artifacts
27
32
  import mlrun.common.model_monitoring.helpers
28
33
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
29
34
  import mlrun.data_types.infer
30
35
  import mlrun.model_monitoring
36
+ import mlrun.utils.helpers
37
+ from mlrun.common.schemas import ModelEndpoint
31
38
  from mlrun.common.schemas.model_monitoring.model_endpoints import (
32
39
  ModelEndpointMonitoringMetric,
33
40
  _compose_full_name,
34
41
  )
35
- from mlrun.model_monitoring.model_endpoint import ModelEndpoint
36
42
  from mlrun.utils import logger
37
43
 
38
44
 
@@ -42,6 +48,70 @@ class _BatchDict(typing.TypedDict):
42
48
  days: int
43
49
 
44
50
 
51
+ def _is_results_regex_match(
52
+ existing_result_name: Optional[str],
53
+ result_name_filters: Optional[list[str]],
54
+ ) -> bool:
55
+ if existing_result_name.count(".") != 3 or any(
56
+ part == "" for part in existing_result_name.split(".")
57
+ ):
58
+ logger.warning(
59
+ f"_is_results_regex_match: existing_result_name illegal, will be ignored."
60
+ f" existing_result_name: {existing_result_name}"
61
+ )
62
+ return False
63
+ existing_result_name = ".".join(existing_result_name.split(".")[i] for i in [1, 3])
64
+ for result_name_filter in result_name_filters:
65
+ if fnmatchcase(existing_result_name, result_name_filter):
66
+ return True
67
+ return False
68
+
69
+
70
+ def filter_results_by_regex(
71
+ existing_result_names: Optional[list[str]] = None,
72
+ result_name_filters: Optional[list[str]] = None,
73
+ ) -> list[str]:
74
+ """
75
+ Filter a list of existing result names by a list of filters.
76
+
77
+ This function returns only the results that match the filters provided. If no filters are given,
78
+ it returns all results. Invalid inputs are ignored.
79
+
80
+ :param existing_result_names: List of existing results' fully qualified names (FQNs)
81
+ in the format: endpoint_id.app_name.type.name.
82
+ Example: mep1.app1.result.metric1
83
+ :param result_name_filters: List of filters in the format: app.result_name.
84
+ Wildcards can be used, such as app.result* or *.result
85
+
86
+ :return: List of FQNs of the matching results
87
+ """
88
+
89
+ if not result_name_filters:
90
+ return existing_result_names
91
+
92
+ if not existing_result_names:
93
+ return []
94
+
95
+ # filters validations
96
+ validated_filters = []
97
+ for result_name_filter in result_name_filters:
98
+ if result_name_filter.count(".") != 1:
99
+ logger.warning(
100
+ f"filter_results_by_regex: result_name_filter illegal, will be ignored."
101
+ f"Filter: {result_name_filter}"
102
+ )
103
+ else:
104
+ validated_filters.append(result_name_filter)
105
+ filtered_metrics_names = []
106
+ for existing_result_name in existing_result_names:
107
+ if _is_results_regex_match(
108
+ existing_result_name=existing_result_name,
109
+ result_name_filters=validated_filters,
110
+ ):
111
+ filtered_metrics_names.append(existing_result_name)
112
+ return filtered_metrics_names
113
+
114
+
45
115
  def get_stream_path(
46
116
  project: str,
47
117
  function_name: str = mm_constants.MonitoringFunctionNames.STREAM,
@@ -98,19 +168,64 @@ def get_monitoring_parquet_path(
98
168
  return parquet_path
99
169
 
100
170
 
101
- def get_connection_string(secret_provider: typing.Callable[[str], str] = None) -> str:
102
- """Get endpoint store connection string from the project secret. If wasn't set, take it from the system
103
- configurations.
171
+ def get_monitoring_stats_directory_path(
172
+ project: str,
173
+ kind: str = mm_constants.FileTargetKind.STATS,
174
+ ) -> str:
175
+ """
176
+ Get model monitoring stats target for the current project and kind. The stats target path is based on the
177
+ project artifact path. If project artifact path is not defined, the stats target path will be based on MLRun
178
+ artifact path.
179
+ :param project: Project object.
180
+ :param kind: indicate the kind of the stats path
181
+ :return: Monitoring stats target path.
182
+ """
183
+ stats_path = mlrun.mlconf.get_model_monitoring_file_target_path(
184
+ project=project,
185
+ kind=kind,
186
+ )
187
+ return stats_path
104
188
 
105
- :param secret_provider: An optional secret provider to get the connection string secret.
106
189
 
107
- :return: Valid SQL connection string.
190
+ def _get_monitoring_current_stats_file_path(project: str, endpoint_id: str) -> str:
191
+ return os.path.join(
192
+ get_monitoring_stats_directory_path(project),
193
+ f"{endpoint_id}_current_stats.json",
194
+ )
195
+
108
196
 
197
+ def _get_monitoring_drift_measures_file_path(project: str, endpoint_id: str) -> str:
198
+ return os.path.join(
199
+ get_monitoring_stats_directory_path(project),
200
+ f"{endpoint_id}_drift_measures.json",
201
+ )
202
+
203
+
204
+ def get_monitoring_current_stats_data(project: str, endpoint_id: str) -> "DataItem":
205
+ """
206
+ getter for data item of current stats for project and endpoint
207
+ :param project: project name str
208
+ :param endpoint_id: endpoint id str
209
+ :return: DataItem
109
210
  """
211
+ return mlrun.datastore.store_manager.object(
212
+ _get_monitoring_current_stats_file_path(
213
+ project=project, endpoint_id=endpoint_id
214
+ )
215
+ )
110
216
 
111
- return mlrun.get_secret_or_env(
112
- key=mm_constants.ProjectSecretKeys.ENDPOINT_STORE_CONNECTION,
113
- secret_provider=secret_provider,
217
+
218
+ def get_monitoring_drift_measures_data(project: str, endpoint_id: str) -> "DataItem":
219
+ """
220
+ getter for data item of drift measures for project and endpoint
221
+ :param project: project name str
222
+ :param endpoint_id: endpoint id str
223
+ :return: DataItem
224
+ """
225
+ return mlrun.datastore.store_manager.object(
226
+ _get_monitoring_drift_measures_file_path(
227
+ project=project, endpoint_id=endpoint_id
228
+ )
114
229
  )
115
230
 
116
231
 
@@ -186,19 +301,24 @@ def update_model_endpoint_last_request(
186
301
  :param current_request: current request time
187
302
  :param db: DB interface.
188
303
  """
189
- is_model_server_endpoint = model_endpoint.spec.stream_path != ""
190
- if is_model_server_endpoint:
191
- current_request = current_request.isoformat()
304
+ is_batch_endpoint = (
305
+ model_endpoint.metadata.endpoint_type == mm_constants.EndpointType.BATCH_EP
306
+ )
307
+ if not is_batch_endpoint:
192
308
  logger.info(
193
309
  "Update model endpoint last request time (EP with serving)",
194
310
  project=project,
195
311
  endpoint_id=model_endpoint.metadata.uid,
312
+ name=model_endpoint.metadata.name,
313
+ function_name=model_endpoint.spec.function_name,
196
314
  last_request=model_endpoint.status.last_request,
197
315
  current_request=current_request,
198
316
  )
199
317
  db.patch_model_endpoint(
200
318
  project=project,
201
319
  endpoint_id=model_endpoint.metadata.uid,
320
+ name=model_endpoint.metadata.name,
321
+ function_name=model_endpoint.spec.function_name,
202
322
  attributes={mm_constants.EventFieldType.LAST_REQUEST: current_request},
203
323
  )
204
324
  else: # model endpoint without any serving function - close the window "manually"
@@ -217,7 +337,7 @@ def update_model_endpoint_last_request(
217
337
  + datetime.timedelta(
218
338
  seconds=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_timeout_secs
219
339
  )
220
- ).isoformat()
340
+ )
221
341
  logger.info(
222
342
  "Bumping model endpoint last request time (EP without serving)",
223
343
  project=project,
@@ -229,6 +349,8 @@ def update_model_endpoint_last_request(
229
349
  db.patch_model_endpoint(
230
350
  project=project,
231
351
  endpoint_id=model_endpoint.metadata.uid,
352
+ name=model_endpoint.metadata.name,
353
+ function_name=model_endpoint.spec.function_name,
232
354
  attributes={mm_constants.EventFieldType.LAST_REQUEST: bumped_last_request},
233
355
  )
234
356
 
@@ -270,17 +392,6 @@ def calculate_inputs_statistics(
270
392
  return inputs_statistics
271
393
 
272
394
 
273
- def get_endpoint_record(
274
- project: str,
275
- endpoint_id: str,
276
- secret_provider: typing.Optional[typing.Callable[[str], str]] = None,
277
- ) -> dict[str, typing.Any]:
278
- model_endpoint_store = mlrun.model_monitoring.get_store_object(
279
- project=project, secret_provider=secret_provider
280
- )
281
- return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
282
-
283
-
284
395
  def get_result_instance_fqn(
285
396
  model_endpoint_id: str, app_name: str, result_name: str
286
397
  ) -> str:
@@ -320,33 +431,25 @@ def get_invocations_metric(project: str) -> ModelEndpointMonitoringMetric:
320
431
  )
321
432
 
322
433
 
323
- def enrich_model_endpoint_with_model_uri(
324
- model_endpoint: ModelEndpoint,
325
- model_obj: mlrun.artifacts.ModelArtifact,
326
- ):
327
- """
328
- Enrich the model endpoint object with the model uri from the model object. We will use a unique reference
329
- to the model object that includes the project, db_key, iter, and tree.
330
- In addition, we verify that the model object is of type `ModelArtifact`.
434
+ def _get_monitoring_schedules_folder_path(project: str) -> str:
435
+ return typing.cast(
436
+ str,
437
+ mlrun.mlconf.get_model_monitoring_file_target_path(
438
+ project=project, kind=mm_constants.FileTargetKind.MONITORING_SCHEDULES
439
+ ),
440
+ )
331
441
 
332
- :param model_endpoint: An object representing the model endpoint that will be enriched with the model uri.
333
- :param model_obj: An object representing the model artifact.
334
442
 
335
- :raise: `MLRunInvalidArgumentError` if the model object is not of type `ModelArtifact`.
336
- """
337
- mlrun.utils.helpers.verify_field_of_type(
338
- field_name="model_endpoint.spec.model_uri",
339
- field_value=model_obj,
340
- expected_type=mlrun.artifacts.ModelArtifact,
443
+ def _get_monitoring_schedules_file_path(*, project: str, endpoint_id: str) -> str:
444
+ return os.path.join(
445
+ _get_monitoring_schedules_folder_path(project), f"{endpoint_id}.json"
341
446
  )
342
447
 
343
- # Update model_uri with a unique reference to handle future changes
344
- model_artifact_uri = mlrun.utils.helpers.generate_artifact_uri(
345
- project=model_endpoint.metadata.project,
346
- key=model_obj.db_key,
347
- iter=model_obj.iter,
348
- tree=model_obj.tree,
349
- )
350
- model_endpoint.spec.model_uri = mlrun.datastore.get_store_uri(
351
- kind=mlrun.utils.helpers.StorePrefix.Model, uri=model_artifact_uri
448
+
449
+ def get_monitoring_schedules_data(*, project: str, endpoint_id: str) -> "DataItem":
450
+ """
451
+ Get the model monitoring schedules' data item of the project's model endpoint.
452
+ """
453
+ return mlrun.datastore.store_manager.object(
454
+ _get_monitoring_schedules_file_path(project=project, endpoint_id=endpoint_id)
352
455
  )