mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of mlrun might be problematic.

Files changed (107)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +18 -109
  3. mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
  4. mlrun/alerts/alert.py +141 -0
  5. mlrun/artifacts/__init__.py +8 -3
  6. mlrun/artifacts/base.py +36 -253
  7. mlrun/artifacts/dataset.py +9 -190
  8. mlrun/artifacts/manager.py +20 -41
  9. mlrun/artifacts/model.py +8 -140
  10. mlrun/artifacts/plots.py +14 -375
  11. mlrun/common/schemas/__init__.py +4 -2
  12. mlrun/common/schemas/alert.py +46 -4
  13. mlrun/common/schemas/api_gateway.py +4 -0
  14. mlrun/common/schemas/artifact.py +15 -0
  15. mlrun/common/schemas/auth.py +2 -0
  16. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  17. mlrun/common/schemas/model_monitoring/constants.py +40 -4
  18. mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
  19. mlrun/common/schemas/project.py +2 -0
  20. mlrun/config.py +7 -4
  21. mlrun/data_types/to_pandas.py +4 -4
  22. mlrun/datastore/base.py +41 -9
  23. mlrun/datastore/datastore_profile.py +54 -4
  24. mlrun/datastore/inmem.py +2 -2
  25. mlrun/datastore/sources.py +43 -2
  26. mlrun/datastore/store_resources.py +2 -6
  27. mlrun/datastore/targets.py +106 -39
  28. mlrun/db/base.py +23 -3
  29. mlrun/db/httpdb.py +101 -47
  30. mlrun/db/nopdb.py +20 -2
  31. mlrun/errors.py +5 -0
  32. mlrun/feature_store/__init__.py +0 -2
  33. mlrun/feature_store/api.py +12 -47
  34. mlrun/feature_store/feature_set.py +9 -0
  35. mlrun/feature_store/retrieval/base.py +9 -4
  36. mlrun/feature_store/retrieval/conversion.py +4 -4
  37. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  38. mlrun/feature_store/retrieval/job.py +2 -0
  39. mlrun/feature_store/retrieval/local_merger.py +2 -0
  40. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  41. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  42. mlrun/launcher/base.py +4 -3
  43. mlrun/launcher/client.py +1 -1
  44. mlrun/lists.py +4 -2
  45. mlrun/model.py +25 -11
  46. mlrun/model_monitoring/__init__.py +1 -1
  47. mlrun/model_monitoring/api.py +41 -18
  48. mlrun/model_monitoring/application.py +5 -305
  49. mlrun/model_monitoring/applications/__init__.py +11 -0
  50. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  51. mlrun/model_monitoring/applications/base.py +282 -0
  52. mlrun/model_monitoring/applications/context.py +214 -0
  53. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  54. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  55. mlrun/model_monitoring/applications/results.py +99 -0
  56. mlrun/model_monitoring/controller.py +3 -1
  57. mlrun/model_monitoring/db/__init__.py +2 -0
  58. mlrun/model_monitoring/db/stores/base/store.py +9 -36
  59. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  60. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
  61. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
  62. mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
  63. mlrun/model_monitoring/db/tsdb/base.py +135 -0
  64. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  65. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  66. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
  67. mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
  68. mlrun/model_monitoring/evidently_application.py +6 -118
  69. mlrun/model_monitoring/helpers.py +1 -1
  70. mlrun/model_monitoring/model_endpoint.py +3 -2
  71. mlrun/model_monitoring/stream_processing.py +48 -213
  72. mlrun/model_monitoring/writer.py +101 -121
  73. mlrun/platforms/__init__.py +10 -9
  74. mlrun/platforms/iguazio.py +21 -202
  75. mlrun/projects/operations.py +11 -7
  76. mlrun/projects/pipelines.py +13 -76
  77. mlrun/projects/project.py +73 -45
  78. mlrun/render.py +11 -13
  79. mlrun/run.py +6 -41
  80. mlrun/runtimes/__init__.py +3 -3
  81. mlrun/runtimes/base.py +6 -6
  82. mlrun/runtimes/funcdoc.py +0 -28
  83. mlrun/runtimes/kubejob.py +2 -1
  84. mlrun/runtimes/local.py +1 -1
  85. mlrun/runtimes/mpijob/__init__.py +0 -20
  86. mlrun/runtimes/mpijob/v1.py +1 -1
  87. mlrun/runtimes/nuclio/api_gateway.py +75 -9
  88. mlrun/runtimes/nuclio/function.py +9 -35
  89. mlrun/runtimes/pod.py +16 -36
  90. mlrun/runtimes/remotesparkjob.py +1 -1
  91. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  92. mlrun/runtimes/utils.py +1 -39
  93. mlrun/utils/helpers.py +72 -71
  94. mlrun/utils/notifications/notification/base.py +1 -1
  95. mlrun/utils/notifications/notification/slack.py +12 -5
  96. mlrun/utils/notifications/notification/webhook.py +1 -1
  97. mlrun/utils/notifications/notification_pusher.py +134 -14
  98. mlrun/utils/version/version.json +2 -2
  99. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
  100. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
  101. mlrun/kfpops.py +0 -865
  102. mlrun/platforms/other.py +0 -305
  103. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  104. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
  105. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
  106. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
  107. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py CHANGED
@@ -15,7 +15,6 @@
 import enum
 import http
 import re
-import tempfile
 import time
 import traceback
 import typing
@@ -26,9 +25,9 @@ from os import path, remove
 from typing import Optional, Union
 from urllib.parse import urlparse

-import kfp
 import requests
 import semver
+from mlrun_pipelines.utils import compile_pipeline

 import mlrun
 import mlrun.common.schemas
@@ -38,6 +37,7 @@ import mlrun.platforms
 import mlrun.projects
 import mlrun.runtimes.nuclio.api_gateway
 import mlrun.utils
+from mlrun.alerts.alert import AlertConfig
 from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
 from mlrun.errors import MLRunInvalidArgumentError, err_to_str

@@ -51,7 +51,6 @@ from ..utils import (
     datetime_to_iso,
     dict_to_json,
     logger,
-    new_pipe_metadata,
     normalize_name,
     version,
 )
@@ -590,7 +589,7 @@ class HTTPRunDB(RunDBInterface):
         if offset < 0:
             raise MLRunInvalidArgumentError("Offset cannot be negative")
         if size is None:
-            size = int(config.httpdb.logs.pull_logs_default_size_limit)
+            size = int(mlrun.mlconf.httpdb.logs.pull_logs_default_size_limit)
         elif size == -1:
             logger.warning(
                 "Retrieving all logs. This may be inefficient and can result in a large log."
@@ -636,33 +635,35 @@ class HTTPRunDB(RunDBInterface):

         state, text = self.get_log(uid, project, offset=offset)
         if text:
-            print(text.decode(errors=config.httpdb.logs.decode.errors))
+            print(text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors))
         nil_resp = 0
         while True:
             offset += len(text)
             # if we get 3 nil responses in a row, increase the sleep time to 10 seconds
             # TODO: refactor this to use a conditional backoff mechanism
             if nil_resp < 3:
-                time.sleep(int(config.httpdb.logs.pull_logs_default_interval))
+                time.sleep(int(mlrun.mlconf.httpdb.logs.pull_logs_default_interval))
             else:
                 time.sleep(
-                    int(config.httpdb.logs.pull_logs_backoff_no_logs_default_interval)
+                    int(
+                        mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
+                    )
                 )
             state, text = self.get_log(uid, project, offset=offset)
             if text:
                 nil_resp = 0
                 print(
-                    text.decode(errors=config.httpdb.logs.decode.errors),
+                    text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors),
                     end="",
                 )
             else:
                 nil_resp += 1

             if watch and state in [
-                mlrun.runtimes.constants.RunStates.pending,
-                mlrun.runtimes.constants.RunStates.running,
-                mlrun.runtimes.constants.RunStates.created,
-                mlrun.runtimes.constants.RunStates.aborting,
+                mlrun.common.runtimes.constants.RunStates.pending,
+                mlrun.common.runtimes.constants.RunStates.running,
+                mlrun.common.runtimes.constants.RunStates.created,
+                mlrun.common.runtimes.constants.RunStates.aborting,
             ]:
                 continue
             else:
@@ -985,7 +986,18 @@ class HTTPRunDB(RunDBInterface):
         resp = self.api_call("GET", endpoint_path, error, params=params, version="v2")
         return resp.json()

-    def del_artifact(self, key, tag=None, project="", tree=None, uid=None):
+    def del_artifact(
+        self,
+        key,
+        tag=None,
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         """Delete an artifact.

         :param key: Identifying key of the artifact.
@@ -993,6 +1005,8 @@
         :param project: Project that the artifact belongs to.
         :param tree: The tree which generated this artifact.
         :param uid: A unique ID for this specific version of the artifact (the uid that was generated in the backend)
+        :param deletion_strategy: The artifact deletion strategy types.
+        :param secrets: Credentials needed to access the artifact data.
         """

         endpoint_path = f"projects/{project}/artifacts/{key}"
@@ -1001,9 +1015,17 @@
             "tag": tag,
             "tree": tree,
             "uid": uid,
+            "deletion_strategy": deletion_strategy,
         }
         error = f"del artifact {project}/{key}"
-        self.api_call("DELETE", endpoint_path, error, params=params, version="v2")
+        self.api_call(
+            "DELETE",
+            endpoint_path,
+            error,
+            params=params,
+            version="v2",
+            body=dict_to_json(secrets),
+        )

     def list_artifacts(
         self,
@@ -1018,6 +1040,7 @@
         kind: str = None,
         category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
         tree: str = None,
+        producer_uri: str = None,
     ) -> ArtifactList:
         """List artifacts filtered by various parameters.

@@ -1046,9 +1069,12 @@
         :param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
             artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
             from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
-        :param kind: Return artifacts of the requested kind.
-        :param category: Return artifacts of the requested category.
-        :param tree: Return artifacts of the requested tree.
+        :param kind: Return artifacts of the requested kind.
+        :param category: Return artifacts of the requested category.
+        :param tree: Return artifacts of the requested tree.
+        :param producer_uri: Return artifacts produced by the requested producer URI. Producer URI usually
+            points to a run and is used to filter artifacts by the run that produced them when the artifact producer id
+            is a workflow id (artifact was created as part of a workflow).
         """

         project = project or config.default_project
@@ -1067,6 +1093,7 @@
             "category": category,
             "tree": tree,
             "format": mlrun.common.schemas.ArtifactsFormat.full.value,
+            "producer_uri": producer_uri,
         }
         error = "list artifacts"
         endpoint_path = f"projects/{project}/artifacts"
@@ -1828,14 +1855,11 @@
         if isinstance(pipeline, str):
             pipe_file = pipeline
         else:
-            pipe_file = tempfile.NamedTemporaryFile(suffix=".yaml", delete=False).name
-            conf = new_pipe_metadata(
+            pipe_file = compile_pipeline(
                 artifact_path=artifact_path,
                 cleanup_ttl=cleanup_ttl,
-                op_transformers=ops,
-            )
-            kfp.compiler.Compiler().compile(
-                pipeline, pipe_file, type_check=False, pipeline_conf=conf
+                ops=ops,
+                pipeline=pipeline,
             )

         if pipe_file.endswith(".yaml"):
@@ -3112,14 +3136,12 @@
         :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a
             label (i.e. list("key=value")) or by looking for the existence of a given key (i.e. "key")
         :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
-        :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
-                      time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` =
-                      days), or 0 for the earliest time.
-        :param end: The end time of the metrics. Can be represented by a string containing an RFC 3339
-                      time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` =
-                      days), or 0 for the earliest time.
+        :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339 time, a
+            Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
+            `m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
+        :param end: The end time of the metrics. Can be represented by a string containing an RFC 3339 time, a
+            Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
+            `m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
         :param top_level: if true will return only routers and endpoint that are NOT children of any router
         :param uids: if passed will return a list `ModelEndpoint` object with uid in uids
         """
@@ -3168,13 +3190,13 @@
        :param project: The name of the project
        :param endpoint_id: The unique id of the model endpoint.
        :param start: The start time of the metrics. Can be represented by a string containing an
-                      RFC 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or
-                      0 for the earliest time.
+                      RFC 3339 time, a Unix timestamp in milliseconds, a relative time
+                      (`'now'` or `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
+                      `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
        :param end: The end time of the metrics. Can be represented by a string containing an
-                      RFC 3339 time, a Unix timestamp in milliseconds, a relative time (`'now'` or
-                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` = days), or
-                      0 for the earliest time.
+                      RFC 3339 time, a Unix timestamp in milliseconds, a relative time
+                      (`'now'` or `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
+                      `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
        :param metrics: A list of metrics to return for the model endpoint. There are pre-defined
                        metrics for model endpoints such as predictions_per_second and
                        latency_avg_5m but also custom metrics defined by the user. Please note that
@@ -3915,7 +3937,7 @@
             logger.warning(
                 "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
                 "keys. Only the permissions granted to the platform's configured secret will take affect "
-                "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
+                "(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
                 "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
                 source=func.spec.build.source,
                 load_source_on_run=func.spec.build.load_source_on_run,
@@ -3943,9 +3965,9 @@
     def store_alert_config(
         self,
         alert_name: str,
-        alert_data: Union[dict, mlrun.common.schemas.AlertConfig],
+        alert_data: Union[dict, AlertConfig],
         project="",
-    ):
+    ) -> AlertConfig:
         """
         Create/modify an alert.
         :param alert_name: The name of the alert.
@@ -3956,13 +3978,19 @@
         project = project or config.default_project
         endpoint_path = f"projects/{project}/alerts/{alert_name}"
         error_message = f"put alert {project}/alerts/{alert_name}"
-        if isinstance(alert_data, mlrun.common.schemas.AlertConfig):
-            alert_data = alert_data.dict()
+        alert_instance = (
+            alert_data
+            if isinstance(alert_data, AlertConfig)
+            else AlertConfig.from_dict(alert_data)
+        )
+        alert_instance.validate_required_fields()
+
+        alert_data = alert_instance.to_dict()
         body = _as_json(alert_data)
         response = self.api_call("PUT", endpoint_path, error_message, body=body)
-        return mlrun.common.schemas.AlertConfig(**response.json())
+        return AlertConfig.from_dict(response.json())

-    def get_alert_config(self, alert_name: str, project=""):
+    def get_alert_config(self, alert_name: str, project="") -> AlertConfig:
         """
         Retrieve an alert.
         :param alert_name: The name of the alert to retrieve.
@@ -3973,9 +4001,9 @@
         endpoint_path = f"projects/{project}/alerts/{alert_name}"
         error_message = f"get alert {project}/alerts/{alert_name}"
         response = self.api_call("GET", endpoint_path, error_message)
-        return mlrun.common.schemas.AlertConfig(**response.json())
+        return AlertConfig.from_dict(response.json())

-    def list_alerts_configs(self, project=""):
+    def list_alerts_configs(self, project="") -> list[AlertConfig]:
         """
         Retrieve list of alerts of a project.
         :param project: The project name.
@@ -3987,7 +4015,7 @@
         response = self.api_call("GET", endpoint_path, error_message).json()
         results = []
         for item in response:
-            results.append(mlrun.common.schemas.AlertConfig(**item))
+            results.append(AlertConfig(**item))
         return results

     def delete_alert_config(self, alert_name: str, project=""):
@@ -4012,6 +4040,32 @@
         error_message = f"post alert {project}/alerts/{alert_name}/reset"
         self.api_call("POST", endpoint_path, error_message)

+    def get_alert_template(
+        self, template_name: str
+    ) -> mlrun.common.schemas.AlertTemplate:
+        """
+        Retrieve a specific alert template.
+        :param template_name: The name of the template to retrieve.
+        :return: The template object.
+        """
+        endpoint_path = f"alert-templates/{template_name}"
+        error_message = f"get template alert-templates/{template_name}"
+        response = self.api_call("GET", endpoint_path, error_message)
+        return mlrun.common.schemas.AlertTemplate(**response.json())
+
+    def list_alert_templates(self) -> list[mlrun.common.schemas.AlertTemplate]:
+        """
+        Retrieve list of all alert templates.
+        :return: All the alert template objects in the database.
+        """
+        endpoint_path = "alert-templates"
+        error_message = "get templates /alert-templates"
+        response = self.api_call("GET", endpoint_path, error_message).json()
+        results = []
+        for item in response:
+            results.append(mlrun.common.schemas.AlertTemplate(**item))
+        return results
+

 def _as_json(obj):
     fn = getattr(obj, "to_json", None)
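
Note on the httpdb.py changes above: the client gains alert-template lookups, AlertConfig-typed alert methods, and a richer del_artifact signature. A minimal sketch of how a client might call the new methods, assuming an API-backed run DB; the project and artifact names below are hypothetical placeholders:

    import mlrun
    from mlrun.common.schemas.artifact import ArtifactsDeletionStrategies

    db = mlrun.get_run_db()

    # new in this release: alert templates can be listed (and fetched by name
    # via db.get_alert_template("<template-name>"))
    templates = db.list_alert_templates()

    # del_artifact now accepts a deletion strategy (metadata only vs. data as well)
    db.del_artifact(
        key="my-artifact",        # hypothetical artifact key
        project="my-project",     # hypothetical project name
        deletion_strategy=ArtifactsDeletionStrategies.metadata_only,
    )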
mlrun/db/nopdb.py CHANGED
@@ -16,6 +16,7 @@
 import datetime
 from typing import Optional, Union

+import mlrun.alerts
 import mlrun.common.schemas
 import mlrun.errors

@@ -128,7 +129,18 @@
     ):
         pass

-    def del_artifact(self, key, tag="", project="", tree=None, uid=None):
+    def del_artifact(
+        self,
+        key,
+        tag="",
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         pass

     def del_artifacts(self, name="", project="", tag="", labels=None):
@@ -671,7 +683,7 @@
     def store_alert_config(
         self,
         alert_name: str,
-        alert_data: Union[dict, mlrun.common.schemas.AlertConfig],
+        alert_data: Union[dict, mlrun.alerts.alert.AlertConfig],
         project="",
     ):
         pass
@@ -687,3 +699,9 @@

     def reset_alert_config(self, alert_name: str, project=""):
         pass
+
+    def get_alert_template(self, template_name: str):
+        pass
+
+    def list_alert_templates(self):
+        pass
mlrun/errors.py CHANGED
@@ -183,6 +183,10 @@ class MLRunInternalServerError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.INTERNAL_SERVER_ERROR.value


+class MLRunNotImplementedServerError(MLRunHTTPStatusError):
+    error_status_code = HTTPStatus.NOT_IMPLEMENTED.value
+
+
 class MLRunServiceUnavailableError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.SERVICE_UNAVAILABLE.value

@@ -234,4 +238,5 @@ STATUS_ERRORS = {
     HTTPStatus.PRECONDITION_FAILED.value: MLRunPreconditionFailedError,
     HTTPStatus.INTERNAL_SERVER_ERROR.value: MLRunInternalServerError,
     HTTPStatus.SERVICE_UNAVAILABLE.value: MLRunServiceUnavailableError,
+    HTTPStatus.NOT_IMPLEMENTED.value: MLRunNotImplementedServerError,
 }
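
The new error class and its STATUS_ERRORS entry mean that HTTP 501 responses can be mapped to a dedicated exception. A small illustration using only what this diff adds:

    from http import HTTPStatus

    import mlrun.errors

    # STATUS_ERRORS now resolves 501 to the new exception type
    exc_cls = mlrun.errors.STATUS_ERRORS[HTTPStatus.NOT_IMPLEMENTED.value]
    assert exc_cls is mlrun.errors.MLRunNotImplementedServerError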
@@ -19,7 +19,6 @@ __all__ = [
19
19
  "get_online_feature_service",
20
20
  "ingest",
21
21
  "preview",
22
- "deploy_ingestion_service",
23
22
  "deploy_ingestion_service_v2",
24
23
  "delete_feature_set",
25
24
  "delete_feature_vector",
@@ -41,7 +40,6 @@ from ..features import Entity, Feature
41
40
  from .api import (
42
41
  delete_feature_set,
43
42
  delete_feature_vector,
44
- deploy_ingestion_service,
45
43
  deploy_ingestion_service_v2,
46
44
  get_feature_set,
47
45
  get_feature_vector,
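
With the long-deprecated deploy_ingestion_service removed from the package exports (and from api.py in the hunks below), callers are expected to move to deploy_ingestion_service_v2. A rough migration sketch, assuming v2 keeps the same keyword arguments as the removed helper and returns both the endpoint and the deployed function (as the internal _deploy_ingestion_service_v2 shown below does); my_feature_set, my_source and my_run_config are placeholders for existing objects:

    import mlrun.feature_store as fstore

    # sketch only: objects below are assumed to exist already
    endpoint, function = fstore.deploy_ingestion_service_v2(
        featureset=my_feature_set,
        source=my_source,
        run_config=my_run_config,
    )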
mlrun/feature_store/api.py CHANGED
@@ -113,6 +113,7 @@ def get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters: list = None,
 ):
     """retrieve offline feature vector results

@@ -175,6 +176,13 @@
                                        By default, the filter executes on the timestamp_key of each feature set.
                                        Note: the time filtering is performed on each feature set before the
                                        merge process using start_time and end_time params.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+

     """
     return _get_offline_features(
@@ -194,6 +202,7 @@
         order_by,
         spark_service,
         timestamp_for_filtering,
+        additional_filters,
     )


@@ -214,6 +223,7 @@
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -252,6 +262,7 @@
         start_time=start_time,
         end_time=end_time,
         timestamp_for_filtering=timestamp_for_filtering,
+        additional_filters=additional_filters,
     )

     merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -267,6 +278,7 @@
         update_stats=update_stats,
         query=query,
         order_by=order_by,
+        additional_filters=additional_filters,
     )


@@ -1005,53 +1017,6 @@ def _deploy_ingestion_service_v2(
     return function.deploy(), function


-@deprecated(
-    version="1.5.0",
-    reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> str:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        my_set.deploy_ingestion_service(source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service
-    """
-    endpoint, _ = featureset.deploy_ingestion_service(
-        source=source,
-        targets=targets,
-        name=name,
-        run_config=run_config,
-        verbose=verbose,
-    )
-    return endpoint
-
-
 def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
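
The additional_filters argument added above is threaded from get_offline_features down to the merger engines. A minimal usage sketch, assuming an existing feature vector (the URI below is a hypothetical placeholder) and that the response exposes to_dataframe() as in earlier releases:

    import mlrun.feature_store as fstore

    # sketch only: filter tuples follow the (column_name, operator, value) format
    # documented in the new docstring above
    resp = fstore.get_offline_features(
        "store://feature-vectors/my-project/my-vector",
        additional_filters=[("Product", "=", "Computer")],
    )
    df = resp.to_dataframe()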
mlrun/feature_store/feature_set.py CHANGED
@@ -917,6 +917,7 @@ class FeatureSet(ModelObj):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return featureset (offline) data as dataframe
@@ -928,6 +929,12 @@
         :param end_time: filter by end time
         :param time_column: specify the time column name in the file
         :param kwargs: additional reader (csv, parquet, ..) args
+        :param additional_filters: List of additional_filter conditions as tuples.
+               Each tuple should be in the format (column_name, operator, value).
+               Supported operators: "=", ">=", "<=", ">", "<".
+               Example: [("Product", "=", "Computer")]
+               For all supported filters, please see:
+               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         :return: DataFrame
         """
         entities = list(self.spec.entities.keys())
@@ -946,6 +953,7 @@
                 start_time=start_time,
                 end_time=end_time,
                 time_field=time_column,
+                additional_filters=additional_filters,
                 **kwargs,
             )
             # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -965,6 +973,7 @@
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return result
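
The same filter-tuple format is now accepted by FeatureSet.to_dataframe, so a single feature set can be read with a pushed-down filter. A one-line sketch, assuming an existing FeatureSet object; "amount" is a hypothetical column name:

    # sketch only: my_feature_set is an existing FeatureSet object
    df = my_feature_set.to_dataframe(additional_filters=[("amount", ">", 100)])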
mlrun/feature_store/retrieval/base.py CHANGED
@@ -88,6 +88,7 @@ class BaseMerger(abc.ABC):
         update_stats=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._target = target

@@ -134,6 +135,7 @@
             timestamp_for_filtering=timestamp_for_filtering,
             query=query,
             order_by=order_by,
+            additional_filters=additional_filters,
         )

     def _write_to_offline_target(self, timestamp_key=None):
@@ -186,6 +188,7 @@
         timestamp_for_filtering=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._create_engine_env()

@@ -212,7 +215,7 @@
                 feature_sets.append(None)
                 join_types.append(None)

-        filtered = False
+        timestamp_filtered = False
         for step in join_graph.steps:
             name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
@@ -250,7 +253,7 @@
             if self._drop_indexes:
                 self._append_drop_column(time_column)
             if (start_time or end_time) and time_column:
-                filtered = True
+                timestamp_filtered = True

             df = self._get_engine_df(
                 feature_set,
@@ -259,6 +262,7 @@
                 start_time if time_column else None,
                 end_time if time_column else None,
                 time_column,
+                additional_filters,
             )

             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
@@ -302,8 +306,8 @@
                     new_columns.append((column, alias))
             self._update_alias(dictionary={name: alias for name, alias in new_columns})

-        # None of the feature sets was filtered as required
-        if not filtered and (start_time or end_time):
+        # None of the feature sets was timestamp filtered as required
+        if not timestamp_filtered and (start_time or end_time):
             raise mlrun.errors.MLRunRuntimeError(
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
@@ -755,6 +759,7 @@
         start_time: typing.Union[str, datetime] = None,
         end_time: typing.Union[str, datetime] = None,
         time_column: typing.Optional[str] = None,
+        additional_filters=None,
     ):
         """
         Return the feature_set data frame according to the args
mlrun/data_types/to_pandas.py CHANGED
@@ -79,10 +79,10 @@ class PandasConversionMixin:
                 msg = (
                     "toPandas attempted Arrow optimization because "
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                    "failed by the reason below:\n %s\n"
+                    f"failed by the reason below:\n {e}\n"
                     "Attempting non-optimization as "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                    "true." % str(e)
+                    "true."
                 )
                 warnings.warn(msg)
                 use_arrow = False
@@ -92,7 +92,7 @@
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                     "reached the error below and will not continue because automatic fallback "
                     "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                    "false.\n %s" % str(e)
+                    f"false.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
@@ -158,7 +158,7 @@
                     "reached the error below and can not continue. Note that "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                     "effect on failures in the middle of "
-                    "computation.\n %s" % str(e)
+                    f"computation.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
mlrun/feature_store/retrieval/dask_merger.py CHANGED
@@ -145,6 +145,7 @@ class DaskFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         import dask.dataframe as dd

@@ -155,6 +156,7 @@ class DaskFeatureMerger(BaseMerger):
             end_time=end_time,
             time_column=time_column,
             index=False,
+            additional_filters=additional_filters,
         )
         return self._reset_index(df).persist()
