mlrun 1.7.0rc16__py3-none-any.whl → 1.7.0rc18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (65)
  1. mlrun/alerts/alert.py +27 -24
  2. mlrun/artifacts/manager.py +5 -1
  3. mlrun/artifacts/model.py +1 -1
  4. mlrun/common/runtimes/constants.py +3 -0
  5. mlrun/common/schemas/__init__.py +8 -2
  6. mlrun/common/schemas/alert.py +49 -10
  7. mlrun/common/schemas/client_spec.py +1 -0
  8. mlrun/common/schemas/function.py +4 -0
  9. mlrun/common/schemas/model_monitoring/__init__.py +3 -1
  10. mlrun/common/schemas/model_monitoring/constants.py +21 -1
  11. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  12. mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
  13. mlrun/common/schemas/project.py +3 -1
  14. mlrun/config.py +9 -3
  15. mlrun/data_types/to_pandas.py +5 -5
  16. mlrun/datastore/datastore.py +6 -2
  17. mlrun/datastore/redis.py +2 -2
  18. mlrun/datastore/s3.py +5 -0
  19. mlrun/datastore/sources.py +111 -6
  20. mlrun/datastore/targets.py +2 -2
  21. mlrun/db/base.py +6 -2
  22. mlrun/db/httpdb.py +22 -3
  23. mlrun/db/nopdb.py +10 -3
  24. mlrun/errors.py +6 -0
  25. mlrun/feature_store/retrieval/conversion.py +5 -5
  26. mlrun/feature_store/retrieval/job.py +3 -2
  27. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  28. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
  29. mlrun/lists.py +2 -0
  30. mlrun/model.py +8 -6
  31. mlrun/model_monitoring/db/stores/base/store.py +16 -3
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +44 -43
  33. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
  34. mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
  35. mlrun/model_monitoring/db/tsdb/base.py +25 -18
  36. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  37. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +207 -0
  38. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  39. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +231 -0
  40. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +103 -64
  41. mlrun/model_monitoring/db/v3io_tsdb_reader.py +217 -16
  42. mlrun/model_monitoring/helpers.py +32 -0
  43. mlrun/model_monitoring/stream_processing.py +7 -4
  44. mlrun/model_monitoring/writer.py +19 -14
  45. mlrun/package/utils/_formatter.py +2 -2
  46. mlrun/projects/project.py +40 -11
  47. mlrun/render.py +8 -5
  48. mlrun/runtimes/__init__.py +1 -0
  49. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  50. mlrun/runtimes/nuclio/api_gateway.py +97 -77
  51. mlrun/runtimes/nuclio/application/application.py +160 -7
  52. mlrun/runtimes/nuclio/function.py +18 -12
  53. mlrun/track/tracker.py +2 -1
  54. mlrun/utils/async_http.py +25 -5
  55. mlrun/utils/helpers.py +28 -3
  56. mlrun/utils/logger.py +11 -6
  57. mlrun/utils/notifications/notification/slack.py +27 -7
  58. mlrun/utils/notifications/notification_pusher.py +45 -41
  59. mlrun/utils/version/version.json +2 -2
  60. {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/METADATA +8 -3
  61. {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/RECORD +65 -61
  62. {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/LICENSE +0 -0
  63. {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/WHEEL +0 -0
  64. {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/entry_points.txt +0 -0
  65. {mlrun-1.7.0rc16.dist-info → mlrun-1.7.0rc18.dist-info}/top_level.txt +0 -0
@@ -111,6 +111,24 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
111
111
  )
112
112
 
113
113
 
114
+ def get_tsdb_connection_string(
115
+ secret_provider: typing.Optional[typing.Callable] = None,
116
+ ) -> str:
117
+ """Get TSDB connection string from the project secret. If wasn't set, take it from the system
118
+ configurations.
119
+ :param secret_provider: An optional secret provider to get the connection string secret.
120
+ :return: Valid TSDB connection string.
121
+ """
122
+
123
+ return (
124
+ mlrun.get_secret_or_env(
125
+ key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
126
+ secret_provider=secret_provider,
127
+ )
128
+ or mlrun.mlconf.model_endpoint_monitoring.tsdb_connection
129
+ )
130
+
131
+
114
132
  def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
115
133
  """
116
134
  Convert a batch dictionary to timedelta.
@@ -260,3 +278,17 @@ def get_endpoint_record(project: str, endpoint_id: str):
260
278
  project=project,
261
279
  )
262
280
  return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
281
+
282
+
283
+ def get_result_instance_fqn(
284
+ model_endpoint_id: str, app_name: str, result_name: str
285
+ ) -> str:
286
+ return f"{model_endpoint_id}.{app_name}.result.{result_name}"
287
+
288
+
289
+ def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
290
+ return get_result_instance_fqn(
291
+ model_endpoint_id,
292
+ mm_constants.HistogramDataDriftApplicationConstants.NAME,
293
+ mm_constants.HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
294
+ )
@@ -136,7 +136,11 @@ class EventStreamProcessor:
136
136
  self.tsdb_batching_max_events = tsdb_batching_max_events
137
137
  self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
138
138
 
139
- def apply_monitoring_serving_graph(self, fn: mlrun.runtimes.ServingRuntime) -> None:
139
+ def apply_monitoring_serving_graph(
140
+ self,
141
+ fn: mlrun.runtimes.ServingRuntime,
142
+ tsdb_service_provider: typing.Optional[typing.Callable] = None,
143
+ ) -> None:
140
144
  """
141
145
  Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
142
146
  parts that each one them includes several steps of different operations that are executed on the events from
@@ -163,6 +167,7 @@ class EventStreamProcessor:
163
167
  using CE, the parquet target path is based on the defined MLRun artifact path.
164
168
 
165
169
  :param fn: A serving function.
170
+ :param tsdb_service_provider: An optional callable function that provides the TSDB connection string.
166
171
  """
167
172
 
168
173
  graph = typing.cast(
@@ -322,15 +327,13 @@ class EventStreamProcessor:
322
327
 
323
328
  # TSDB branch (skip to Prometheus if in CE env)
324
329
  if not mlrun.mlconf.is_ce_mode():
325
- # TSDB branch
326
330
  tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
327
- project=self.project,
331
+ project=self.project, secret_provider=tsdb_service_provider
328
332
  )
329
333
  tsdb_connector.apply_monitoring_stream_steps(graph=graph)
330
334
 
331
335
  else:
332
336
  # Prometheus
333
-
334
337
  # Increase the prediction counter by 1 and update the latency value
335
338
  graph.add_step(
336
339
  "IncCounter",
@@ -17,7 +17,7 @@ from typing import Any, NewType
17
17
 
18
18
  import mlrun.common.model_monitoring
19
19
  import mlrun.common.schemas
20
- import mlrun.common.schemas.alert as alert_constants
20
+ import mlrun.common.schemas.alert as alert_objects
21
21
  import mlrun.model_monitoring
22
22
  from mlrun.common.schemas.model_monitoring.constants import (
23
23
  EventFieldType,
@@ -29,7 +29,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
29
29
  WriterEventKind,
30
30
  )
31
31
  from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
32
- from mlrun.model_monitoring.helpers import get_endpoint_record
32
+ from mlrun.model_monitoring.helpers import get_endpoint_record, get_result_instance_fqn
33
33
  from mlrun.serving.utils import StepToDict
34
34
  from mlrun.utils import logger
35
35
  from mlrun.utils.notifications.notification_pusher import CustomNotificationPusher
@@ -101,7 +101,7 @@ class ModelMonitoringWriter(StepToDict):
101
101
 
102
102
  kind = "monitoring_application_stream_pusher"
103
103
 
104
- def __init__(self, project: str) -> None:
104
+ def __init__(self, project: str, tsdb_secret_provider=None) -> None:
105
105
  self.project = project
106
106
  self.name = project # required for the deployment process
107
107
 
@@ -113,24 +113,24 @@ class ModelMonitoringWriter(StepToDict):
113
113
  project=self.project
114
114
  )
115
115
  self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
116
- project=self.project,
116
+ project=self.project, secret_provider=tsdb_secret_provider
117
117
  )
118
118
  self._endpoints_records = {}
119
119
 
120
120
  @staticmethod
121
121
  def _generate_event_on_drift(
122
- model_endpoint: str, drift_status: str, event_value: dict, project_name: str
122
+ entity_id: str, drift_status: str, event_value: dict, project_name: str
123
123
  ) -> None:
124
- logger.info("Sending an alert")
124
+ logger.info("Sending an event")
125
125
  entity = mlrun.common.schemas.alert.EventEntities(
126
- kind=alert_constants.EventEntityKind.MODEL,
126
+ kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
127
127
  project=project_name,
128
- ids=[model_endpoint],
128
+ ids=[entity_id],
129
129
  )
130
130
  event_kind = (
131
- alert_constants.EventKind.DRIFT_DETECTED
131
+ alert_objects.EventKind.DATA_DRIFT_DETECTED
132
132
  if drift_status == ResultStatusApp.detected.value
133
- else alert_constants.EventKind.DRIFT_SUSPECTED
133
+ else alert_objects.EventKind.DATA_DRIFT_SUSPECTED
134
134
  )
135
135
  event_data = mlrun.common.schemas.Event(
136
136
  kind=event_kind, entity=entity, value_dict=event_value
@@ -138,7 +138,7 @@ class ModelMonitoringWriter(StepToDict):
138
138
  mlrun.get_run_db().generate_event(event_kind, event_data)
139
139
 
140
140
  @staticmethod
141
- def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, str]:
141
+ def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, WriterEventKind]:
142
142
  """
143
143
  Modify the raw event into the expected monitoring application event
144
144
  schema as defined in `mlrun.common.schemas.model_monitoring.constants.WriterEvent`
@@ -179,12 +179,13 @@ class ModelMonitoringWriter(StepToDict):
179
179
  def do(self, event: _RawEvent) -> None:
180
180
  event, kind = self._reconstruct_event(event)
181
181
  logger.info("Starting to write event", event=event)
182
-
183
182
  self._tsdb_connector.write_application_event(event=event.copy(), kind=kind)
184
183
  self._app_result_store.write_application_event(event=event.copy(), kind=kind)
184
+
185
185
  logger.info("Completed event DB writes")
186
186
 
187
- _Notifier(event=event, notification_pusher=self._custom_notifier).notify()
187
+ if kind == WriterEventKind.RESULT:
188
+ _Notifier(event=event, notification_pusher=self._custom_notifier).notify()
188
189
 
189
190
  if (
190
191
  mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.enabled
@@ -208,7 +209,11 @@ class ModelMonitoringWriter(StepToDict):
208
209
  "result_value": event[ResultData.RESULT_VALUE],
209
210
  }
210
211
  self._generate_event_on_drift(
211
- event[WriterEvent.ENDPOINT_ID],
212
+ get_result_instance_fqn(
213
+ event[WriterEvent.ENDPOINT_ID],
214
+ event[WriterEvent.APPLICATION_NAME],
215
+ event[ResultData.RESULT_NAME],
216
+ ),
212
217
  event[ResultData.RESULT_STATUS],
213
218
  event_value,
214
219
  self.project,
@@ -142,11 +142,11 @@ class _YAMLFormatter(_Formatter):
142
142
 
143
143
  :param obj: The object to write.
144
144
  :param file_path: The file path to write to.
145
- :param dump_kwargs: Additional keyword arguments to pass to the `yaml.dump` method of the formatter in use.
145
+ :param dump_kwargs: Additional keyword arguments to pass to the `yaml.safe_dump` method of the formatter in use.
146
146
  """
147
147
  dump_kwargs = dump_kwargs or cls.DEFAULT_DUMP_KWARGS
148
148
  with open(file_path, "w") as file:
149
- yaml.dump(obj, file, **dump_kwargs)
149
+ yaml.safe_dump(obj, file, **dump_kwargs)
150
150
 
151
151
  @classmethod
152
152
  def read(cls, file_path: str) -> Union[list, dict]:
mlrun/projects/project.py CHANGED
@@ -39,6 +39,7 @@ import yaml
39
39
  from mlrun_pipelines.models import PipelineNodeWrapper
40
40
 
41
41
  import mlrun.common.helpers
42
+ import mlrun.common.runtimes.constants
42
43
  import mlrun.common.schemas.artifact
43
44
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
44
45
  import mlrun.db
@@ -2962,8 +2963,12 @@ class MlrunProject(ModelObj):
2962
2963
  engine = "remote"
2963
2964
  # The default engine is kfp if not given:
2964
2965
  workflow_engine = get_workflow_engine(engine or workflow_spec.engine, local)
2965
- if not inner_engine and engine == "remote":
2966
- inner_engine = get_workflow_engine(workflow_spec.engine, local).engine
2966
+ if not inner_engine and workflow_engine.engine == "remote":
2967
+ # if inner engine is set to remote, assume kfp as the default inner engine with remote as the runner
2968
+ engine_kind = (
2969
+ workflow_spec.engine if workflow_spec.engine != "remote" else "kfp"
2970
+ )
2971
+ inner_engine = get_workflow_engine(engine_kind, local).engine
2967
2972
  workflow_spec.engine = inner_engine or workflow_engine.engine
2968
2973
 
2969
2974
  run = workflow_engine.run(
@@ -2991,7 +2996,7 @@ class MlrunProject(ModelObj):
2991
2996
  # run's engine gets replaced with inner engine if engine is remote,
2992
2997
  # so in that case we need to get the status from the remote engine manually
2993
2998
  # TODO: support watch for remote:local
2994
- if engine == "remote" and status_engine.engine != "local":
2999
+ if workflow_engine.engine == "remote" and status_engine.engine != "local":
2995
3000
  status_engine = _RemoteRunner
2996
3001
 
2997
3002
  status_engine.get_run_status(project=self, run=run, timeout=timeout)
@@ -3094,17 +3099,18 @@ class MlrunProject(ModelObj):
3094
3099
 
3095
3100
  def set_model_monitoring_credentials(
3096
3101
  self,
3097
- access_key: str = None,
3098
- endpoint_store_connection: str = None,
3099
- stream_path: str = None,
3102
+ access_key: Optional[str] = None,
3103
+ endpoint_store_connection: Optional[str] = None,
3104
+ stream_path: Optional[str] = None,
3105
+ tsdb_connection: Optional[str] = None,
3100
3106
  ):
3101
3107
  """Set the credentials that will be used by the project's model monitoring
3102
3108
  infrastructure functions.
3103
3109
 
3104
- :param access_key: Model Monitoring access key for managing user permissions
3105
3110
  :param access_key: Model Monitoring access key for managing user permissions
3106
3111
  :param endpoint_store_connection: Endpoint store connection string
3107
3112
  :param stream_path: Path to the model monitoring stream
3113
+ :param tsdb_connection: Connection string to the time series database
3108
3114
  """
3109
3115
 
3110
3116
  secrets_dict = {}
@@ -3127,6 +3133,16 @@ class MlrunProject(ModelObj):
3127
3133
  mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
3128
3134
  ] = stream_path
3129
3135
 
3136
+ if tsdb_connection:
3137
+ if not tsdb_connection.startswith("taosws://"):
3138
+ raise mlrun.errors.MLRunInvalidArgumentError(
3139
+ "Currently only TDEngine websocket connection is supported for non-v3io TSDB,"
3140
+ "please provide a full URL (e.g. taosws://user:password@host:port)"
3141
+ )
3142
+ secrets_dict[
3143
+ mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION
3144
+ ] = tsdb_connection
3145
+
3130
3146
  self.set_secrets(
3131
3147
  secrets=secrets_dict,
3132
3148
  provider=mlrun.common.schemas.SecretProviderName.kubernetes,
@@ -3685,7 +3701,10 @@ class MlrunProject(ModelObj):
3685
3701
  name: Optional[str] = None,
3686
3702
  uid: Optional[Union[str, list[str]]] = None,
3687
3703
  labels: Optional[Union[str, list[str]]] = None,
3688
- state: Optional[str] = None,
3704
+ state: Optional[
3705
+ mlrun.common.runtimes.constants.RunStates
3706
+ ] = None, # Backward compatibility
3707
+ states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
3689
3708
  sort: bool = True,
3690
3709
  last: int = 0,
3691
3710
  iter: bool = False,
@@ -3719,10 +3738,11 @@ class MlrunProject(ModelObj):
3719
3738
  :param labels: A list of labels to filter by. Label filters work by either filtering a specific value
3720
3739
  of a label (i.e. list("key=value")) or by looking for the existence of a given
3721
3740
  key (i.e. "key").
3722
- :param state: List only runs whose state is specified.
3741
+ :param state: Deprecated - List only runs whose state is specified.
3742
+ :param states: List only runs whose state is one of the provided states.
3723
3743
  :param sort: Whether to sort the result according to their start time. Otherwise, results will be
3724
3744
  returned by their internal order in the DB (order will not be guaranteed).
3725
- :param last: Deprecated - currently not used (will be removed in 1.8.0).
3745
+ :param last: Deprecated - currently not used (will be removed in 1.9.0).
3726
3746
  :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
3727
3747
  :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
3728
3748
  :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -3730,13 +3750,22 @@ class MlrunProject(ModelObj):
3730
3750
  last_update_time_to)``.
3731
3751
  :param last_update_time_to: Filter by run last update time in ``(last_update_time_from, last_update_time_to)``.
3732
3752
  """
3753
+ if state:
3754
+ # TODO: Remove this in 1.9.0
3755
+ warnings.warn(
3756
+ "'state' is deprecated and will be removed in 1.9.0. Use 'states' instead.",
3757
+ FutureWarning,
3758
+ )
3759
+
3733
3760
  db = mlrun.db.get_run_db(secrets=self._secrets)
3734
3761
  return db.list_runs(
3735
3762
  name,
3736
3763
  uid,
3737
3764
  self.metadata.name,
3738
3765
  labels=labels,
3739
- state=state,
3766
+ states=mlrun.utils.helpers.as_list(state)
3767
+ if state is not None
3768
+ else states or None,
3740
3769
  sort=sort,
3741
3770
  last=last,
3742
3771
  iter=iter,
mlrun/render.py CHANGED
@@ -126,7 +126,7 @@ def artifacts_html(
126
126
 
127
127
  if not attribute_value:
128
128
  mlrun.utils.logger.warning(
129
- "Artifact is incomplete, omitting from output (most likely due to a failed artifact logging)",
129
+ f"Artifact required attribute {attribute_name} is missing, omitting from output",
130
130
  artifact_key=key,
131
131
  )
132
132
  continue
@@ -400,14 +400,17 @@ def runs_to_html(
400
400
  else:
401
401
  df["labels"] = df["labels"].apply(dict_html)
402
402
  df["inputs"] = df["inputs"].apply(inputs_html)
403
- if df["artifact_uris"][0]:
404
- df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
405
- df.drop("artifacts", axis=1, inplace=True)
406
- else:
403
+ if df["artifacts"][0]:
407
404
  df["artifacts"] = df["artifacts"].apply(
408
405
  lambda artifacts: artifacts_html(artifacts, "target_path"),
409
406
  )
410
407
  df.drop("artifact_uris", axis=1, inplace=True)
408
+ elif df["artifact_uris"][0]:
409
+ df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
410
+ df.drop("artifacts", axis=1, inplace=True)
411
+ else:
412
+ df.drop("artifacts", axis=1, inplace=True)
413
+ df.drop("artifact_uris", axis=1, inplace=True)
411
414
 
412
415
  def expand_error(x):
413
416
  if x["state"] == "error":
@@ -43,6 +43,7 @@ from .nuclio import (
43
43
  new_v2_model_server,
44
44
  nuclio_init_hook,
45
45
  )
46
+ from .nuclio.api_gateway import APIGateway
46
47
  from .nuclio.application import ApplicationRuntime
47
48
  from .nuclio.serving import serving_subkind
48
49
  from .remotesparkjob import RemoteSparkRuntime
@@ -99,7 +99,7 @@ def save_credentials(
99
99
  credentials["DATABRICKS_CLUSTER_ID"] = cluster_id
100
100
 
101
101
  with open(credentials_path, "w") as yaml_file:
102
- yaml.dump(credentials, yaml_file, default_flow_style=False)
102
+ yaml.safe_dump(credentials, yaml_file, default_flow_style=False)
103
103
 
104
104
 
105
105
  def run_mlrun_databricks_job(