mlrun 1.7.0rc20__py3-none-any.whl → 1.7.0rc28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. See the registry's advisory page for more details.

Files changed (92)
  1. mlrun/__main__.py +10 -8
  2. mlrun/alerts/alert.py +55 -18
  3. mlrun/api/schemas/__init__.py +3 -3
  4. mlrun/artifacts/manager.py +26 -0
  5. mlrun/common/constants.py +3 -2
  6. mlrun/common/formatters/__init__.py +1 -0
  7. mlrun/common/formatters/artifact.py +26 -3
  8. mlrun/common/formatters/base.py +44 -9
  9. mlrun/common/formatters/function.py +12 -7
  10. mlrun/common/formatters/run.py +26 -0
  11. mlrun/common/helpers.py +11 -0
  12. mlrun/common/schemas/__init__.py +4 -0
  13. mlrun/common/schemas/alert.py +5 -9
  14. mlrun/common/schemas/api_gateway.py +64 -16
  15. mlrun/common/schemas/artifact.py +11 -0
  16. mlrun/common/schemas/constants.py +3 -0
  17. mlrun/common/schemas/feature_store.py +58 -28
  18. mlrun/common/schemas/model_monitoring/constants.py +21 -12
  19. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
  20. mlrun/common/schemas/pipeline.py +16 -0
  21. mlrun/common/schemas/project.py +17 -0
  22. mlrun/common/schemas/runs.py +17 -0
  23. mlrun/common/schemas/schedule.py +1 -1
  24. mlrun/common/types.py +6 -0
  25. mlrun/config.py +17 -25
  26. mlrun/datastore/azure_blob.py +2 -1
  27. mlrun/datastore/datastore.py +3 -3
  28. mlrun/datastore/google_cloud_storage.py +6 -2
  29. mlrun/datastore/snowflake_utils.py +3 -1
  30. mlrun/datastore/sources.py +26 -11
  31. mlrun/datastore/store_resources.py +2 -0
  32. mlrun/datastore/targets.py +68 -16
  33. mlrun/db/base.py +83 -2
  34. mlrun/db/httpdb.py +280 -63
  35. mlrun/db/nopdb.py +60 -3
  36. mlrun/errors.py +5 -3
  37. mlrun/execution.py +28 -13
  38. mlrun/feature_store/feature_vector.py +8 -0
  39. mlrun/feature_store/retrieval/spark_merger.py +13 -2
  40. mlrun/launcher/local.py +4 -0
  41. mlrun/launcher/remote.py +1 -0
  42. mlrun/model.py +32 -3
  43. mlrun/model_monitoring/api.py +7 -52
  44. mlrun/model_monitoring/applications/base.py +5 -7
  45. mlrun/model_monitoring/applications/histogram_data_drift.py +1 -1
  46. mlrun/model_monitoring/db/stores/__init__.py +37 -24
  47. mlrun/model_monitoring/db/stores/base/store.py +40 -1
  48. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +42 -87
  49. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +27 -35
  50. mlrun/model_monitoring/db/tsdb/__init__.py +15 -15
  51. mlrun/model_monitoring/db/tsdb/base.py +1 -14
  52. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +22 -18
  53. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +86 -56
  54. mlrun/model_monitoring/helpers.py +34 -9
  55. mlrun/model_monitoring/stream_processing.py +12 -11
  56. mlrun/model_monitoring/writer.py +11 -11
  57. mlrun/projects/operations.py +5 -0
  58. mlrun/projects/pipelines.py +35 -21
  59. mlrun/projects/project.py +216 -107
  60. mlrun/render.py +10 -5
  61. mlrun/run.py +15 -5
  62. mlrun/runtimes/__init__.py +2 -0
  63. mlrun/runtimes/base.py +17 -4
  64. mlrun/runtimes/daskjob.py +8 -1
  65. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  66. mlrun/runtimes/local.py +23 -4
  67. mlrun/runtimes/nuclio/application/application.py +0 -2
  68. mlrun/runtimes/nuclio/function.py +31 -2
  69. mlrun/runtimes/nuclio/serving.py +9 -6
  70. mlrun/runtimes/pod.py +5 -29
  71. mlrun/runtimes/remotesparkjob.py +8 -2
  72. mlrun/serving/__init__.py +8 -1
  73. mlrun/serving/routers.py +75 -59
  74. mlrun/serving/server.py +11 -0
  75. mlrun/serving/states.py +80 -8
  76. mlrun/serving/utils.py +19 -11
  77. mlrun/serving/v2_serving.py +66 -39
  78. mlrun/utils/helpers.py +91 -11
  79. mlrun/utils/logger.py +36 -2
  80. mlrun/utils/notifications/notification/base.py +43 -7
  81. mlrun/utils/notifications/notification/git.py +21 -0
  82. mlrun/utils/notifications/notification/slack.py +9 -14
  83. mlrun/utils/notifications/notification/webhook.py +41 -1
  84. mlrun/utils/notifications/notification_pusher.py +3 -9
  85. mlrun/utils/regex.py +9 -0
  86. mlrun/utils/version/version.json +2 -2
  87. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/METADATA +16 -9
  88. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/RECORD +92 -91
  89. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/WHEEL +1 -1
  90. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/LICENSE +0 -0
  91. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/entry_points.txt +0 -0
  92. {mlrun-1.7.0rc20.dist-info → mlrun-1.7.0rc28.dist-info}/top_level.txt +0 -0
mlrun/run.py CHANGED
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import importlib.util as imputil
15
16
  import json
16
17
  import os
@@ -28,10 +29,11 @@ from typing import Optional, Union
28
29
 
29
30
  import nuclio
30
31
  import yaml
31
- from kfp import Client
32
32
  from mlrun_pipelines.common.models import RunStatuses
33
33
  from mlrun_pipelines.common.ops import format_summary_from_kfp_run, show_kfp_run
34
+ from mlrun_pipelines.utils import get_client
34
35
 
36
+ import mlrun.common.constants as mlrun_constants
35
37
  import mlrun.common.formatters
36
38
  import mlrun.common.schemas
37
39
  import mlrun.errors
@@ -61,11 +63,11 @@ from .runtimes.funcdoc import update_function_entry_points
61
63
  from .runtimes.nuclio.application import ApplicationRuntime
62
64
  from .runtimes.utils import add_code_metadata, global_context
63
65
  from .utils import (
66
+ RunKeys,
64
67
  extend_hub_uri_if_needed,
65
68
  get_in,
66
69
  logger,
67
70
  retry_until_successful,
68
- run_keys,
69
71
  update_in,
70
72
  )
71
73
 
@@ -278,7 +280,7 @@ def get_or_create_ctx(
278
280
  artifact_path = mlrun.utils.helpers.template_artifact_path(
279
281
  mlconf.artifact_path, project or mlconf.default_project
280
282
  )
281
- update_in(newspec, ["spec", run_keys.output_path], artifact_path)
283
+ update_in(newspec, ["spec", RunKeys.output_path], artifact_path)
282
284
 
283
285
  newspec.setdefault("metadata", {})
284
286
  update_in(newspec, "metadata.name", name, replace=False)
@@ -293,6 +295,14 @@ def get_or_create_ctx(
293
295
  newspec["metadata"].get("project") or project or mlconf.default_project
294
296
  )
295
297
 
298
+ newspec["metadata"].setdefault("labels", {})
299
+
300
+ # This function can also be called as a local run if it is not called within a function.
301
+ # It will create a local run, and the run kind must be local by default.
302
+ newspec["metadata"]["labels"].setdefault(
303
+ mlrun_constants.MLRunInternalLabels.kind, RuntimeKinds.local
304
+ )
305
+
296
306
  ctx = MLClientCtx.from_dict(
297
307
  newspec, rundb=out, autocommit=autocommit, tmp=tmp, host=socket.gethostname()
298
308
  )
@@ -943,7 +953,7 @@ def wait_for_pipeline_completion(
943
953
  _wait_for_pipeline_completion,
944
954
  )
945
955
  else:
946
- client = Client(namespace=namespace)
956
+ client = get_client(namespace=namespace)
947
957
  resp = client.wait_for_run_completion(run_id, timeout)
948
958
  if resp:
949
959
  resp = resp.to_dict()
@@ -1004,7 +1014,7 @@ def get_pipeline(
1004
1014
  )
1005
1015
 
1006
1016
  else:
1007
- client = Client(namespace=namespace)
1017
+ client = get_client(namespace=namespace)
1008
1018
  resp = client.get_run(run_id)
1009
1019
  if resp:
1010
1020
  resp = resp.to_dict()
@@ -26,6 +26,8 @@ __all__ = [
26
26
  "Spark3Runtime",
27
27
  "DatabricksRuntime",
28
28
  "KubeResource",
29
+ "ApplicationRuntime",
30
+ "MpiRuntimeV1",
29
31
  ]
30
32
 
31
33
  from mlrun.runtimes.utils import resolve_spark_operator_version
mlrun/runtimes/base.py CHANGED
@@ -68,6 +68,7 @@ spec_fields = [
68
68
  "disable_auto_mount",
69
69
  "allow_empty_resources",
70
70
  "clone_target_dir",
71
+ "reset_on_run",
71
72
  ]
72
73
 
73
74
 
@@ -336,6 +337,7 @@ class BaseRuntime(ModelObj):
336
337
  notifications: Optional[list[mlrun.model.Notification]] = None,
337
338
  returns: Optional[list[Union[str, dict[str, str]]]] = None,
338
339
  state_thresholds: Optional[dict[str, int]] = None,
340
+ reset_on_run: Optional[bool] = None,
339
341
  **launcher_kwargs,
340
342
  ) -> RunObject:
341
343
  """
@@ -390,6 +392,9 @@ class BaseRuntime(ModelObj):
390
392
  standards and is at least 1 minute (-1 for infinite).
391
393
  If the phase is active for longer than the threshold, the run will be aborted.
392
394
  See mlconf.function.spec.state_thresholds for the state options and default values.
395
+ :param reset_on_run: When True, function python modules would reload prior to code execution.
396
+ This ensures latest code changes are executed. This argument must be used in
397
+ conjunction with the local=True argument.
393
398
  :return: Run context object (RunObject) with run metadata, results and status
394
399
  """
395
400
  launcher = mlrun.launcher.factory.LauncherFactory().create_launcher(
@@ -418,15 +423,22 @@ class BaseRuntime(ModelObj):
418
423
  notifications=notifications,
419
424
  returns=returns,
420
425
  state_thresholds=state_thresholds,
426
+ reset_on_run=reset_on_run,
421
427
  )
422
428
 
423
- def _get_db_run(self, task: RunObject = None):
429
+ def _get_db_run(
430
+ self,
431
+ task: RunObject = None,
432
+ run_format: mlrun.common.formatters.RunFormat = mlrun.common.formatters.RunFormat.full,
433
+ ):
424
434
  if self._get_db() and task:
425
435
  project = task.metadata.project
426
436
  uid = task.metadata.uid
427
437
  iter = task.metadata.iteration
428
438
  try:
429
- return self._get_db().read_run(uid, project, iter=iter)
439
+ return self._get_db().read_run(
440
+ uid, project, iter=iter, format_=run_format
441
+ )
430
442
  except mlrun.db.RunDBError:
431
443
  return None
432
444
  if task:
@@ -543,13 +555,14 @@ class BaseRuntime(ModelObj):
543
555
  self,
544
556
  resp: dict = None,
545
557
  task: RunObject = None,
546
- err=None,
558
+ err: Union[Exception, str] = None,
559
+ run_format: mlrun.common.formatters.RunFormat = mlrun.common.formatters.RunFormat.full,
547
560
  ) -> typing.Optional[dict]:
548
561
  """update the task state in the DB"""
549
562
  was_none = False
550
563
  if resp is None and task:
551
564
  was_none = True
552
- resp = self._get_db_run(task)
565
+ resp = self._get_db_run(task, run_format)
553
566
 
554
567
  if not resp:
555
568
  self.store_run(task)
mlrun/runtimes/daskjob.py CHANGED
@@ -494,6 +494,7 @@ class DaskCluster(KubejobRuntime):
494
494
  notifications: Optional[list[mlrun.model.Notification]] = None,
495
495
  returns: Optional[list[Union[str, dict[str, str]]]] = None,
496
496
  state_thresholds: Optional[dict[str, int]] = None,
497
+ reset_on_run: Optional[bool] = None,
497
498
  **launcher_kwargs,
498
499
  ) -> RunObject:
499
500
  if state_thresholds:
@@ -547,7 +548,13 @@ class DaskCluster(KubejobRuntime):
547
548
  "specified handler (string) without command "
548
549
  "(py file path), specify command or use handler pointer"
549
550
  )
550
- handler = load_module(self.spec.command, handler, context=context)
551
+ # Do not embed the module in system as it is not persistent with the dask cluster
552
+ handler = load_module(
553
+ self.spec.command,
554
+ handler,
555
+ context=context,
556
+ embed_in_sys=False,
557
+ )
551
558
  client = self.client
552
559
  setattr(context, "dask_client", client)
553
560
  sout, serr = exec_from_params(handler, runobj, context)
@@ -232,6 +232,7 @@ def run_mlrun_databricks_job(context,task_parameters: dict, **kwargs):
232
232
  notifications: Optional[list[mlrun.model.Notification]] = None,
233
233
  returns: Optional[list[Union[str, dict[str, str]]]] = None,
234
234
  state_thresholds: Optional[dict[str, int]] = None,
235
+ reset_on_run: Optional[bool] = None,
235
236
  **launcher_kwargs,
236
237
  ) -> RunObject:
237
238
  if local:
mlrun/runtimes/local.py CHANGED
@@ -372,8 +372,20 @@ class LocalRuntime(BaseRuntime, ParallelRunner):
372
372
  return run_obj_dict
373
373
 
374
374
 
375
- def load_module(file_name, handler, context):
376
- """Load module from file name"""
375
+ def load_module(
376
+ file_name: str,
377
+ handler: str,
378
+ context: MLClientCtx,
379
+ embed_in_sys: bool = True,
380
+ ):
381
+ """
382
+ Load module from filename
383
+ :param file_name: The module path to load
384
+ :param handler: The callable to load
385
+ :param context: Execution context
386
+ :param embed_in_sys: Embed the file-named module in sys.modules. This is not persistent with remote
387
+ environments and therefore can effect pickling.
388
+ """
377
389
  module = None
378
390
  if file_name:
379
391
  path = Path(file_name)
@@ -384,14 +396,21 @@ def load_module(file_name, handler, context):
384
396
  if spec is None:
385
397
  raise RunError(f"Cannot import from {file_name!r}")
386
398
  module = imputil.module_from_spec(spec)
387
- sys.modules[mod_name] = module
399
+ if embed_in_sys:
400
+ sys.modules[mod_name] = module
388
401
  spec.loader.exec_module(module)
389
402
 
390
403
  class_args = {}
391
404
  if context:
392
405
  class_args = copy(context._parameters.get("_init_args", {}))
393
406
 
394
- return get_handler_extended(handler, context, class_args, namespaces=module)
407
+ return get_handler_extended(
408
+ handler,
409
+ context,
410
+ class_args,
411
+ namespaces=module,
412
+ reload_modules=context._reset_on_run,
413
+ )
395
414
 
396
415
 
397
416
  def run_exec(cmd, args, env=None, cwd=None):
@@ -263,7 +263,6 @@ class ApplicationRuntime(RemoteRuntime):
263
263
  is_kfp=False,
264
264
  mlrun_version_specifier=None,
265
265
  show_on_failure: bool = False,
266
- skip_access_key_auth: bool = False,
267
266
  direct_port_access: bool = False,
268
267
  authentication_mode: schemas.APIGatewayAuthenticationMode = None,
269
268
  authentication_creds: tuple[str] = None,
@@ -283,7 +282,6 @@ class ApplicationRuntime(RemoteRuntime):
283
282
  :param is_kfp: Deploy as part of a kfp pipeline
284
283
  :param mlrun_version_specifier: Which mlrun package version to include (if not current)
285
284
  :param show_on_failure: Show logs only in case of build failure
286
- :param skip_access_key_auth: Skip adding access key auth to the API Gateway
287
285
  :param direct_port_access: Set True to allow direct port access to the application sidecar
288
286
  :param authentication_mode: API Gateway authentication mode
289
287
  :param authentication_creds: API Gateway authentication credentials as a tuple (username, password)
@@ -19,6 +19,7 @@ import warnings
19
19
  from datetime import datetime
20
20
  from time import sleep
21
21
 
22
+ import inflection
22
23
  import nuclio
23
24
  import nuclio.utils
24
25
  import requests
@@ -65,7 +66,14 @@ def min_nuclio_versions(*versions):
65
66
  if validate_nuclio_version_compatibility(*versions):
66
67
  return function(*args, **kwargs)
67
68
 
68
- message = f"'{function.__qualname__}' function requires Nuclio v{' or v'.join(versions)} or higher"
69
+ if function.__name__ == "__init__":
70
+ name = inflection.titleize(function.__qualname__.split(".")[0])
71
+ else:
72
+ name = function.__qualname__
73
+
74
+ message = (
75
+ f"'{name}' function requires Nuclio v{' or v'.join(versions)} or higher"
76
+ )
69
77
  raise mlrun.errors.MLRunIncompatibleVersionError(message)
70
78
 
71
79
  return wrapper
@@ -263,7 +271,8 @@ class RemoteRuntime(KubeResource):
263
271
  self._status = self._verify_dict(status, "status", NuclioStatus)
264
272
 
265
273
  def pre_deploy_validation(self):
266
- pass
274
+ if self.metadata.tag:
275
+ mlrun.utils.validate_tag_name(self.metadata.tag, "function.metadata.tag")
267
276
 
268
277
  def set_config(self, key, value):
269
278
  self.spec.config[key] = value
@@ -1318,3 +1327,23 @@ def get_nuclio_deploy_status(
1318
1327
  else:
1319
1328
  text = "\n".join(outputs) if outputs else ""
1320
1329
  return state, address, name, last_log_timestamp, text, function_status
1330
+
1331
+
1332
+ def enrich_nuclio_function_from_headers(
1333
+ func: RemoteRuntime,
1334
+ headers: dict,
1335
+ ):
1336
+ func.status.state = headers.get("x-mlrun-function-status", "")
1337
+ func.status.address = headers.get("x-mlrun-address", "")
1338
+ func.status.nuclio_name = headers.get("x-mlrun-name", "")
1339
+ func.status.internal_invocation_urls = (
1340
+ headers.get("x-mlrun-internal-invocation-urls", "").split(",")
1341
+ if headers.get("x-mlrun-internal-invocation-urls")
1342
+ else []
1343
+ )
1344
+ func.status.external_invocation_urls = (
1345
+ headers.get("x-mlrun-external-invocation-urls", "").split(",")
1346
+ if headers.get("x-mlrun-external-invocation-urls")
1347
+ else []
1348
+ )
1349
+ func.status.container_image = headers.get("x-mlrun-container-image", "")
@@ -312,15 +312,18 @@ class ServingRuntime(RemoteRuntime):
312
312
  sample: Optional[int] = None,
313
313
  stream_args: Optional[dict] = None,
314
314
  tracking_policy: Optional[Union["TrackingPolicy", dict]] = None,
315
+ enable_tracking: bool = True,
315
316
  ) -> None:
316
317
  """apply on your serving function to monitor a deployed model, including real-time dashboards to detect drift
317
318
  and analyze performance.
318
319
 
319
- :param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
320
- you can use the "dummy://" path for test/simulation.
321
- :param batch: Micro batch size (send micro batches of N records at a time).
322
- :param sample: Sample size (send only one of N records).
323
- :param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
320
+ :param stream_path: Path/url of the tracking stream e.g. v3io:///users/mike/mystream
321
+ you can use the "dummy://" path for test/simulation.
322
+ :param batch: Micro batch size (send micro batches of N records at a time).
323
+ :param sample: Sample size (send only one of N records).
324
+ :param stream_args: Stream initialization parameters, e.g. shards, retention_in_hours, ..
325
+ :param enable_tracking: Enabled/Disable model-monitoring tracking.
326
+ Default True (tracking enabled).
324
327
 
325
328
  example::
326
329
 
@@ -331,7 +334,7 @@ class ServingRuntime(RemoteRuntime):
331
334
 
332
335
  """
333
336
  # Applying model monitoring configurations
334
- self.spec.track_models = True
337
+ self.spec.track_models = enable_tracking
335
338
 
336
339
  if stream_path:
337
340
  self.spec.parameters["log_stream"] = stream_path
mlrun/runtimes/pod.py CHANGED
@@ -532,7 +532,9 @@ class KubeResourceSpec(FunctionSpec):
532
532
  return
533
533
 
534
534
  # merge node selectors - precedence to existing node selector
535
- self.node_selector = {**node_selector, **self.node_selector}
535
+ self.node_selector = mlrun.utils.helpers.merge_with_precedence(
536
+ node_selector, self.node_selector
537
+ )
536
538
 
537
539
  def _merge_tolerations(
538
540
  self,
@@ -1038,32 +1040,6 @@ class KubeResource(BaseRuntime, KfpAdapterMixin):
1038
1040
  return True
1039
1041
  return False
1040
1042
 
1041
- def enrich_runtime_spec(
1042
- self,
1043
- project_node_selector: dict[str, str],
1044
- ):
1045
- """
1046
- Enriches the runtime spec with the project-level node selector.
1047
-
1048
- This method merges the project-level node selector with the existing function node_selector.
1049
- The merge logic used here combines the two dictionaries, giving precedence to
1050
- the keys in the runtime node_selector. If there are conflicting keys between the
1051
- two dictionaries, the values from self.spec.node_selector will overwrite the
1052
- values from project_node_selector.
1053
-
1054
- Example:
1055
- Suppose self.spec.node_selector = {"type": "gpu", "zone": "us-east-1"}
1056
- and project_node_selector = {"type": "cpu", "environment": "production"}.
1057
- After the merge, the resulting node_selector will be:
1058
- {"type": "gpu", "zone": "us-east-1", "environment": "production"}
1059
-
1060
- Note:
1061
- - The merge uses the ** operator, also known as the "unpacking" operator in Python,
1062
- combining key-value pairs from each dictionary. Later dictionaries take precedence
1063
- when there are conflicting keys.
1064
- """
1065
- self.spec.node_selector = {**project_node_selector, **self.spec.node_selector}
1066
-
1067
1043
  def _set_env(self, name, value=None, value_from=None):
1068
1044
  new_var = k8s_client.V1EnvVar(name=name, value=value, value_from=value_from)
1069
1045
 
@@ -1542,7 +1518,7 @@ def get_sanitized_attribute(spec, attribute_name: str):
1542
1518
 
1543
1519
  # check if attribute of type dict, and then check if type is sanitized
1544
1520
  if isinstance(attribute, dict):
1545
- if attribute_config["not_sanitized_class"] != dict:
1521
+ if not isinstance(attribute_config["not_sanitized_class"], dict):
1546
1522
  raise mlrun.errors.MLRunInvalidArgumentTypeError(
1547
1523
  f"expected to be of type {attribute_config.get('not_sanitized_class')} but got dict"
1548
1524
  )
@@ -1552,7 +1528,7 @@ def get_sanitized_attribute(spec, attribute_name: str):
1552
1528
  elif isinstance(attribute, list) and not isinstance(
1553
1529
  attribute[0], attribute_config["sub_attribute_type"]
1554
1530
  ):
1555
- if attribute_config["not_sanitized_class"] != list:
1531
+ if not isinstance(attribute_config["not_sanitized_class"], list):
1556
1532
  raise mlrun.errors.MLRunInvalidArgumentTypeError(
1557
1533
  f"expected to be of type {attribute_config.get('not_sanitized_class')} but got list"
1558
1534
  )
@@ -130,14 +130,20 @@ class RemoteSparkRuntime(KubejobRuntime):
130
130
  def spec(self, spec):
131
131
  self._spec = self._verify_dict(spec, "spec", RemoteSparkSpec)
132
132
 
133
- def with_spark_service(self, spark_service, provider=RemoteSparkProviders.iguazio):
133
+ def with_spark_service(
134
+ self,
135
+ spark_service,
136
+ provider=RemoteSparkProviders.iguazio,
137
+ with_v3io_mount=True,
138
+ ):
134
139
  """Attach spark service to function"""
135
140
  self.spec.provider = provider
136
141
  if provider == RemoteSparkProviders.iguazio:
137
142
  self.spec.env.append(
138
143
  {"name": "MLRUN_SPARK_CLIENT_IGZ_SPARK", "value": "true"}
139
144
  )
140
- self.apply(mount_v3io())
145
+ if with_v3io_mount:
146
+ self.apply(mount_v3io())
141
147
  self.apply(
142
148
  mount_v3iod(
143
149
  namespace=config.namespace,
mlrun/serving/__init__.py CHANGED
@@ -22,10 +22,17 @@ __all__ = [
22
22
  "RouterStep",
23
23
  "QueueStep",
24
24
  "ErrorStep",
25
+ "MonitoringApplicationStep",
25
26
  ]
26
27
 
27
28
  from .routers import ModelRouter, VotingEnsemble # noqa
28
29
  from .server import GraphContext, GraphServer, create_graph_server # noqa
29
- from .states import ErrorStep, QueueStep, RouterStep, TaskStep # noqa
30
+ from .states import (
31
+ ErrorStep,
32
+ QueueStep,
33
+ RouterStep,
34
+ TaskStep,
35
+ MonitoringApplicationStep,
36
+ ) # noqa
30
37
  from .v1_serving import MLModelServer, new_v1_model_server # noqa
31
38
  from .v2_serving import V2ModelServer # noqa
mlrun/serving/routers.py CHANGED
@@ -1030,74 +1030,90 @@ def _init_endpoint_record(
1030
1030
  function_uri=graph_server.function_uri, versioned_model=versioned_model_name
1031
1031
  ).uid
1032
1032
 
1033
- # If model endpoint object was found in DB, skip the creation process.
1034
1033
  try:
1035
- mlrun.get_run_db().get_model_endpoint(project=project, endpoint_id=endpoint_uid)
1036
-
1034
+ model_ep = mlrun.get_run_db().get_model_endpoint(
1035
+ project=project, endpoint_id=endpoint_uid
1036
+ )
1037
1037
  except mlrun.errors.MLRunNotFoundError:
1038
- logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
1038
+ model_ep = None
1039
+ except mlrun.errors.MLRunBadRequestError as err:
1040
+ logger.debug(
1041
+ f"Cant reach to model endpoints store, due to : {err}",
1042
+ )
1043
+ return
1039
1044
 
1040
- try:
1041
- # Get the children model endpoints ids
1042
- children_uids = []
1043
- for _, c in voting_ensemble.routes.items():
1044
- if hasattr(c, "endpoint_uid"):
1045
- children_uids.append(c.endpoint_uid)
1046
-
1047
- model_endpoint = mlrun.common.schemas.ModelEndpoint(
1048
- metadata=mlrun.common.schemas.ModelEndpointMetadata(
1049
- project=project, uid=endpoint_uid
1050
- ),
1051
- spec=mlrun.common.schemas.ModelEndpointSpec(
1052
- function_uri=graph_server.function_uri,
1053
- model=versioned_model_name,
1054
- model_class=voting_ensemble.__class__.__name__,
1055
- stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
1056
- project=project, kind="stream"
1057
- ),
1058
- active=True,
1059
- monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
1060
- if voting_ensemble.context.server.track_models
1061
- else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
1062
- ),
1063
- status=mlrun.common.schemas.ModelEndpointStatus(
1064
- children=list(voting_ensemble.routes.keys()),
1065
- endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
1066
- children_uids=children_uids,
1045
+ if voting_ensemble.context.server.track_models and not model_ep:
1046
+ logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
1047
+ # Get the children model endpoints ids
1048
+ children_uids = []
1049
+ for _, c in voting_ensemble.routes.items():
1050
+ if hasattr(c, "endpoint_uid"):
1051
+ children_uids.append(c.endpoint_uid)
1052
+ model_endpoint = mlrun.common.schemas.ModelEndpoint(
1053
+ metadata=mlrun.common.schemas.ModelEndpointMetadata(
1054
+ project=project, uid=endpoint_uid
1055
+ ),
1056
+ spec=mlrun.common.schemas.ModelEndpointSpec(
1057
+ function_uri=graph_server.function_uri,
1058
+ model=versioned_model_name,
1059
+ model_class=voting_ensemble.__class__.__name__,
1060
+ stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
1061
+ project=project, kind="stream"
1067
1062
  ),
1068
- )
1063
+ active=True,
1064
+ monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
1065
+ ),
1066
+ status=mlrun.common.schemas.ModelEndpointStatus(
1067
+ children=list(voting_ensemble.routes.keys()),
1068
+ endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
1069
+ children_uids=children_uids,
1070
+ ),
1071
+ )
1069
1072
 
1070
- db = mlrun.get_run_db()
1073
+ db = mlrun.get_run_db()
1074
+
1075
+ db.create_model_endpoint(
1076
+ project=project,
1077
+ endpoint_id=model_endpoint.metadata.uid,
1078
+ model_endpoint=model_endpoint.dict(),
1079
+ )
1071
1080
 
1081
+ # Update model endpoint children type
1082
+ for model_endpoint in children_uids:
1083
+ current_endpoint = db.get_model_endpoint(
1084
+ project=project, endpoint_id=model_endpoint
1085
+ )
1086
+ current_endpoint.status.endpoint_type = (
1087
+ mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
1088
+ )
1072
1089
  db.create_model_endpoint(
1073
1090
  project=project,
1074
- endpoint_id=model_endpoint.metadata.uid,
1075
- model_endpoint=model_endpoint.dict(),
1076
- )
1077
-
1078
- # Update model endpoint children type
1079
- for model_endpoint in children_uids:
1080
- current_endpoint = db.get_model_endpoint(
1081
- project=project, endpoint_id=model_endpoint
1082
- )
1083
- current_endpoint.status.endpoint_type = (
1084
- mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
1085
- )
1086
- db.create_model_endpoint(
1087
- project=project,
1088
- endpoint_id=model_endpoint,
1089
- model_endpoint=current_endpoint,
1090
- )
1091
-
1092
- except Exception as exc:
1093
- logger.warning(
1094
- "Failed creating model endpoint record",
1095
- exc=err_to_str(exc),
1096
- traceback=traceback.format_exc(),
1091
+ endpoint_id=model_endpoint,
1092
+ model_endpoint=current_endpoint,
1097
1093
  )
1098
-
1099
- except Exception as e:
1100
- logger.error("Failed to retrieve model endpoint object", exc=err_to_str(e))
1094
+ elif (
1095
+ model_ep
1096
+ and (
1097
+ model_ep.spec.monitoring_mode
1098
+ == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
1099
+ )
1100
+ != voting_ensemble.context.server.track_models
1101
+ ):
1102
+ monitoring_mode = (
1103
+ mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
1104
+ if voting_ensemble.context.server.track_models
1105
+ else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
1106
+ )
1107
+ db = mlrun.get_run_db()
1108
+ db.patch_model_endpoint(
1109
+ project=project,
1110
+ endpoint_id=endpoint_uid,
1111
+ attributes={"monitoring_mode": monitoring_mode},
1112
+ )
1113
+ logger.debug(
1114
+ f"Updating model endpoint monitoring_mode to {monitoring_mode}",
1115
+ endpoint_id=endpoint_uid,
1116
+ )
1101
1117
 
1102
1118
  return endpoint_uid
1103
1119
 
mlrun/serving/server.py CHANGED
@@ -383,6 +383,17 @@ def v2_serving_handler(context, event, get_body=False):
383
383
  if event.body == b"":
384
384
  event.body = None
385
385
 
386
+ # original path is saved in stream_path so it can be used by explicit ack, but path is reset to / as a
387
+ # workaround for NUC-178
388
+ event.stream_path = event.path
389
+ if hasattr(event, "trigger") and event.trigger.kind in (
390
+ "kafka",
391
+ "kafka-cluster",
392
+ "v3ioStream",
393
+ "v3io-stream",
394
+ ):
395
+ event.path = "/"
396
+
386
397
  return context._server.run(event, context, get_body)
387
398
 
388
399