mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff shows the published contents of two package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (235)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -1
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +31 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +196 -0
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +13 -2
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +233 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +387 -119
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +245 -20
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +909 -231
  77. mlrun/db/nopdb.py +279 -14
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1176 -406
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +208 -181
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +54 -24
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/__init__.py +1 -0
  178. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  179. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  180. mlrun/runtimes/nuclio/application/application.py +758 -0
  181. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  182. mlrun/runtimes/nuclio/function.py +188 -68
  183. mlrun/runtimes/nuclio/serving.py +57 -60
  184. mlrun/runtimes/pod.py +191 -58
  185. mlrun/runtimes/remotesparkjob.py +11 -8
  186. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  187. mlrun/runtimes/utils.py +40 -73
  188. mlrun/secrets.py +6 -2
  189. mlrun/serving/__init__.py +8 -1
  190. mlrun/serving/remote.py +2 -3
  191. mlrun/serving/routers.py +89 -64
  192. mlrun/serving/server.py +54 -26
  193. mlrun/serving/states.py +187 -56
  194. mlrun/serving/utils.py +19 -11
  195. mlrun/serving/v2_serving.py +136 -63
  196. mlrun/track/tracker.py +2 -1
  197. mlrun/track/trackers/mlflow_tracker.py +5 -0
  198. mlrun/utils/async_http.py +26 -6
  199. mlrun/utils/db.py +18 -0
  200. mlrun/utils/helpers.py +375 -105
  201. mlrun/utils/http.py +2 -2
  202. mlrun/utils/logger.py +75 -9
  203. mlrun/utils/notifications/notification/__init__.py +14 -10
  204. mlrun/utils/notifications/notification/base.py +48 -0
  205. mlrun/utils/notifications/notification/console.py +2 -0
  206. mlrun/utils/notifications/notification/git.py +24 -1
  207. mlrun/utils/notifications/notification/ipython.py +2 -0
  208. mlrun/utils/notifications/notification/slack.py +96 -21
  209. mlrun/utils/notifications/notification/webhook.py +63 -2
  210. mlrun/utils/notifications/notification_pusher.py +146 -16
  211. mlrun/utils/regex.py +9 -0
  212. mlrun/utils/retryer.py +3 -2
  213. mlrun/utils/v3io_clients.py +2 -3
  214. mlrun/utils/version/version.json +2 -2
  215. mlrun-1.7.2.dist-info/METADATA +390 -0
  216. mlrun-1.7.2.dist-info/RECORD +351 -0
  217. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  218. mlrun/feature_store/retrieval/conversion.py +0 -271
  219. mlrun/kfpops.py +0 -868
  220. mlrun/model_monitoring/application.py +0 -310
  221. mlrun/model_monitoring/batch.py +0 -974
  222. mlrun/model_monitoring/controller_handler.py +0 -37
  223. mlrun/model_monitoring/prometheus.py +0 -216
  224. mlrun/model_monitoring/stores/__init__.py +0 -111
  225. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  226. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  227. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  228. mlrun/model_monitoring/stores/models/base.py +0 -84
  229. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  230. mlrun/platforms/other.py +0 -305
  231. mlrun-1.7.0rc4.dist-info/METADATA +0 -269
  232. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  233. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  234. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  235. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/serving/server.py CHANGED
@@ -22,9 +22,14 @@ import traceback
 import uuid
 from typing import Optional, Union
 
+from nuclio import Context as NuclioContext
+from nuclio.request import Logger as NuclioLogger
+
 import mlrun
+import mlrun.common.constants
 import mlrun.common.helpers
 import mlrun.model_monitoring
+import mlrun.utils
 from mlrun.config import config
 from mlrun.errors import err_to_str
 from mlrun.secrets import SecretsStore
@@ -37,10 +42,7 @@ from ..errors import MLRunInvalidArgumentError
 from ..model import ModelObj
 from ..utils import get_caller_globals
 from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
-from .utils import (
-    event_id_key,
-    event_path_key,
-)
+from .utils import event_id_key, event_path_key
 
 
 class _StreamContext:
@@ -52,7 +54,7 @@ class _StreamContext:
         Initialize _StreamContext object.
         :param enabled:      A boolean indication for applying the stream context
         :param parameters:   Dictionary of optional parameters, such as `log_stream` and `stream_args`. Note that these
-                             parameters might be relevant to the output source such as `kafka_bootstrap_servers` if
+                             parameters might be relevant to the output source such as `kafka_brokers` if
                              the output source is from type Kafka.
         :param function_uri: Full value of the function uri, usually it's <project-name>/<function-name>
         """
@@ -70,15 +72,15 @@ class _StreamContext:
                 function_uri, config.default_project
             )
 
-            stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
+            self.stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
 
             if log_stream:
                 # Update the stream path to the log stream value
-                stream_uri = log_stream.format(project=project)
+                self.stream_uri = log_stream.format(project=project)
 
             stream_args = parameters.get("stream_args", {})
 
-            self.output_stream = get_stream_pusher(stream_uri, **stream_args)
+            self.output_stream = get_stream_pusher(self.stream_uri, **stream_args)
 
 
 class GraphServer(ModelObj):
@@ -152,6 +154,7 @@ class GraphServer(ModelObj):
         resource_cache: ResourceCache = None,
         logger=None,
         is_mock=False,
+        monitoring_mock=False,
     ):
         """for internal use, initialize all steps (recursively)"""
 
@@ -164,6 +167,7 @@ class GraphServer(ModelObj):
 
         context = GraphContext(server=self, nuclio_context=context, logger=logger)
         context.is_mock = is_mock
+        context.monitoring_mock = monitoring_mock
        context.root = self.graph
 
         context.stream = _StreamContext(
@@ -311,11 +315,8 @@ class GraphServer(ModelObj):
 def v2_serving_init(context, namespace=None):
     """hook for nuclio init_context()"""
 
-    data = os.environ.get("SERVING_SPEC_ENV", "")
-    if not data:
-        raise MLRunInvalidArgumentError("failed to find spec env var")
-    spec = json.loads(data)
     context.logger.info("Initializing server from spec")
+    spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
     if config.log_level.lower() == "debug":
         server.verbose = True
@@ -323,9 +324,9 @@ def v2_serving_init(context, namespace=None):
     server.http_trigger = getattr(context.trigger, "kind", "http") == "http"
     context.logger.info_with(
         "Setting current function",
-        current_functiton=os.environ.get("SERVING_CURRENT_FUNCTION", ""),
+        current_function=os.getenv("SERVING_CURRENT_FUNCTION", ""),
     )
-    server.set_current_function(os.environ.get("SERVING_CURRENT_FUNCTION", ""))
+    server.set_current_function(os.getenv("SERVING_CURRENT_FUNCTION", ""))
     context.logger.info_with(
         "Initializing states", namespace=namespace or get_caller_globals()
     )
@@ -346,28 +347,33 @@ def v2_serving_init(context, namespace=None):
     if server.verbose:
         context.logger.info(server.to_yaml())
 
-    if hasattr(context, "platform") and hasattr(
-        context.platform, "set_termination_callback"
-    ):
+    _set_callbacks(server, context)
+
+
+def _set_callbacks(server, context):
+    if not server.graph.supports_termination() or not hasattr(context, "platform"):
+        return
+
+    if hasattr(context.platform, "set_termination_callback"):
         context.logger.info(
             "Setting termination callback to terminate graph on worker shutdown"
         )
 
         async def termination_callback():
            context.logger.info("Termination callback called")
-            await server.wait_for_completion()
+            server.wait_for_completion()
            context.logger.info("Termination of async flow is completed")
 
         context.platform.set_termination_callback(termination_callback)
 
-    if hasattr(context, "platform") and hasattr(context.platform, "set_drain_callback"):
+    if hasattr(context.platform, "set_drain_callback"):
         context.logger.info(
             "Setting drain callback to terminate and restart the graph on a drain event (such as rebalancing)"
         )
 
         async def drain_callback():
             context.logger.info("Drain callback called")
-            await server.wait_for_completion()
+            server.wait_for_completion()
             context.logger.info(
                 "Termination of async flow is completed. Rerunning async flow."
             )
@@ -385,11 +391,26 @@ def v2_serving_handler(context, event, get_body=False):
     if event.body == b"":
         event.body = None
 
+    # original path is saved in stream_path so it can be used by explicit ack, but path is reset to / as a
+    # workaround for NUC-178
+    # nuclio 1.12.12 added the topic attribute, and we must use it as part of the fix for NUC-233
+    # TODO: Remove fallback on event.path once support for nuclio<1.12.12 is dropped
+    event.stream_path = getattr(event, "topic", event.path)
+    if hasattr(event, "trigger") and event.trigger.kind in (
+        "kafka",
+        "kafka-cluster",
+        "v3ioStream",
+        "v3io-stream",
+        "rabbit-mq",
+        "rabbitMq",
+    ):
+        event.path = "/"
+
     return context._server.run(event, context, get_body)
 
 
 def create_graph_server(
-    parameters={},
+    parameters=None,
     load_mode=None,
     graph=None,
     verbose=False,
@@ -405,9 +426,10 @@ def create_graph_server(
         server.graph.add_route("my", class_name=MyModelClass, model_path="{path}", z=100)
         print(server.test("/v2/models/my/infer", testdata))
     """
+    parameters = parameters or {}
     server = GraphServer(graph, parameters, load_mode, verbose=verbose, **kwargs)
     server.set_current_function(
-        current_function or os.environ.get("SERVING_CURRENT_FUNCTION", "")
+        current_function or os.getenv("SERVING_CURRENT_FUNCTION", "")
     )
     return server
 
@@ -471,7 +493,13 @@ class Response:
 class GraphContext:
     """Graph context object"""
 
-    def __init__(self, level="info", logger=None, server=None, nuclio_context=None):
+    def __init__(
+        self,
+        level="info",  # Unused argument
+        logger=None,
+        server=None,
+        nuclio_context: Optional[NuclioContext] = None,
+    ) -> None:
         self.state = None
         self.logger = logger
         self.worker_id = 0
@@ -481,7 +509,7 @@ class GraphContext:
         self.root = None
 
         if nuclio_context:
-            self.logger = nuclio_context.logger
+            self.logger: NuclioLogger = nuclio_context.logger
             self.Response = nuclio_context.Response
             if hasattr(nuclio_context, "trigger") and hasattr(
                 nuclio_context.trigger, "kind"
@@ -491,7 +519,7 @@ class GraphContext:
             if hasattr(nuclio_context, "platform"):
                 self.platform = nuclio_context.platform
         elif not logger:
-            self.logger = mlrun.utils.helpers.logger
+            self.logger: mlrun.utils.Logger = mlrun.utils.logger
 
         self._server = server
         self.current_function = None
@@ -504,7 +532,7 @@ class GraphContext:
         return self._server
 
     @property
-    def project(self):
+    def project(self) -> str:
         """current project name (for the current function)"""
         project, _, _, _ = mlrun.common.helpers.parse_versioned_object_uri(
             self._server.function_uri
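
Reviewer note on the `create_graph_server` change above: `parameters={}` is a mutable default argument, which Python evaluates once at function definition, so every call without an explicit argument shares the same dict. The `parameters=None` signature plus `parameters = parameters or {}` in the body removes that shared state. A minimal standalone sketch of the pitfall (illustrative function names, not mlrun APIs):

    def bad(parameters={}):
        # the same dict object is reused on every call
        parameters.setdefault("calls", 0)
        parameters["calls"] += 1
        return parameters

    def good(parameters=None):
        # a fresh dict is created per call unless one is passed in
        parameters = parameters or {}
        parameters.setdefault("calls", 0)
        parameters["calls"] += 1
        return parameters

    print(bad())   # {'calls': 1}
    print(bad())   # {'calls': 2} - state leaked across calls
    print(good())  # {'calls': 1}
    print(good())  # {'calls': 1} - no leak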
mlrun/serving/states.py CHANGED
@@ -12,21 +12,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__all__ = ["TaskStep", "RouterStep", "RootFlowStep", "ErrorStep"]
+__all__ = [
+    "TaskStep",
+    "RouterStep",
+    "RootFlowStep",
+    "ErrorStep",
+    "MonitoringApplicationStep",
+]
 
-import asyncio
 import os
 import pathlib
 import traceback
 from copy import copy, deepcopy
 from inspect import getfullargspec, signature
-from typing import Union
+from typing import Any, Union
+
+import storey.utils
 
 import mlrun
 
 from ..config import config
 from ..datastore import get_stream_pusher
-from ..datastore.utils import parse_kafka_url
+from ..datastore.utils import (
+    get_kafka_brokers_from_dict,
+    parse_kafka_url,
+)
 from ..errors import MLRunInvalidArgumentError, err_to_str
 from ..model import ModelObj, ObjectDict
 from ..platforms.iguazio import parse_path
@@ -53,6 +63,7 @@ class StepKinds:
     choice = "choice"
     root = "root"
     error_step = "error_step"
+    monitoring_application = "monitoring_application"
 
 
 _task_step_fields = [
@@ -73,6 +84,9 @@ _task_step_fields = [
 ]
 
 
+MAX_ALLOWED_STEPS = 4500
+
+
 def new_model_endpoint(class_name, model_path, handler=None, **class_args):
     class_args = deepcopy(class_args)
     class_args["model_path"] = model_path
@@ -325,7 +339,7 @@ class BaseStep(ModelObj):
             parent = self._parent
         else:
             raise GraphError(
-                f"step {self.name} parent is not set or its not part of a graph"
+                f"step {self.name} parent is not set or it's not part of a graph"
             )
 
         name, step = params_to_step(
@@ -347,6 +361,39 @@ class BaseStep(ModelObj):
         parent._last_added = step
         return step
 
+    def set_flow(
+        self,
+        steps: list[Union[str, StepToDict, dict[str, Any]]],
+        force: bool = False,
+    ):
+        """set list of steps as downstream from this step, in the order specified. This will overwrite any existing
+        downstream steps.
+
+        :param steps: list of steps to follow this one
+        :param force: whether to overwrite existing downstream steps. If False, this method will fail if any downstream
+                      steps have already been defined. Defaults to False.
+        :return: the last step added to the flow
+
+        example:
+        The below code sets the downstream nodes of step1 by using a list of steps (provided to `set_flow()`) and a
+        single step (provided to `to()`), resulting in the graph (step1 -> step2 -> step3 -> step4).
+        Notice that using `force=True` is required in case step1 already had downstream nodes (e.g. if the existing
+        graph is step1 -> step2_old) and that following the execution of this code the existing downstream steps
+        are removed. If the intention is to split the graph (and not to overwrite), please use `to()`.
+
+            step1.set_flow(
+                [
+                    dict(name="step2", handler="step2_handler"),
+                    dict(name="step3", class_name="Step3Class"),
+                ],
+                force=True,
+            ).to(dict(name="step4", class_name="Step4Class"))
+        """
+        raise NotImplementedError("set_flow() can only be called on a FlowStep")
+
+    def supports_termination(self):
+        return False
+
 
 class TaskStep(BaseStep):
     """task execution step, runs a class or handler"""
@@ -453,13 +500,15 @@ class TaskStep(BaseStep):
                 class_args[key] = arg
         class_args.update(extra_kwargs)
 
-        # add common args (name, context, ..) only if target class can accept them
-        argspec = getfullargspec(class_object)
-        for key in ["name", "context", "input_path", "result_path", "full_event"]:
-            if argspec.varkw or key in argspec.args:
-                class_args[key] = getattr(self, key)
-        if argspec.varkw or "graph_step" in argspec.args:
-            class_args["graph_step"] = self
+        if not isinstance(self, MonitoringApplicationStep):
+            # add common args (name, context, ..) only if target class can accept them
+            argspec = getfullargspec(class_object)
+
+            for key in ["name", "context", "input_path", "result_path", "full_event"]:
+                if argspec.varkw or key in argspec.args:
+                    class_args[key] = getattr(self, key)
+            if argspec.varkw or "graph_step" in argspec.args:
+                class_args["graph_step"] = self
         return class_args
 
     def get_step_class_object(self, namespace):
@@ -550,6 +599,39 @@ class TaskStep(BaseStep):
         return event
 
 
+class MonitoringApplicationStep(TaskStep):
+    """monitoring application execution step, runs users class code"""
+
+    kind = "monitoring_application"
+    _default_class = ""
+
+    def __init__(
+        self,
+        class_name: Union[str, type] = None,
+        class_args: dict = None,
+        handler: str = None,
+        name: str = None,
+        after: list = None,
+        full_event: bool = None,
+        function: str = None,
+        responder: bool = None,
+        input_path: str = None,
+        result_path: str = None,
+    ):
+        super().__init__(
+            class_name=class_name,
+            class_args=class_args,
+            handler=handler,
+            name=name,
+            after=after,
+            full_event=full_event,
+            function=function,
+            responder=responder,
+            input_path=input_path,
+            result_path=result_path,
+        )
+
+
 class ErrorStep(TaskStep):
     """error execution step, runs a class or handler"""
 
@@ -654,6 +736,11 @@ class RouterStep(TaskStep):
         if not route:
             route = TaskStep(class_name, class_args, handler=handler)
         route.function = function or route.function
+
+        if len(self._routes) >= MAX_ALLOWED_STEPS:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Cannot create the serving graph: the maximum number of steps is {MAX_ALLOWED_STEPS}"
+            )
         route = self._routes.update(key, route)
         route.set_parent(self)
         return route
@@ -752,19 +839,53 @@ class QueueStep(BaseStep):
             retention_in_hours=self.retention_in_hours,
             **self.options,
         )
+        if hasattr(self._stream, "create_stream"):
+            self._stream.create_stream()
         self._set_error_handler()
 
     @property
     def async_object(self):
         return self._async_object
 
+    def to(
+        self,
+        class_name: Union[str, StepToDict] = None,
+        name: str = None,
+        handler: str = None,
+        graph_shape: str = None,
+        function: str = None,
+        full_event: bool = None,
+        input_path: str = None,
+        result_path: str = None,
+        **class_args,
+    ):
+        if not function:
+            name = get_name(name, class_name)
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"step '{name}' must specify a function, because it follows a queue step"
+            )
+        return super().to(
+            class_name,
+            name,
+            handler,
+            graph_shape,
+            function,
+            full_event,
+            input_path,
+            result_path,
+            **class_args,
+        )
+
     def run(self, event, *args, **kwargs):
         data = event.body
         if not data:
             return event
 
         if self._stream:
-            self._stream.push({"id": event.id, "body": data, "path": event.path})
+            full_event = self.options.get("full_event")
+            if full_event or full_event is None and self.next:
+                data = storey.utils.wrap_event_for_serialization(event, data)
+            self._stream.push(data)
             event.terminated = True
             event.body = None
             return event
@@ -1128,8 +1249,8 @@ class FlowStep(BaseStep):
                 links[next_step.function] = step
         return links
 
-    def init_queues(self):
-        """init/create the streams used in this flow"""
+    def create_queue_streams(self):
+        """create the streams used in this flow"""
         for step in self.get_children():
             if step.kind == StepKinds.queue:
                 step.init_object(self.context, None)
@@ -1161,23 +1282,17 @@ class FlowStep(BaseStep):
         if self._controller:
             # async flow (using storey)
             event._awaitable_result = None
-            if self.context.is_mock:
-                resp = self._controller.emit(
-                    event, return_awaitable_result=self._wait_for_result
-                )
-                if self._wait_for_result and resp:
-                    return resp.await_result()
-            else:
-                resp_awaitable = self._controller.emit(
-                    event, await_result=self._wait_for_result
-                )
-                if self._wait_for_result:
-                    return resp_awaitable
-                return self._await_and_return_id(resp_awaitable, event)
+            resp = self._controller.emit(
+                event, return_awaitable_result=self._wait_for_result
+            )
+            if self._wait_for_result and resp:
+                return resp.await_result()
             event = copy(event)
             event.body = {"id": event.id}
             return event
 
+        event = storey.utils.unpack_event_if_wrapped(event)
+
         if len(self._start_steps) == 0:
             return event
         next_obj = self._start_steps[0]
@@ -1213,18 +1328,9 @@ class FlowStep(BaseStep):
         """wait for completion of run in async flows"""
 
         if self._controller:
-            if asyncio.iscoroutinefunction(self._controller.await_termination):
-
-                async def terminate_and_await_termination():
-                    if hasattr(self._controller, "terminate"):
-                        await self._controller.terminate()
-                    return await self._controller.await_termination()
-
-                return terminate_and_await_termination()
-            else:
-                if hasattr(self._controller, "terminate"):
-                    self._controller.terminate()
-                return self._controller.await_termination()
+            if hasattr(self._controller, "terminate"):
+                self._controller.terminate()
+            return self._controller.await_termination()
 
     def plot(self, filename=None, format=None, source=None, targets=None, **kw):
         """plot/save graph using graphviz
@@ -1273,6 +1379,30 @@ class FlowStep(BaseStep):
             )
             self[step_name].after_step(name)
 
+    def set_flow(
+        self,
+        steps: list[Union[str, StepToDict, dict[str, Any]]],
+        force: bool = False,
+    ):
+        if not force and self.steps:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "set_flow() called on a step that already has downstream steps. "
+                "If you want to overwrite existing steps, set force=True."
+            )
+
+        self.steps = None
+        step = self
+        for next_step in steps:
+            if isinstance(next_step, dict):
+                step = step.to(**next_step)
+            else:
+                step = step.to(next_step)
+
+        return step
+
+    def supports_termination(self):
+        return self.engine != "sync"
+
 
 class RootFlowStep(FlowStep):
     """root flow step"""
@@ -1287,6 +1417,7 @@ classes_map = {
     "flow": FlowStep,
     "queue": QueueStep,
     "error_step": ErrorStep,
+    "monitoring_application": MonitoringApplicationStep,
 }
 
 
@@ -1510,15 +1641,17 @@ def _init_async_objects(context, steps):
                 if step.path and not skip_stream:
                     stream_path = step.path
                     endpoint = None
-                    options = {}
+                    # in case of a queue, we default to a full_event=True
+                    full_event = step.options.get("full_event")
+                    options = {
+                        "full_event": full_event or full_event is None and step.next
+                    }
                     options.update(step.options)
-                    kafka_bootstrap_servers = options.pop(
-                        "kafka_bootstrap_servers", None
-                    )
-                    if stream_path.startswith("kafka://") or kafka_bootstrap_servers:
-                        topic, bootstrap_servers = parse_kafka_url(
-                            stream_path, kafka_bootstrap_servers
-                        )
+
+                    kafka_brokers = get_kafka_brokers_from_dict(options, pop=True)
+
+                    if stream_path.startswith("kafka://") or kafka_brokers:
+                        topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
 
                         kafka_producer_options = options.pop(
                             "kafka_producer_options", None
@@ -1526,7 +1659,7 @@ def _init_async_objects(context, steps):
 
                         step._async_object = storey.KafkaTarget(
                             topic=topic,
-                            bootstrap_servers=bootstrap_servers,
+                            brokers=brokers,
                            producer_options=kafka_producer_options,
                             context=context,
                             **options,
@@ -1566,14 +1699,12 @@ def _init_async_objects(context, steps):
             wait_for_result = True
 
     source_args = context.get_param("source_args", {})
-    explicit_ack = is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
-
-    if context.is_mock:
-        source_class = storey.SyncEmitSource
-    else:
-        source_class = storey.AsyncEmitSource
+    explicit_ack = (
+        is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack_enabled()
+    )
 
-    default_source = source_class(
+    # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
+    default_source = storey.SyncEmitSource(
         context=context,
         explicit_ack=explicit_ack,
         **source_args,
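
Reviewer note on the new `set_flow()` API above: it replaces a step's entire downstream chain in one call, unlike `to()`, which appends to (or splits) the graph. A short usage sketch, assuming a serving function whose handlers and classes live in a hypothetical steps.py (all names here are illustrative):

    import mlrun

    fn = mlrun.code_to_function("graph-demo", kind="serving", filename="steps.py")
    graph = fn.set_topology("flow", engine="async")

    # build an initial chain, then overwrite it in one call;
    # force=True is required because a downstream step already exists
    graph.to(name="old_step", handler="old_handler")
    graph.set_flow(
        [
            dict(name="step1", handler="step1_handler"),
            dict(name="step2", class_name="Step2Class"),
        ],
        force=True,
    ).to(dict(name="step3", class_name="Step3Class"))

Also note the new `QueueStep.to()` override: any step placed directly after a queue step must name a `function`, since the consumer of the queue runs in a separate function from the producer.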
mlrun/serving/utils.py CHANGED
@@ -46,6 +46,15 @@ def _update_result_body(result_path, event_body, result):
 class StepToDict:
     """auto serialization of graph steps to a python dictionary"""
 
+    meta_keys = [
+        "context",
+        "name",
+        "input_path",
+        "result_path",
+        "full_event",
+        "kwargs",
+    ]
+
     def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
         """convert the step object to a python dictionary"""
         fields = fields or getattr(self, "_dict_fields", None)
@@ -54,24 +63,16 @@ class StepToDict:
         if exclude:
             fields = [field for field in fields if field not in exclude]
 
-        meta_keys = [
-            "context",
-            "name",
-            "input_path",
-            "result_path",
-            "full_event",
-            "kwargs",
-        ]
         args = {
             key: getattr(self, key)
             for key in fields
-            if getattr(self, key, None) is not None and key not in meta_keys
+            if getattr(self, key, None) is not None and key not in self.meta_keys
         }
         # add storey kwargs or extra kwargs
         if "kwargs" in fields and (hasattr(self, "kwargs") or hasattr(self, "_kwargs")):
             kwargs = getattr(self, "kwargs", {}) or getattr(self, "_kwargs", {})
             for key, value in kwargs.items():
-                if key not in meta_keys:
+                if key not in self.meta_keys:
                     args[key] = value
 
         mod_name = self.__class__.__module__
@@ -80,7 +81,9 @@ class StepToDict:
             class_path = f"{mod_name}.{class_path}"
         struct = {
             "class_name": class_path,
-            "name": self.name or self.__class__.__name__,
+            "name": self.name
+            if hasattr(self, "name") and self.name
+            else self.__class__.__name__,
             "class_args": args,
         }
         if hasattr(self, "_STEP_KIND"):
@@ -94,6 +97,11 @@ class StepToDict:
         return struct
 
 
+class MonitoringApplicationToDict(StepToDict):
+    _STEP_KIND = "monitoring_application"
+    meta_keys = []
+
+
 class RouterToDict(StepToDict):
     _STEP_KIND = "router"
 
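
Reviewer note on the `meta_keys` refactor above: promoting `meta_keys` from a local variable inside `to_dict()` to a class attribute is what lets subclasses such as `MonitoringApplicationToDict` disable metadata filtering by overriding it with an empty list. A minimal sketch of the pattern (toy classes, not the mlrun implementation):

    class Serializer:
        # class attribute: keys treated as step metadata, excluded from the dict
        meta_keys = ["context", "name"]

        def to_dict(self, values: dict) -> dict:
            return {k: v for k, v in values.items() if k not in self.meta_keys}

    class KeepEverything(Serializer):
        meta_keys = []  # subclass override: nothing is treated as metadata

    values = {"context": "ctx", "name": "step", "threshold": 0.5}
    print(Serializer().to_dict(values))      # {'threshold': 0.5}
    print(KeepEverything().to_dict(values))  # all three keys kept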