mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (305)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +40 -122
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +47 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +79 -47
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +74 -1
  13. mlrun/common/db/sql_session.py +5 -5
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +45 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +33 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +12 -3
  23. mlrun/common/model_monitoring/helpers.py +9 -5
  24. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  25. mlrun/common/schemas/__init__.py +31 -5
  26. mlrun/common/schemas/alert.py +202 -0
  27. mlrun/common/schemas/api_gateway.py +196 -0
  28. mlrun/common/schemas/artifact.py +25 -4
  29. mlrun/common/schemas/auth.py +16 -5
  30. mlrun/common/schemas/background_task.py +1 -1
  31. mlrun/common/schemas/client_spec.py +4 -2
  32. mlrun/common/schemas/common.py +7 -4
  33. mlrun/common/schemas/constants.py +3 -0
  34. mlrun/common/schemas/feature_store.py +74 -44
  35. mlrun/common/schemas/frontend_spec.py +15 -7
  36. mlrun/common/schemas/function.py +12 -1
  37. mlrun/common/schemas/hub.py +11 -18
  38. mlrun/common/schemas/memory_reports.py +2 -2
  39. mlrun/common/schemas/model_monitoring/__init__.py +20 -4
  40. mlrun/common/schemas/model_monitoring/constants.py +123 -42
  41. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  42. mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
  43. mlrun/common/schemas/notification.py +71 -14
  44. mlrun/common/schemas/object.py +2 -2
  45. mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
  46. mlrun/common/schemas/pipeline.py +8 -1
  47. mlrun/common/schemas/project.py +69 -18
  48. mlrun/common/schemas/runs.py +7 -1
  49. mlrun/common/schemas/runtime_resource.py +8 -12
  50. mlrun/common/schemas/schedule.py +4 -4
  51. mlrun/common/schemas/tag.py +1 -2
  52. mlrun/common/schemas/workflow.py +12 -4
  53. mlrun/common/types.py +14 -1
  54. mlrun/config.py +154 -69
  55. mlrun/data_types/data_types.py +6 -1
  56. mlrun/data_types/spark.py +2 -2
  57. mlrun/data_types/to_pandas.py +67 -37
  58. mlrun/datastore/__init__.py +6 -8
  59. mlrun/datastore/alibaba_oss.py +131 -0
  60. mlrun/datastore/azure_blob.py +143 -42
  61. mlrun/datastore/base.py +102 -58
  62. mlrun/datastore/datastore.py +34 -13
  63. mlrun/datastore/datastore_profile.py +146 -20
  64. mlrun/datastore/dbfs_store.py +3 -7
  65. mlrun/datastore/filestore.py +1 -4
  66. mlrun/datastore/google_cloud_storage.py +97 -33
  67. mlrun/datastore/hdfs.py +56 -0
  68. mlrun/datastore/inmem.py +6 -3
  69. mlrun/datastore/redis.py +7 -2
  70. mlrun/datastore/s3.py +34 -12
  71. mlrun/datastore/snowflake_utils.py +45 -0
  72. mlrun/datastore/sources.py +303 -111
  73. mlrun/datastore/spark_utils.py +31 -2
  74. mlrun/datastore/store_resources.py +9 -7
  75. mlrun/datastore/storeytargets.py +151 -0
  76. mlrun/datastore/targets.py +453 -176
  77. mlrun/datastore/utils.py +72 -58
  78. mlrun/datastore/v3io.py +6 -1
  79. mlrun/db/base.py +274 -41
  80. mlrun/db/factory.py +1 -1
  81. mlrun/db/httpdb.py +893 -225
  82. mlrun/db/nopdb.py +291 -33
  83. mlrun/errors.py +36 -6
  84. mlrun/execution.py +115 -42
  85. mlrun/feature_store/__init__.py +0 -2
  86. mlrun/feature_store/api.py +65 -73
  87. mlrun/feature_store/common.py +7 -12
  88. mlrun/feature_store/feature_set.py +76 -55
  89. mlrun/feature_store/feature_vector.py +39 -31
  90. mlrun/feature_store/ingestion.py +7 -6
  91. mlrun/feature_store/retrieval/base.py +16 -11
  92. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  93. mlrun/feature_store/retrieval/job.py +13 -4
  94. mlrun/feature_store/retrieval/local_merger.py +2 -0
  95. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  96. mlrun/feature_store/steps.py +45 -34
  97. mlrun/features.py +11 -21
  98. mlrun/frameworks/_common/artifacts_library.py +9 -9
  99. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  100. mlrun/frameworks/_common/model_handler.py +48 -48
  101. mlrun/frameworks/_common/plan.py +5 -6
  102. mlrun/frameworks/_common/producer.py +3 -4
  103. mlrun/frameworks/_common/utils.py +5 -5
  104. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  105. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  106. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  107. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  108. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  109. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  110. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  111. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  112. mlrun/frameworks/_ml_common/plan.py +2 -2
  113. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  114. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  115. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  116. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  117. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  118. mlrun/frameworks/_ml_common/utils.py +4 -4
  119. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  120. mlrun/frameworks/huggingface/model_server.py +4 -4
  121. mlrun/frameworks/lgbm/__init__.py +33 -33
  122. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  123. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  124. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  125. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  126. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  127. mlrun/frameworks/lgbm/model_handler.py +10 -10
  128. mlrun/frameworks/lgbm/model_server.py +6 -6
  129. mlrun/frameworks/lgbm/utils.py +5 -5
  130. mlrun/frameworks/onnx/dataset.py +8 -8
  131. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  132. mlrun/frameworks/onnx/model_handler.py +6 -6
  133. mlrun/frameworks/onnx/model_server.py +7 -7
  134. mlrun/frameworks/parallel_coordinates.py +6 -6
  135. mlrun/frameworks/pytorch/__init__.py +18 -18
  136. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  137. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  138. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  139. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  140. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  141. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  142. mlrun/frameworks/pytorch/model_handler.py +17 -17
  143. mlrun/frameworks/pytorch/model_server.py +7 -7
  144. mlrun/frameworks/sklearn/__init__.py +13 -13
  145. mlrun/frameworks/sklearn/estimator.py +4 -4
  146. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  147. mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
  148. mlrun/frameworks/sklearn/model_handler.py +2 -2
  149. mlrun/frameworks/tf_keras/__init__.py +10 -7
  150. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  151. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  152. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  153. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  154. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  155. mlrun/frameworks/tf_keras/model_server.py +6 -6
  156. mlrun/frameworks/xgboost/__init__.py +13 -13
  157. mlrun/frameworks/xgboost/model_handler.py +6 -6
  158. mlrun/k8s_utils.py +61 -17
  159. mlrun/launcher/__init__.py +1 -1
  160. mlrun/launcher/base.py +16 -15
  161. mlrun/launcher/client.py +13 -11
  162. mlrun/launcher/factory.py +1 -1
  163. mlrun/launcher/local.py +23 -13
  164. mlrun/launcher/remote.py +17 -10
  165. mlrun/lists.py +7 -6
  166. mlrun/model.py +478 -103
  167. mlrun/model_monitoring/__init__.py +1 -1
  168. mlrun/model_monitoring/api.py +163 -371
  169. mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
  170. mlrun/model_monitoring/applications/_application_steps.py +188 -0
  171. mlrun/model_monitoring/applications/base.py +108 -0
  172. mlrun/model_monitoring/applications/context.py +341 -0
  173. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  174. mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
  175. mlrun/model_monitoring/applications/results.py +99 -0
  176. mlrun/model_monitoring/controller.py +131 -278
  177. mlrun/model_monitoring/db/__init__.py +18 -0
  178. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  179. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  180. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  181. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  182. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  183. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  184. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  185. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  186. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  187. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  188. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  189. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  190. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  191. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  192. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  193. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
  194. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  195. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
  196. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  197. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  198. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  199. mlrun/model_monitoring/features_drift_table.py +134 -106
  200. mlrun/model_monitoring/helpers.py +199 -55
  201. mlrun/model_monitoring/metrics/__init__.py +13 -0
  202. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  203. mlrun/model_monitoring/model_endpoint.py +3 -2
  204. mlrun/model_monitoring/stream_processing.py +131 -398
  205. mlrun/model_monitoring/tracking_policy.py +9 -2
  206. mlrun/model_monitoring/writer.py +161 -125
  207. mlrun/package/__init__.py +6 -6
  208. mlrun/package/context_handler.py +5 -5
  209. mlrun/package/packager.py +7 -7
  210. mlrun/package/packagers/default_packager.py +8 -8
  211. mlrun/package/packagers/numpy_packagers.py +15 -15
  212. mlrun/package/packagers/pandas_packagers.py +5 -5
  213. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  214. mlrun/package/packagers_manager.py +19 -23
  215. mlrun/package/utils/_formatter.py +6 -6
  216. mlrun/package/utils/_pickler.py +2 -2
  217. mlrun/package/utils/_supported_format.py +4 -4
  218. mlrun/package/utils/log_hint_utils.py +2 -2
  219. mlrun/package/utils/type_hint_utils.py +4 -9
  220. mlrun/platforms/__init__.py +11 -10
  221. mlrun/platforms/iguazio.py +24 -203
  222. mlrun/projects/operations.py +52 -25
  223. mlrun/projects/pipelines.py +191 -197
  224. mlrun/projects/project.py +1227 -400
  225. mlrun/render.py +16 -19
  226. mlrun/run.py +209 -184
  227. mlrun/runtimes/__init__.py +83 -15
  228. mlrun/runtimes/base.py +51 -35
  229. mlrun/runtimes/daskjob.py +17 -10
  230. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  231. mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
  232. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  233. mlrun/runtimes/funcdoc.py +1 -29
  234. mlrun/runtimes/function_reference.py +1 -1
  235. mlrun/runtimes/kubejob.py +34 -128
  236. mlrun/runtimes/local.py +40 -11
  237. mlrun/runtimes/mpijob/__init__.py +0 -20
  238. mlrun/runtimes/mpijob/abstract.py +9 -10
  239. mlrun/runtimes/mpijob/v1.py +1 -1
  240. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  241. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  242. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  243. mlrun/runtimes/nuclio/application/application.py +758 -0
  244. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  245. mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
  246. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  247. mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
  248. mlrun/runtimes/pod.py +281 -101
  249. mlrun/runtimes/remotesparkjob.py +12 -9
  250. mlrun/runtimes/sparkjob/spark3job.py +67 -51
  251. mlrun/runtimes/utils.py +41 -75
  252. mlrun/secrets.py +9 -5
  253. mlrun/serving/__init__.py +8 -1
  254. mlrun/serving/remote.py +2 -7
  255. mlrun/serving/routers.py +85 -69
  256. mlrun/serving/server.py +69 -44
  257. mlrun/serving/states.py +209 -36
  258. mlrun/serving/utils.py +22 -14
  259. mlrun/serving/v1_serving.py +6 -7
  260. mlrun/serving/v2_serving.py +129 -54
  261. mlrun/track/tracker.py +2 -1
  262. mlrun/track/tracker_manager.py +3 -3
  263. mlrun/track/trackers/mlflow_tracker.py +6 -2
  264. mlrun/utils/async_http.py +6 -8
  265. mlrun/utils/azure_vault.py +1 -1
  266. mlrun/utils/clones.py +1 -2
  267. mlrun/utils/condition_evaluator.py +3 -3
  268. mlrun/utils/db.py +21 -3
  269. mlrun/utils/helpers.py +405 -225
  270. mlrun/utils/http.py +3 -6
  271. mlrun/utils/logger.py +112 -16
  272. mlrun/utils/notifications/notification/__init__.py +17 -13
  273. mlrun/utils/notifications/notification/base.py +50 -2
  274. mlrun/utils/notifications/notification/console.py +2 -0
  275. mlrun/utils/notifications/notification/git.py +24 -1
  276. mlrun/utils/notifications/notification/ipython.py +3 -1
  277. mlrun/utils/notifications/notification/slack.py +96 -21
  278. mlrun/utils/notifications/notification/webhook.py +59 -2
  279. mlrun/utils/notifications/notification_pusher.py +149 -30
  280. mlrun/utils/regex.py +9 -0
  281. mlrun/utils/retryer.py +208 -0
  282. mlrun/utils/singleton.py +1 -1
  283. mlrun/utils/v3io_clients.py +4 -6
  284. mlrun/utils/version/version.json +2 -2
  285. mlrun/utils/version/version.py +2 -6
  286. mlrun-1.7.0.dist-info/METADATA +378 -0
  287. mlrun-1.7.0.dist-info/RECORD +351 -0
  288. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
  289. mlrun/feature_store/retrieval/conversion.py +0 -273
  290. mlrun/kfpops.py +0 -868
  291. mlrun/model_monitoring/application.py +0 -310
  292. mlrun/model_monitoring/batch.py +0 -1095
  293. mlrun/model_monitoring/prometheus.py +0 -219
  294. mlrun/model_monitoring/stores/__init__.py +0 -111
  295. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
  296. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  297. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  298. mlrun/model_monitoring/stores/models/base.py +0 -84
  299. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  300. mlrun/platforms/other.py +0 -306
  301. mlrun-1.6.4rc7.dist-info/METADATA +0 -272
  302. mlrun-1.6.4rc7.dist-info/RECORD +0 -314
  303. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
  304. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
  305. {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
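
A recurring theme in the renames above is that the Nuclio-related runtimes move into a dedicated subpackage (for example `mlrun/runtimes/function.py → mlrun/runtimes/nuclio/function.py` and `mlrun/runtimes/serving.py → mlrun/runtimes/nuclio/serving.py`), and the serving diff further below already calls the new dotted path `mlrun.runtimes.nuclio.function.get_nuclio_deploy_status`. The sketch below shows the import-path change for code that referenced these modules directly; it is an assumption based only on the renames listed here, and whether the 1.6.x paths remain importable as aliases is not shown in this diff.

    # Hypothetical migration sketch derived from the renames above, not from the diff bodies.
    # mlrun 1.6.x import path:
    #   from mlrun.runtimes.function import get_nuclio_deploy_status
    # mlrun 1.7.0 layout:
    from mlrun.runtimes.nuclio.function import get_nuclio_deploy_status
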
mlrun/serving/routers.py CHANGED
@@ -20,7 +20,7 @@ import traceback
 import typing
 from enum import Enum
 from io import BytesIO
-from typing import Dict, List, Union
+from typing import Union
 
 import numpy
 import numpy as np
@@ -28,10 +28,10 @@ import numpy as np
 import mlrun
 import mlrun.common.model_monitoring
 import mlrun.common.schemas.model_monitoring
+from mlrun.errors import err_to_str
 from mlrun.utils import logger, now_date
 
 from ..common.helpers import parse_versioned_object_uri
-from ..config import config
 from .server import GraphServer
 from .utils import RouterToDict, _extract_input_data, _update_result_body
 from .v2_serving import _ModelLogPusher
@@ -271,7 +271,9 @@ class ParallelRun(BaseModelRouter):
         fn = mlrun.new_function("parallel", kind="serving")
         graph = fn.set_topology(
             "router",
-            mlrun.serving.routers.ParallelRun(extend_event=True, executor_type=executor),
+            mlrun.serving.routers.ParallelRun(
+                extend_event=True, executor_type=executor
+            ),
         )
         graph.add_route("child1", class_name="Cls1")
         graph.add_route("child2", class_name="Cls2", my_arg={"c": 7})
@@ -485,7 +487,7 @@ class VotingEnsemble(ParallelRun):
         url_prefix: str = None,
         health_prefix: str = None,
         vote_type: str = None,
-        weights: Dict[str, float] = None,
+        weights: dict[str, float] = None,
         executor_type: Union[ParallelRunnerModes, str] = ParallelRunnerModes.thread,
         format_response_with_col_name_flag: bool = False,
         prediction_col_name: str = "prediction",
@@ -613,7 +615,7 @@ class VotingEnsemble(ParallelRun):
             logger.warn("GraphServer not initialized for VotingEnsemble instance")
             return
 
-        if not self.context.is_mock or self.context.server.track_models:
+        if not self.context.is_mock or self.context.monitoring_mock:
             self.model_endpoint_uid = _init_endpoint_record(server, self)
 
         self._update_weights(self.weights)
@@ -703,7 +705,7 @@ class VotingEnsemble(ParallelRun):
             )
             return model, None, subpath
 
-    def _majority_vote(self, all_predictions: List[List[int]], weights: List[float]):
+    def _majority_vote(self, all_predictions: list[list[int]], weights: list[float]):
         """
         Returns most predicted class for each event
 
@@ -727,7 +729,7 @@ class VotingEnsemble(ParallelRun):
         weighted_res = one_hot_representation @ weights
         return np.argmax(weighted_res, axis=1).tolist()
 
-    def _mean_vote(self, all_predictions: List[List[float]], weights: List[float]):
+    def _mean_vote(self, all_predictions: list[list[float]], weights: list[float]):
         """
         Returns weighted mean of the predictions
 
@@ -741,7 +743,7 @@ class VotingEnsemble(ParallelRun):
     def _is_int(self, value):
         return float(value).is_integer()
 
-    def logic(self, predictions: List[List[Union[int, float]]], weights: List[float]):
+    def logic(self, predictions: list[list[Union[int, float]]], weights: list[float]):
         """
         Returns the final prediction of all the models after applying the desire logic
 
@@ -957,7 +959,7 @@ class VotingEnsemble(ParallelRun):
             raise Exception('Expected "inputs" to be a list')
         return request
 
-    def _normalize_weights(self, weights_dict: Dict[str, float]):
+    def _normalize_weights(self, weights_dict: dict[str, float]):
         """
         Normalized all the weights such that abs(weights_sum - 1.0) <= 0.001
         and adding 0 weight to all the routes that doesn't appear in the dict.
@@ -1013,7 +1015,7 @@ def _init_endpoint_record(
             graph_server.function_uri
         )
     except Exception as e:
-        logger.error("Failed to parse function URI", exc=e)
+        logger.error("Failed to parse function URI", exc=err_to_str(e))
         return None
 
     # Generating version model value based on the model name and model version
@@ -1027,74 +1029,88 @@ def _init_endpoint_record(
         function_uri=graph_server.function_uri, versioned_model=versioned_model_name
     ).uid
 
-    # If model endpoint object was found in DB, skip the creation process.
     try:
-        mlrun.get_run_db().get_model_endpoint(project=project, endpoint_id=endpoint_uid)
-
+        model_ep = mlrun.get_run_db().get_model_endpoint(
+            project=project, endpoint_id=endpoint_uid
+        )
     except mlrun.errors.MLRunNotFoundError:
+        model_ep = None
+    except mlrun.errors.MLRunBadRequestError as err:
+        logger.debug(
+            f"Cant reach to model endpoints store, due to : {err}",
+        )
+        return
+
+    if voting_ensemble.context.server.track_models and not model_ep:
         logger.info("Creating a new model endpoint record", endpoint_id=endpoint_uid)
+        # Get the children model endpoints ids
+        children_uids = []
+        for _, c in voting_ensemble.routes.items():
+            if hasattr(c, "endpoint_uid"):
+                children_uids.append(c.endpoint_uid)
+        model_endpoint = mlrun.common.schemas.ModelEndpoint(
+            metadata=mlrun.common.schemas.ModelEndpointMetadata(
+                project=project, uid=endpoint_uid
+            ),
+            spec=mlrun.common.schemas.ModelEndpointSpec(
+                function_uri=graph_server.function_uri,
+                model=versioned_model_name,
+                model_class=voting_ensemble.__class__.__name__,
+                stream_path=voting_ensemble.context.stream.stream_uri,
+                active=True,
+                monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
+            ),
+            status=mlrun.common.schemas.ModelEndpointStatus(
+                children=list(voting_ensemble.routes.keys()),
+                endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
+                children_uids=children_uids,
+            ),
+        )
 
-        try:
-            # Get the children model endpoints ids
-            children_uids = []
-            for _, c in voting_ensemble.routes.items():
-                if hasattr(c, "endpoint_uid"):
-                    children_uids.append(c.endpoint_uid)
-
-            model_endpoint = mlrun.common.schemas.ModelEndpoint(
-                metadata=mlrun.common.schemas.ModelEndpointMetadata(
-                    project=project, uid=endpoint_uid
-                ),
-                spec=mlrun.common.schemas.ModelEndpointSpec(
-                    function_uri=graph_server.function_uri,
-                    model=versioned_model_name,
-                    model_class=voting_ensemble.__class__.__name__,
-                    stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
-                        project=project, kind="stream"
-                    ),
-                    active=True,
-                    monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
-                    if voting_ensemble.context.server.track_models
-                    else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
-                ),
-                status=mlrun.common.schemas.ModelEndpointStatus(
-                    children=list(voting_ensemble.routes.keys()),
-                    endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.ROUTER,
-                    children_uids=children_uids,
-                ),
-            )
+        db = mlrun.get_run_db()
 
-            db = mlrun.get_run_db()
+        db.create_model_endpoint(
+            project=project,
+            endpoint_id=model_endpoint.metadata.uid,
+            model_endpoint=model_endpoint.dict(),
+        )
 
+        # Update model endpoint children type
+        for model_endpoint in children_uids:
+            current_endpoint = db.get_model_endpoint(
+                project=project, endpoint_id=model_endpoint
+            )
+            current_endpoint.status.endpoint_type = (
+                mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
+            )
             db.create_model_endpoint(
                 project=project,
-                endpoint_id=model_endpoint.metadata.uid,
-                model_endpoint=model_endpoint.dict(),
-            )
-
-            # Update model endpoint children type
-            for model_endpoint in children_uids:
-                current_endpoint = db.get_model_endpoint(
-                    project=project, endpoint_id=model_endpoint
-                )
-                current_endpoint.status.endpoint_type = (
-                    mlrun.common.schemas.model_monitoring.EndpointType.LEAF_EP
-                )
-                db.create_model_endpoint(
-                    project=project,
-                    endpoint_id=model_endpoint,
-                    model_endpoint=current_endpoint,
-                )
-
-        except Exception as exc:
-            logger.warning(
-                "Failed creating model endpoint record",
-                exc=exc,
-                traceback=traceback.format_exc(),
+                endpoint_id=model_endpoint,
+                model_endpoint=current_endpoint,
             )
-
-    except Exception as e:
-        logger.error("Failed to retrieve model endpoint object", exc=e)
+    elif (
+        model_ep
+        and (
+            model_ep.spec.monitoring_mode
+            == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
+        )
+        != voting_ensemble.context.server.track_models
+    ):
+        monitoring_mode = (
+            mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
+            if voting_ensemble.context.server.track_models
+            else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
+        )
+        db = mlrun.get_run_db()
+        db.patch_model_endpoint(
+            project=project,
+            endpoint_id=endpoint_uid,
+            attributes={"monitoring_mode": monitoring_mode},
+        )
+        logger.debug(
+            f"Updating model endpoint monitoring_mode to {monitoring_mode}",
            endpoint_id=endpoint_uid,
+        )
 
     return endpoint_uid
 
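
Beyond the model-endpoint bookkeeping rework shown above, routers.py consistently swaps `typing.Dict`/`typing.List` annotations for the builtin generics `dict[...]`/`list[...]` (PEP 585), which only work as runtime annotations on Python 3.9+; `Union` is still imported from `typing`. A minimal illustration of the resulting annotation style; it mirrors the `_mean_vote` signature but is not code copied from mlrun:

    # Illustrative sketch only; requires Python 3.9+ for builtin generic annotations.
    def mean_vote(all_predictions: list[list[float]], weights: list[float]) -> list[float]:
        # weighted mean across models for each event
        total = sum(weights)
        return [
            sum(w * p for w, p in zip(weights, event_preds)) / total
            for event_preds in zip(*all_predictions)
        ]

    print(mean_vote([[1.0, 2.0], [3.0, 4.0]], [0.5, 0.5]))  # [2.0, 3.0]
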
mlrun/serving/server.py CHANGED
@@ -22,9 +22,14 @@ import traceback
 import uuid
 from typing import Optional, Union
 
+from nuclio import Context as NuclioContext
+from nuclio.request import Logger as NuclioLogger
+
 import mlrun
+import mlrun.common.constants
 import mlrun.common.helpers
 import mlrun.model_monitoring
+import mlrun.utils
 from mlrun.config import config
 from mlrun.errors import err_to_str
 from mlrun.secrets import SecretsStore
@@ -37,10 +42,7 @@ from ..errors import MLRunInvalidArgumentError
 from ..model import ModelObj
 from ..utils import get_caller_globals
 from .states import RootFlowStep, RouterStep, get_function, graph_root_setter
-from .utils import (
-    event_id_key,
-    event_path_key,
-)
+from .utils import event_id_key, event_path_key
 
 
 class _StreamContext:
@@ -52,7 +54,7 @@ class _StreamContext:
         Initialize _StreamContext object.
         :param enabled: A boolean indication for applying the stream context
         :param parameters: Dictionary of optional parameters, such as `log_stream` and `stream_args`. Note that these
-                           parameters might be relevant to the output source such as `kafka_bootstrap_servers` if
+                           parameters might be relevant to the output source such as `kafka_brokers` if
                            the output source is from type Kafka.
         :param function_uri: Full value of the function uri, usually it's <project-name>/<function-name>
         """
@@ -70,15 +72,15 @@
                 function_uri, config.default_project
             )
 
-            stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
+            self.stream_uri = mlrun.model_monitoring.get_stream_path(project=project)
 
             if log_stream:
                 # Update the stream path to the log stream value
-                stream_uri = log_stream.format(project=project)
+                self.stream_uri = log_stream.format(project=project)
 
             stream_args = parameters.get("stream_args", {})
 
-            self.output_stream = get_stream_pusher(stream_uri, **stream_args)
+            self.output_stream = get_stream_pusher(self.stream_uri, **stream_args)
 
 
 class GraphServer(ModelObj):
@@ -152,6 +154,7 @@ class GraphServer(ModelObj):
         resource_cache: ResourceCache = None,
         logger=None,
         is_mock=False,
+        monitoring_mock=False,
     ):
         """for internal use, initialize all steps (recursively)"""
 
@@ -164,6 +167,7 @@
 
         context = GraphContext(server=self, nuclio_context=context, logger=logger)
         context.is_mock = is_mock
+        context.monitoring_mock = monitoring_mock
         context.root = self.graph
 
         context.stream = _StreamContext(
@@ -188,11 +192,6 @@
 
     def init_object(self, namespace):
         self.graph.init_object(self.context, namespace, self.load_mode, reset=True)
-        return (
-            v2_serving_async_handler
-            if config.datastore.async_source_mode == "enabled"
-            else v2_serving_handler
-        )
 
     def test(
         self,
@@ -310,17 +309,14 @@
 
     def wait_for_completion(self):
         """wait for async operation to complete"""
-        self.graph.wait_for_completion()
+        return self.graph.wait_for_completion()
 
 
 def v2_serving_init(context, namespace=None):
     """hook for nuclio init_context()"""
 
-    data = os.environ.get("SERVING_SPEC_ENV", "")
-    if not data:
-        raise MLRunInvalidArgumentError("failed to find spec env var")
-    spec = json.loads(data)
     context.logger.info("Initializing server from spec")
+    spec = mlrun.utils.get_serving_spec()
     server = GraphServer.from_dict(spec)
     if config.log_level.lower() == "debug":
         server.verbose = True
@@ -328,42 +324,54 @@ def v2_serving_init(context, namespace=None):
     server.http_trigger = getattr(context.trigger, "kind", "http") == "http"
     context.logger.info_with(
         "Setting current function",
-        current_functiton=os.environ.get("SERVING_CURRENT_FUNCTION", ""),
+        current_function=os.getenv("SERVING_CURRENT_FUNCTION", ""),
     )
-    server.set_current_function(os.environ.get("SERVING_CURRENT_FUNCTION", ""))
+    server.set_current_function(os.getenv("SERVING_CURRENT_FUNCTION", ""))
     context.logger.info_with(
         "Initializing states", namespace=namespace or get_caller_globals()
     )
-    server.init_states(context, namespace or get_caller_globals())
+    kwargs = {}
+    if hasattr(context, "is_mock"):
+        kwargs["is_mock"] = context.is_mock
+    server.init_states(
+        context,
+        namespace or get_caller_globals(),
+        **kwargs,
+    )
     context.logger.info("Initializing graph steps")
-    serving_handler = server.init_object(namespace or get_caller_globals())
+    server.init_object(namespace or get_caller_globals())
     # set the handler hook to point to our handler
-    setattr(context, "mlrun_handler", serving_handler)
+    setattr(context, "mlrun_handler", v2_serving_handler)
     setattr(context, "_server", server)
     context.logger.info_with("Serving was initialized", verbose=server.verbose)
     if server.verbose:
         context.logger.info(server.to_yaml())
 
-    if hasattr(context, "platform") and hasattr(
-        context.platform, "set_termination_callback"
-    ):
+    _set_callbacks(server, context)
+
+
+def _set_callbacks(server, context):
+    if not server.graph.supports_termination() or not hasattr(context, "platform"):
+        return
+
+    if hasattr(context.platform, "set_termination_callback"):
         context.logger.info(
             "Setting termination callback to terminate graph on worker shutdown"
         )
 
-        def termination_callback():
+        async def termination_callback():
            context.logger.info("Termination callback called")
            server.wait_for_completion()
            context.logger.info("Termination of async flow is completed")
 
        context.platform.set_termination_callback(termination_callback)
 
-    if hasattr(context, "platform") and hasattr(context.platform, "set_drain_callback"):
+    if hasattr(context.platform, "set_drain_callback"):
        context.logger.info(
            "Setting drain callback to terminate and restart the graph on a drain event (such as rebalancing)"
        )
 
-        def drain_callback():
+        async def drain_callback():
            context.logger.info("Drain callback called")
            server.wait_for_completion()
            context.logger.info(
@@ -383,16 +391,26 @@ def v2_serving_handler(context, event, get_body=False):
     if event.body == b"":
         event.body = None
 
-    return context._server.run(event, context, get_body)
-
+    # original path is saved in stream_path so it can be used by explicit ack, but path is reset to / as a
+    # workaround for NUC-178
+    # nuclio 1.12.12 added the topic attribute, and we must use it as part of the fix for NUC-233
+    # TODO: Remove fallback on event.path once support for nuclio<1.12.12 is dropped
+    event.stream_path = getattr(event, "topic", event.path)
+    if hasattr(event, "trigger") and event.trigger.kind in (
+        "kafka",
+        "kafka-cluster",
+        "v3ioStream",
+        "v3io-stream",
+        "rabbit-mq",
+        "rabbitMq",
+    ):
+        event.path = "/"
 
-async def v2_serving_async_handler(context, event, get_body=False):
-    """hook for nuclio handler()"""
-    return await context._server.run(event, context, get_body)
+    return context._server.run(event, context, get_body)
 
 
 def create_graph_server(
-    parameters={},
+    parameters=None,
     load_mode=None,
     graph=None,
     verbose=False,
@@ -408,14 +426,15 @@
     server.graph.add_route("my", class_name=MyModelClass, model_path="{path}", z=100)
     print(server.test("/v2/models/my/infer", testdata))
     """
+    parameters = parameters or {}
     server = GraphServer(graph, parameters, load_mode, verbose=verbose, **kwargs)
     server.set_current_function(
-        current_function or os.environ.get("SERVING_CURRENT_FUNCTION", "")
+        current_function or os.getenv("SERVING_CURRENT_FUNCTION", "")
     )
     return server
 
 
-class MockTrigger(object):
+class MockTrigger:
     """mock nuclio event trigger"""
 
     def __init__(self, kind="", name=""):
@@ -423,7 +442,7 @@ class MockTrigger(object):
         self.name = name
 
 
-class MockEvent(object):
+class MockEvent:
     """mock basic nuclio event object"""
 
     def __init__(
@@ -456,7 +475,7 @@ class MockEvent(object):
         return f"Event(id={self.id}, body={self.body}, method={self.method}, path={self.path}{error})"
 
 
-class Response(object):
+class Response:
     def __init__(self, headers=None, body=None, content_type=None, status_code=200):
         self.headers = headers or {}
         self.body = body
@@ -474,7 +493,13 @@
 class GraphContext:
     """Graph context object"""
 
-    def __init__(self, level="info", logger=None, server=None, nuclio_context=None):
+    def __init__(
+        self,
+        level="info",  # Unused argument
+        logger=None,
+        server=None,
+        nuclio_context: Optional[NuclioContext] = None,
+    ) -> None:
         self.state = None
         self.logger = logger
         self.worker_id = 0
@@ -484,7 +509,7 @@
         self.root = None
 
         if nuclio_context:
-            self.logger = nuclio_context.logger
+            self.logger: NuclioLogger = nuclio_context.logger
             self.Response = nuclio_context.Response
             if hasattr(nuclio_context, "trigger") and hasattr(
                 nuclio_context.trigger, "kind"
@@ -494,7 +519,7 @@
             if hasattr(nuclio_context, "platform"):
                 self.platform = nuclio_context.platform
         elif not logger:
-            self.logger = mlrun.utils.helpers.logger
+            self.logger: mlrun.utils.Logger = mlrun.utils.logger
 
         self._server = server
         self.current_function = None
@@ -507,7 +532,7 @@
         return self._server
 
     @property
-    def project(self):
+    def project(self) -> str:
         """current project name (for the current function)"""
         project, _, _, _ = mlrun.common.helpers.parse_versioned_object_uri(
             self._server.function_uri
@@ -563,7 +588,7 @@
             _,
             _,
             function_status,
-        ) = mlrun.runtimes.function.get_nuclio_deploy_status(name, project, tag)
+        ) = mlrun.runtimes.nuclio.function.get_nuclio_deploy_status(name, project, tag)
 
         if state in ["error", "unhealthy"]:
             raise ValueError(
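
One small but consequential fix in the server.py diff above: `create_graph_server` previously declared a mutable default (`parameters={}`), which Python evaluates once at definition time, so every caller that omitted the argument shared the same dict; 1.7.0 switches the default to `None` and normalizes it inside the function (`parameters = parameters or {}`). A generic illustration of the pattern, not mlrun code:

    # Generic sketch of the mutable-default fix; the names here are illustrative only.
    def configure(parameters=None):
        # evaluate the default per call instead of sharing one dict across all calls
        parameters = parameters or {}
        parameters.setdefault("verbose", False)
        return parameters

    a = configure()
    b = configure()
    assert a is not b  # each call now gets its own dict
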