mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/serving/utils.py CHANGED
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  import inspect
16
+ from typing import Optional
16
17
 
17
18
  from mlrun.utils import get_in, update_in
18
19
 
@@ -55,7 +56,12 @@ class StepToDict:
55
56
  "kwargs",
56
57
  ]
57
58
 
58
- def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
59
+ def to_dict(
60
+ self,
61
+ fields: Optional[list] = None,
62
+ exclude: Optional[list] = None,
63
+ strip: bool = False,
64
+ ):
59
65
  """convert the step object to a python dictionary"""
60
66
  fields = fields or getattr(self, "_dict_fields", None)
61
67
  if not fields:
@@ -105,5 +111,10 @@ class MonitoringApplicationToDict(StepToDict):
105
111
  class RouterToDict(StepToDict):
106
112
  _STEP_KIND = "router"
107
113
 
108
- def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
114
+ def to_dict(
115
+ self,
116
+ fields: Optional[list] = None,
117
+ exclude: Optional[list] = None,
118
+ strip: bool = False,
119
+ ):
109
120
  return super().to_dict(exclude=["routes"], strip=strip)
mlrun/serving/v1_serving.py CHANGED
@@ -18,6 +18,7 @@ import socket
18
18
  from copy import deepcopy
19
19
  from datetime import datetime
20
20
  from io import BytesIO
21
+ from typing import Optional
21
22
  from urllib.request import urlopen
22
23
 
23
24
  import nuclio
@@ -33,7 +34,7 @@ serving_handler = "handler"
33
34
  def new_v1_model_server(
34
35
  name,
35
36
  model_class: str,
36
- models: dict = None,
37
+ models: Optional[dict] = None,
37
38
  filename="",
38
39
  protocol="",
39
40
  image="",
@@ -68,7 +69,7 @@ def new_v1_model_server(
68
69
 
69
70
 
70
71
  class MLModelServer:
71
- def __init__(self, name: str, model_dir: str = None, model=None):
72
+ def __init__(self, name: str, model_dir: Optional[str] = None, model=None):
72
73
  self.name = name
73
74
  self.ready = False
74
75
  self.model_dir = model_dir
mlrun/serving/v2_serving.py CHANGED
@@ -12,20 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import random
15
16
  import threading
16
17
  import time
17
18
  import traceback
18
- from typing import Optional, Union
19
+ from typing import Optional
19
20
 
20
21
  import mlrun.artifacts
21
22
  import mlrun.common.model_monitoring.helpers
22
23
  import mlrun.common.schemas.model_monitoring
23
24
  import mlrun.model_monitoring
24
- from mlrun.errors import err_to_str
25
25
  from mlrun.utils import logger, now_date
26
26
 
27
- from ..common.helpers import parse_versioned_object_uri
28
- from .server import GraphServer
29
27
  from .utils import StepToDict, _extract_input_data, _update_result_body
30
28
 
31
29
 
@@ -33,12 +31,12 @@ class V2ModelServer(StepToDict):
33
31
  def __init__(
34
32
  self,
35
33
  context=None,
36
- name: str = None,
37
- model_path: str = None,
34
+ name: Optional[str] = None,
35
+ model_path: Optional[str] = None,
38
36
  model=None,
39
37
  protocol=None,
40
- input_path: str = None,
41
- result_path: str = None,
38
+ input_path: Optional[str] = None,
39
+ result_path: Optional[str] = None,
42
40
  shard_by_endpoint: Optional[bool] = None,
43
41
  **kwargs,
44
42
  ):
@@ -97,9 +95,6 @@ class V2ModelServer(StepToDict):
97
95
  :param kwargs: extra arguments (can be accessed using self.get_param(key))
98
96
  """
99
97
  self.name = name
100
- self.version = ""
101
- if name and ":" in name:
102
- self.name, self.version = name.split(":", 1)
103
98
  self.context = context
104
99
  self.ready = False
105
100
  self.error = ""
@@ -110,21 +105,17 @@ class V2ModelServer(StepToDict):
110
105
  self._result_path = result_path
111
106
  self._kwargs = kwargs # for to_dict()
112
107
  self._params = kwargs
113
- self._model_logger = (
114
- _ModelLogPusher(self, context)
115
- if context and context.stream.enabled
116
- else None
117
- )
118
-
119
108
  self.metrics = {}
120
109
  self.labels = {}
121
110
  self.model = None
122
111
  if model:
123
112
  self.model = model
124
113
  self.ready = True
125
- self._versioned_model_name = None
126
- self.model_endpoint_uid = None
114
+ self.model_endpoint_uid = kwargs.get("model_endpoint_uid", None)
127
115
  self.shard_by_endpoint = shard_by_endpoint
116
+ self._model_logger = None
117
+ self.initialized = False
118
+ self.output_schema = kwargs.get("outputs", [])
128
119
 
129
120
  def _load_and_update_state(self):
130
121
  try:
@@ -136,7 +127,7 @@ class V2ModelServer(StepToDict):
136
127
  self.ready = True
137
128
  self.context.logger.info(f"model {self.name} was loaded")
138
129
 
139
- def post_init(self, mode="sync"):
130
+ def post_init(self, mode="sync", **kwargs):
140
131
  """sync/async model loading, for internal use"""
141
132
  if not self.ready:
142
133
  if mode == "async":
@@ -146,17 +137,35 @@ class V2ModelServer(StepToDict):
146
137
  else:
147
138
  self._load_and_update_state()
148
139
 
149
- server = getattr(self.context, "_server", None) or getattr(
150
- self.context, "server", None
151
- )
152
- if not server:
153
- logger.warn("GraphServer not initialized for VotingEnsemble instance")
154
- return
140
+ if self.ready and not self.context.is_mock and not self.model_spec:
141
+ self.get_model()
155
142
 
156
- if not self.context.is_mock or self.context.monitoring_mock:
157
- self.model_endpoint_uid = _init_endpoint_record(
158
- graph_server=server, model=self
159
- )
143
+ if self.model_spec:
144
+ self.output_schema = self.output_schema or [
145
+ feature.name for feature in self.model_spec.outputs
146
+ ]
147
+
148
+ if (
149
+ kwargs.get("endpoint_type", mlrun.common.schemas.EndpointType.LEAF_EP)
150
+ == mlrun.common.schemas.EndpointType.NODE_EP
151
+ ):
152
+ self._initialize_model_logger()
153
+
154
+ def _lazy_init(self, event):
155
+ if event and isinstance(event, dict) and not self.initialized:
156
+ background_task_state = event.get("background_task_state", None)
157
+ if (
158
+ background_task_state
159
+ == mlrun.common.schemas.BackgroundTaskState.succeeded
160
+ ):
161
+ self._model_logger = (
162
+ _ModelLogPusher(self, self.context)
163
+ if self.context
164
+ and self.context.stream.enabled
165
+ and self.model_endpoint_uid
166
+ else None
167
+ )
168
+ self.initialized = True
160
169
 
161
170
  def get_param(self, key: str, default=None):
162
171
  """get param by key (specified in the model or the function)"""
@@ -198,13 +207,15 @@ class V2ModelServer(StepToDict):
198
207
  extra dataitems dictionary
199
208
 
200
209
  """
201
- model_file, self.model_spec, extra_dataitems = mlrun.artifacts.get_model(
202
- self.model_path, suffix
203
- )
204
- if self.model_spec and self.model_spec.parameters:
205
- for key, value in self.model_spec.parameters.items():
206
- self._params[key] = value
207
- return model_file, extra_dataitems
210
+ if self.model_path:
211
+ model_file, self.model_spec, extra_dataitems = mlrun.artifacts.get_model(
212
+ self.model_path, suffix
213
+ )
214
+ if self.model_spec and self.model_spec.parameters:
215
+ for key, value in self.model_spec.parameters.items():
216
+ self._params[key] = value
217
+ return model_file, extra_dataitems
218
+ return None, None
208
219
 
209
220
  def load(self):
210
221
  """model loading function, see also .get_model() method"""
@@ -230,25 +241,10 @@ class V2ModelServer(StepToDict):
230
241
  request = self.preprocess(event_body, op)
231
242
  return self.validate(request, op)
232
243
 
233
- @property
234
- def versioned_model_name(self):
235
- if self._versioned_model_name:
236
- return self._versioned_model_name
237
-
238
- # Generating version model value based on the model name and model version
239
- if self.model_path and self.model_path.startswith("store://"):
240
- # Enrich the model server with the model artifact metadata
241
- self.get_model()
242
- if not self.version:
243
- # Enrich the model version with the model artifact tag
244
- self.version = self.model_spec.tag
245
- self.labels = self.model_spec.labels
246
- version = self.version or "latest"
247
- self._versioned_model_name = f"{self.name}:{version}"
248
- return self._versioned_model_name
249
-
250
244
  def do_event(self, event, *args, **kwargs):
251
245
  """main model event handler method"""
246
+ if not self.initialized:
247
+ self._lazy_init(event.body)
252
248
  start = now_date()
253
249
  original_body = event.body
254
250
  event_body = _extract_input_data(self._input_path, event.body)
@@ -289,13 +285,12 @@ class V2ModelServer(StepToDict):
289
285
 
290
286
  response = {
291
287
  "id": event_id,
292
- "model_name": self.name,
288
+ "model_name": self.name.split(":")[0],
293
289
  "outputs": outputs,
294
290
  "timestamp": start.isoformat(sep=" ", timespec="microseconds"),
295
291
  }
296
- if self.version:
297
- response["model_version"] = self.version
298
-
292
+ if self.model_endpoint_uid:
293
+ response["model_endpoint_uid"] = self.model_endpoint_uid
299
294
  elif op == "ready" and event.method == "GET":
300
295
  # get model health operation
301
296
  setattr(event, "terminated", True)
@@ -320,8 +315,8 @@ class V2ModelServer(StepToDict):
320
315
  # get model metadata operation
321
316
  setattr(event, "terminated", True)
322
317
  event_body = {
323
- "name": self.name,
324
- "version": self.version or "",
318
+ "name": self.name.split(":")[0],
319
+ "model_endpoint_uid": self.model_endpoint_uid or "",
325
320
  "inputs": [],
326
321
  "outputs": [],
327
322
  }
@@ -355,8 +350,8 @@ class V2ModelServer(StepToDict):
355
350
  "model_name": self.name,
356
351
  "outputs": outputs,
357
352
  }
358
- if self.version:
359
- response["model_version"] = self.version
353
+ if self.model_endpoint_uid:
354
+ response["model_endpoint_uid"] = self.model_endpoint_uid
360
355
 
361
356
  elif hasattr(self, "op_" + op):
362
357
  # custom operation (child methods starting with "op_")
@@ -472,30 +467,70 @@ class V2ModelServer(StepToDict):
472
467
  request["inputs"] = new_inputs
473
468
  return request
474
469
 
470
+ def _initialize_model_logger(self):
471
+ server: mlrun.serving.GraphServer = getattr(
472
+ self.context, "_server", None
473
+ ) or getattr(self.context, "server", None)
474
+ if not self.context.is_mock or self.context.monitoring_mock:
475
+ if server.model_endpoint_creation_task_name:
476
+ background_task = mlrun.get_run_db().get_project_background_task(
477
+ server.project, server.model_endpoint_creation_task_name
478
+ )
479
+ logger.debug(
480
+ "Checking model endpoint creation task status",
481
+ task_name=server.model_endpoint_creation_task_name,
482
+ )
483
+ if (
484
+ background_task.status.state
485
+ in mlrun.common.schemas.BackgroundTaskState.terminal_states()
486
+ ):
487
+ logger.debug(
488
+ f"Model endpoint creation task completed with state {background_task.status.state}"
489
+ )
490
+ if (
491
+ background_task.status.state
492
+ == mlrun.common.schemas.BackgroundTaskState.succeeded
493
+ ):
494
+ self._model_logger = (
495
+ _ModelLogPusher(self, self.context)
496
+ if self.context
497
+ and self.context.stream.enabled
498
+ and self.model_endpoint_uid
499
+ else None
500
+ )
501
+ self.initialized = True
502
+
503
+ else: # in progress
504
+ logger.debug(
505
+ f"Model endpoint creation task is still in progress with the current state: "
506
+ f"{background_task.status.state}.",
507
+ name=self.name,
508
+ )
509
+ else:
510
+ logger.debug(
511
+ "Model endpoint creation task name not provided",
512
+ )
513
+
475
514
 
476
515
  class _ModelLogPusher:
477
- def __init__(self, model, context, output_stream=None):
516
+ def __init__(self, model: V2ModelServer, context, output_stream=None):
478
517
  self.model = model
479
518
  self.verbose = context.verbose
480
519
  self.hostname = context.stream.hostname
481
520
  self.function_uri = context.stream.function_uri
482
- self.stream_path = context.stream.stream_uri
483
- self.stream_batch = int(context.get_param("log_stream_batch", 1))
484
- self.stream_sample = int(context.get_param("log_stream_sample", 1))
521
+ self.sampling_percentage = float(context.get_param("sampling_percentage", 100))
485
522
  self.output_stream = output_stream or context.stream.output_stream
486
523
  self._worker = context.worker_id
487
- self._sample_iter = 0
488
- self._batch_iter = 0
489
- self._batch = []
490
524
 
491
525
  def base_data(self):
492
526
  base_data = {
493
527
  "class": self.model.__class__.__name__,
494
528
  "worker": self._worker,
495
529
  "model": self.model.name,
496
- "version": self.model.version,
497
530
  "host": self.hostname,
498
531
  "function_uri": self.function_uri,
532
+ "endpoint_id": self.model.model_endpoint_uid,
533
+ "sampling_percentage": self.sampling_percentage,
499
534
  }
500
535
  if getattr(self.model, "labels", None):
501
536
  base_data["labels"] = self.model.labels
@@ -515,143 +550,66 @@ class _ModelLogPusher:
515
550
  self.output_stream.push([data], partition_key=partition_key)
516
551
  return
517
552
 
518
- self._sample_iter = (self._sample_iter + 1) % self.stream_sample
519
- if self.output_stream and self._sample_iter == 0:
553
+ if self.output_stream:
554
+ # Ensure that the inputs are a list of lists
555
+ request["inputs"] = (
556
+ request["inputs"]
557
+ if not any(not isinstance(req, list) for req in request["inputs"])
558
+ else [request["inputs"]]
559
+ )
520
560
  microsec = (now_date() - start).microseconds
521
561
 
522
- if self.stream_batch > 1:
523
- if self._batch_iter == 0:
524
- self._batch = []
525
- self._batch.append(
526
- [request, op, resp, str(start), microsec, self.model.metrics]
562
+ if self.sampling_percentage != 100:
563
+ # Randomly select a subset of the requests based on the percentage
564
+ num_of_inputs = len(request["inputs"])
565
+ sampled_requests_indices = self._pick_random_requests(
566
+ num_of_inputs, self.sampling_percentage
527
567
  )
528
- self._batch_iter = (self._batch_iter + 1) % self.stream_batch
529
-
530
- if self._batch_iter == 0:
531
- data = self.base_data()
532
- data["headers"] = [
533
- "request",
534
- "op",
535
- "resp",
536
- "when",
537
- "microsec",
538
- "metrics",
568
+ if not sampled_requests_indices:
569
+ # No events were selected for sampling
570
+ return
571
+
572
+ request["inputs"] = [
573
+ request["inputs"][i] for i in sampled_requests_indices
574
+ ]
575
+
576
+ if resp and "outputs" in resp and isinstance(resp["outputs"], list):
577
+ resp["outputs"] = [
578
+ resp["outputs"][i] for i in sampled_requests_indices
539
579
  ]
540
- data["values"] = self._batch
541
- self.output_stream.push([data], partition_key=partition_key)
542
- else:
543
- data = self.base_data()
544
- data["request"] = request
545
- data["op"] = op
546
- data["resp"] = resp
547
- data["when"] = start_str
548
- data["microsec"] = microsec
549
- if getattr(self.model, "metrics", None):
550
- data["metrics"] = self.model.metrics
551
- self.output_stream.push([data], partition_key=partition_key)
552
-
553
-
554
- def _init_endpoint_record(
555
- graph_server: GraphServer, model: V2ModelServer
556
- ) -> Union[str, None]:
557
- """
558
- Initialize model endpoint record and write it into the DB. In general, this method retrieve the unique model
559
- endpoint ID which is generated according to the function uri and the model version. If the model endpoint is
560
- already exist in the DB, we skip the creation process. Otherwise, it writes the new model endpoint record to the DB.
561
-
562
- :param graph_server: A GraphServer object which will be used for getting the function uri.
563
- :param model: Base model serving class (v2). It contains important details for the model endpoint record
564
- such as model name, model path, and model version.
565
-
566
- :return: Model endpoint unique ID.
567
- """
568
-
569
- logger.info("Initializing endpoint records")
570
-
571
- # Generate required values for the model endpoint record
572
- try:
573
- # Getting project name from the function uri
574
- project, uri, tag, hash_key = parse_versioned_object_uri(
575
- graph_server.function_uri
576
- )
577
- except Exception as e:
578
- logger.error("Failed to parse function URI", exc=err_to_str(e))
579
- return None
580
-
581
- # Generating model endpoint ID based on function uri and model version
582
- uid = mlrun.common.model_monitoring.create_model_endpoint_uid(
583
- function_uri=graph_server.function_uri,
584
- versioned_model=model.versioned_model_name,
585
- ).uid
586
-
587
- try:
588
- model_ep = mlrun.get_run_db().get_model_endpoint(
589
- project=project, endpoint_id=uid
590
- )
591
- except mlrun.errors.MLRunNotFoundError:
592
- model_ep = None
593
- except mlrun.errors.MLRunBadRequestError as err:
594
- logger.info(
595
- "Cannot get the model endpoints store", err=mlrun.errors.err_to_str(err)
596
- )
597
- return
598
-
599
- if model.context.server.track_models and not model_ep:
600
- logger.info("Creating a new model endpoint record", endpoint_id=uid)
601
- model_endpoint = mlrun.common.schemas.ModelEndpoint(
602
- metadata=mlrun.common.schemas.ModelEndpointMetadata(
603
- project=project, labels=model.labels, uid=uid
604
- ),
605
- spec=mlrun.common.schemas.ModelEndpointSpec(
606
- function_uri=graph_server.function_uri,
607
- model=model.versioned_model_name,
608
- model_class=model.__class__.__name__,
609
- model_uri=model.model_path,
610
- stream_path=model.context.stream.stream_uri,
611
- active=True,
612
- monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
613
- ),
614
- status=mlrun.common.schemas.ModelEndpointStatus(
615
- endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
616
- ),
617
- )
580
+ if self.model.output_schema and len(self.model.output_schema) != len(
581
+ resp["outputs"][0]
582
+ ):
583
+ logger.info(
584
+ "The number of outputs returned by the model does not match the number of outputs "
585
+ "specified in the model endpoint.",
586
+ model_endpoint=self.model.name,
587
+ model_endpoint_id=self.model.model_endpoint_uid,
588
+ output_len=len(resp["outputs"][0]),
589
+ schema_len=len(self.model.output_schema),
590
+ )
618
591
 
619
- db = mlrun.get_run_db()
620
- db.create_model_endpoint(
621
- project=project,
622
- endpoint_id=uid,
623
- model_endpoint=model_endpoint.dict(),
624
- )
592
+ data = self.base_data()
593
+ data["request"] = request
594
+ data["op"] = op
595
+ data["resp"] = resp
596
+ data["when"] = start_str
597
+ data["microsec"] = microsec
598
+ if getattr(self.model, "metrics", None):
599
+ data["metrics"] = self.model.metrics
600
+ data["effective_sample_count"] = len(request["inputs"])
601
+ self.output_stream.push([data], partition_key=partition_key)
625
602
 
626
- elif model_ep:
627
- attributes = {}
628
- old_model_uri = model_ep.spec.model_uri
629
- mlrun.model_monitoring.helpers.enrich_model_endpoint_with_model_uri(
630
- model_endpoint=model_ep,
631
- model_obj=model.model_spec,
632
- )
633
- if model_ep.spec.model_uri != old_model_uri:
634
- attributes["model_uri"] = model_ep.spec.model_uri
635
- if (
636
- model_ep.spec.monitoring_mode
637
- == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
638
- ) != model.context.server.track_models:
639
- attributes["monitoring_mode"] = (
640
- mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
641
- if model.context.server.track_models
642
- else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
643
- )
644
- if attributes:
645
- db = mlrun.get_run_db()
646
- db.patch_model_endpoint(
647
- project=project,
648
- endpoint_id=uid,
649
- attributes=attributes,
650
- )
651
- logger.info(
652
- "Updating model endpoint attributes",
653
- attributes=attributes,
654
- endpoint_id=uid,
655
- )
603
+ @staticmethod
604
+ def _pick_random_requests(num_of_reqs: int, percentage: float) -> list[int]:
605
+ """
606
+ Randomly selects indices of requests to sample based on the given percentage
607
+
608
+ :param num_of_reqs: Number of requests to select from
609
+ :param percentage: Sample percentage for each request
610
+ :return: A list containing the indices of the selected requests
611
+ """
656
612
 
657
- return uid
613
+ return [
614
+ req for req in range(num_of_reqs) if random.random() < (percentage / 100)
615
+ ]
mlrun/track/__init__.py CHANGED
@@ -11,6 +11,6 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
14
+
15
15
  from mlrun.track.tracker import Tracker
16
16
  from mlrun.track.tracker_manager import TrackerManager
mlrun/track/tracker.py CHANGED
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from abc import ABC, abstractmethod
16
- from typing import Any, Union
16
+ from typing import Any, Optional, Union
17
17
 
18
18
  from mlrun.artifacts import Artifact, ModelArtifact
19
19
  from mlrun.execution import MLClientCtx
@@ -63,7 +63,7 @@ class Tracker(ABC):
63
63
  project: MlrunProject,
64
64
  reference_id: Any,
65
65
  function_name: str,
66
- handler: str = None,
66
+ handler: Optional[str] = None,
67
67
  **kwargs,
68
68
  ) -> RunObject:
69
69
  """
@@ -15,6 +15,7 @@ import os
15
15
  import pathlib
16
16
  import tempfile
17
17
  import zipfile
18
+ from typing import Optional
18
19
 
19
20
  import mlflow
20
21
  import mlflow.entities
@@ -190,7 +191,7 @@ class MLFlowTracker(Tracker):
190
191
  project: MlrunProject,
191
192
  reference_id: str,
192
193
  function_name: str,
193
- handler: str = None,
194
+ handler: Optional[str] = None,
194
195
  **kwargs,
195
196
  ) -> RunObject:
196
197
  """
@@ -251,9 +252,9 @@ class MLFlowTracker(Tracker):
251
252
  self,
252
253
  project: MlrunProject,
253
254
  reference_id: str,
254
- key: str = None,
255
- metrics: dict = None,
256
- extra_data: dict = None,
255
+ key: Optional[str] = None,
256
+ metrics: Optional[dict] = None,
257
+ extra_data: Optional[dict] = None,
257
258
  ) -> ModelArtifact:
258
259
  """
259
260
  Import a model from MLFlow to MLRun.
@@ -290,7 +291,7 @@ class MLFlowTracker(Tracker):
290
291
  return model
291
292
 
292
293
  def import_artifact(
293
- self, project: MlrunProject, reference_id: str, key: str = None
294
+ self, project: MlrunProject, reference_id: str, key: Optional[str] = None
294
295
  ) -> Artifact:
295
296
  """
296
297
  Import an artifact from MLFlow to MLRun.