mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/serving/v2_serving.py CHANGED
@@ -15,12 +15,13 @@
15
15
  import threading
16
16
  import time
17
17
  import traceback
18
- from typing import Union
18
+ from typing import Optional, Union
19
19
 
20
- import mlrun.common.model_monitoring
20
+ import mlrun.artifacts
21
+ import mlrun.common.model_monitoring.helpers
21
22
  import mlrun.common.schemas.model_monitoring
22
- from mlrun.artifacts import ModelArtifact # noqa: F401
23
- from mlrun.config import config
23
+ import mlrun.model_monitoring
24
+ from mlrun.errors import err_to_str
24
25
  from mlrun.utils import logger, now_date
25
26
 
26
27
  from ..common.helpers import parse_versioned_object_uri
@@ -38,6 +39,7 @@ class V2ModelServer(StepToDict):
38
39
  protocol=None,
39
40
  input_path: str = None,
40
41
  result_path: str = None,
42
+ shard_by_endpoint: Optional[bool] = None,
41
43
  **kwargs,
42
44
  ):
43
45
  """base model serving class (v2), using similar API to KFServing v2 and Triton
@@ -62,11 +64,11 @@ class V2ModelServer(StepToDict):
62
64
  class MyClass(V2ModelServer):
63
65
  def load(self):
64
66
  # load and initialize the model and/or other elements
65
- model_file, extra_data = self.get_model(suffix='.pkl')
67
+ model_file, extra_data = self.get_model(suffix=".pkl")
66
68
  self.model = load(open(model_file, "rb"))
67
69
 
68
70
  def predict(self, request):
69
- events = np.array(request['inputs'])
71
+ events = np.array(request["inputs"])
70
72
  dmatrix = xgb.DMatrix(events)
71
73
  result: xgb.DMatrix = self.model.predict(dmatrix)
72
74
  return {"outputs": result.tolist()}
@@ -90,6 +92,8 @@ class V2ModelServer(StepToDict):
90
92
  this require that the event body will behave like a dict, example:
91
93
  event: {"x": 5} , result_path="resp" means the returned response will be written
92
94
  to event["y"] resulting in {"x": 5, "resp": <result>}
95
+ :param shard_by_endpoint: whether to use the endpoint as the partition/sharding key when writing to model
96
+ monitoring stream. Defaults to True.
93
97
  :param kwargs: extra arguments (can be accessed using self.get_param(key))
94
98
  """
95
99
  self.name = name
@@ -101,7 +105,7 @@ class V2ModelServer(StepToDict):
101
105
  self.error = ""
102
106
  self.protocol = protocol or "v2"
103
107
  self.model_path = model_path
104
- self.model_spec: mlrun.artifacts.ModelArtifact = None
108
+ self.model_spec: Optional[mlrun.artifacts.ModelArtifact] = None
105
109
  self._input_path = input_path
106
110
  self._result_path = result_path
107
111
  self._kwargs = kwargs # for to_dict()
@@ -118,7 +122,9 @@ class V2ModelServer(StepToDict):
118
122
  if model:
119
123
  self.model = model
120
124
  self.ready = True
125
+ self._versioned_model_name = None
121
126
  self.model_endpoint_uid = None
127
+ self.shard_by_endpoint = shard_by_endpoint
122
128
 
123
129
  def _load_and_update_state(self):
124
130
  try:
@@ -147,7 +153,7 @@ class V2ModelServer(StepToDict):
147
153
  logger.warn("GraphServer not initialized for VotingEnsemble instance")
148
154
  return
149
155
 
150
- if not self.context.is_mock or self.context.server.track_models:
156
+ if not self.context.is_mock or self.context.monitoring_mock:
151
157
  self.model_endpoint_uid = _init_endpoint_record(
152
158
  graph_server=server, model=self
153
159
  )
@@ -175,9 +181,9 @@ class V2ModelServer(StepToDict):
175
181
  ::
176
182
 
177
183
  def load(self):
178
- model_file, extra_data = self.get_model(suffix='.pkl')
184
+ model_file, extra_data = self.get_model(suffix=".pkl")
179
185
  self.model = load(open(model_file, "rb"))
180
- categories = extra_data['categories'].as_df()
186
+ categories = extra_data["categories"].as_df()
181
187
 
182
188
  Parameters
183
189
  ----------
@@ -224,6 +230,23 @@ class V2ModelServer(StepToDict):
224
230
  request = self.preprocess(event_body, op)
225
231
  return self.validate(request, op)
226
232
 
233
+ @property
234
+ def versioned_model_name(self):
235
+ if self._versioned_model_name:
236
+ return self._versioned_model_name
237
+
238
+ # Generating version model value based on the model name and model version
239
+ if self.model_path and self.model_path.startswith("store://"):
240
+ # Enrich the model server with the model artifact metadata
241
+ self.get_model()
242
+ if not self.version:
243
+ # Enrich the model version with the model artifact tag
244
+ self.version = self.model_spec.tag
245
+ self.labels = self.model_spec.labels
246
+ version = self.version or "latest"
247
+ self._versioned_model_name = f"{self.name}:{version}"
248
+ return self._versioned_model_name
249
+
227
250
  def do_event(self, event, *args, **kwargs):
228
251
  """main model event handler method"""
229
252
  start = now_date()
@@ -231,6 +254,11 @@ class V2ModelServer(StepToDict):
231
254
  event_body = _extract_input_data(self._input_path, event.body)
232
255
  event_id = event.id
233
256
  op = event.path.strip("/")
257
+
258
+ partition_key = (
259
+ self.model_endpoint_uid if self.shard_by_endpoint is not False else None
260
+ )
261
+
234
262
  if event_body and isinstance(event_body, dict):
235
263
  op = op or event_body.get("operation")
236
264
  event_id = event_body.get("id", event_id)
@@ -250,13 +278,20 @@ class V2ModelServer(StepToDict):
250
278
  except Exception as exc:
251
279
  request["id"] = event_id
252
280
  if self._model_logger:
253
- self._model_logger.push(start, request, op=op, error=exc)
281
+ self._model_logger.push(
282
+ start,
283
+ request,
284
+ op=op,
285
+ error=exc,
286
+ partition_key=partition_key,
287
+ )
254
288
  raise exc
255
289
 
256
290
  response = {
257
291
  "id": event_id,
258
292
  "model_name": self.name,
259
293
  "outputs": outputs,
294
+ "timestamp": start.isoformat(sep=" ", timespec="microseconds"),
260
295
  }
261
296
  if self.version:
262
297
  response["model_version"] = self.version
@@ -286,7 +321,7 @@ class V2ModelServer(StepToDict):
286
321
  setattr(event, "terminated", True)
287
322
  event_body = {
288
323
  "name": self.name,
289
- "version": self.version,
324
+ "version": self.version or "",
290
325
  "inputs": [],
291
326
  "outputs": [],
292
327
  }
@@ -306,7 +341,13 @@ class V2ModelServer(StepToDict):
306
341
  except Exception as exc:
307
342
  request["id"] = event_id
308
343
  if self._model_logger:
309
- self._model_logger.push(start, request, op=op, error=exc)
344
+ self._model_logger.push(
345
+ start,
346
+ request,
347
+ op=op,
348
+ error=exc,
349
+ partition_key=partition_key,
350
+ )
310
351
  raise exc
311
352
 
312
353
  response = {
@@ -330,11 +371,20 @@ class V2ModelServer(StepToDict):
330
371
  if self._model_logger:
331
372
  inputs, outputs = self.logged_results(request, response, op)
332
373
  if inputs is None and outputs is None:
333
- self._model_logger.push(start, request, response, op)
374
+ self._model_logger.push(
375
+ start, request, response, op, partition_key=partition_key
376
+ )
334
377
  else:
335
378
  track_request = {"id": event_id, "inputs": inputs or []}
336
379
  track_response = {"outputs": outputs or []}
337
- self._model_logger.push(start, track_request, track_response, op)
380
+ # TODO : check dict/list
381
+ self._model_logger.push(
382
+ start,
383
+ track_request,
384
+ track_response,
385
+ op,
386
+ partition_key=partition_key,
387
+ )
338
388
  event.body = _update_result_body(self._result_path, original_body, response)
339
389
  return event
340
390
 
@@ -375,8 +425,10 @@ class V2ModelServer(StepToDict):
375
425
  """postprocess, before returning response"""
376
426
  return request
377
427
 
378
- def predict(self, request: dict) -> dict:
379
- """model prediction operation"""
428
+ def predict(self, request: dict) -> list:
429
+ """model prediction operation
430
+ :return: list with the model prediction results (can be multi-port) or list of lists for multiple predictions
431
+ """
380
432
  raise NotImplementedError()
381
433
 
382
434
  def explain(self, request: dict) -> dict:
@@ -449,7 +501,7 @@ class _ModelLogPusher:
449
501
  base_data["labels"] = self.model.labels
450
502
  return base_data
451
503
 
452
- def push(self, start, request, resp=None, op=None, error=None):
504
+ def push(self, start, request, resp=None, op=None, error=None, partition_key=None):
453
505
  start_str = start.isoformat(sep=" ", timespec="microseconds")
454
506
  if error:
455
507
  data = self.base_data()
@@ -460,7 +512,7 @@ class _ModelLogPusher:
460
512
  if self.verbose:
461
513
  message = f"{message}\n{traceback.format_exc()}"
462
514
  data["error"] = message
463
- self.output_stream.push([data])
515
+ self.output_stream.push([data], partition_key=partition_key)
464
516
  return
465
517
 
466
518
  self._sample_iter = (self._sample_iter + 1) % self.stream_sample
@@ -486,7 +538,7 @@ class _ModelLogPusher:
486
538
  "metrics",
487
539
  ]
488
540
  data["values"] = self._batch
489
- self.output_stream.push([data])
541
+ self.output_stream.push([data], partition_key=partition_key)
490
542
  else:
491
543
  data = self.base_data()
492
544
  data["request"] = request
@@ -496,7 +548,7 @@ class _ModelLogPusher:
496
548
  data["microsec"] = microsec
497
549
  if getattr(self.model, "metrics", None):
498
550
  data["metrics"] = self.model.metrics
499
- self.output_stream.push([data])
551
+ self.output_stream.push([data], partition_key=partition_key)
500
552
 
501
553
 
502
554
  def _init_endpoint_record(
@@ -523,62 +575,83 @@ def _init_endpoint_record(
523
575
  graph_server.function_uri
524
576
  )
525
577
  except Exception as e:
526
- logger.error("Failed to parse function URI", exc=e)
578
+ logger.error("Failed to parse function URI", exc=err_to_str(e))
527
579
  return None
528
580
 
529
- # Generating version model value based on the model name and model version
530
- if model.version:
531
- versioned_model_name = f"{model.name}:{model.version}"
532
- else:
533
- versioned_model_name = f"{model.name}:latest"
534
-
535
581
  # Generating model endpoint ID based on function uri and model version
536
582
  uid = mlrun.common.model_monitoring.create_model_endpoint_uid(
537
- function_uri=graph_server.function_uri, versioned_model=versioned_model_name
583
+ function_uri=graph_server.function_uri,
584
+ versioned_model=model.versioned_model_name,
538
585
  ).uid
539
586
 
540
- # If model endpoint object was found in DB, skip the creation process.
541
587
  try:
542
- mlrun.get_run_db().get_model_endpoint(project=project, endpoint_id=uid)
543
-
588
+ model_ep = mlrun.get_run_db().get_model_endpoint(
589
+ project=project, endpoint_id=uid
590
+ )
544
591
  except mlrun.errors.MLRunNotFoundError:
592
+ model_ep = None
593
+ except mlrun.errors.MLRunBadRequestError as err:
594
+ logger.info(
595
+ "Cannot get the model endpoints store", err=mlrun.errors.err_to_str(err)
596
+ )
597
+ return
598
+
599
+ if model.context.server.track_models and not model_ep:
545
600
  logger.info("Creating a new model endpoint record", endpoint_id=uid)
601
+ model_endpoint = mlrun.common.schemas.ModelEndpoint(
602
+ metadata=mlrun.common.schemas.ModelEndpointMetadata(
603
+ project=project, labels=model.labels, uid=uid
604
+ ),
605
+ spec=mlrun.common.schemas.ModelEndpointSpec(
606
+ function_uri=graph_server.function_uri,
607
+ model=model.versioned_model_name,
608
+ model_class=model.__class__.__name__,
609
+ model_uri=model.model_path,
610
+ stream_path=model.context.stream.stream_uri,
611
+ active=True,
612
+ monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled,
613
+ ),
614
+ status=mlrun.common.schemas.ModelEndpointStatus(
615
+ endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
616
+ ),
617
+ )
546
618
 
547
- try:
548
- model_endpoint = mlrun.common.schemas.ModelEndpoint(
549
- metadata=mlrun.common.schemas.ModelEndpointMetadata(
550
- project=project, labels=model.labels, uid=uid
551
- ),
552
- spec=mlrun.common.schemas.ModelEndpointSpec(
553
- function_uri=graph_server.function_uri,
554
- model=versioned_model_name,
555
- model_class=model.__class__.__name__,
556
- model_uri=model.model_path,
557
- stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
558
- project=project, kind="stream"
559
- ),
560
- active=True,
561
- monitoring_mode=mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
562
- if model.context.server.track_models
563
- else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled,
564
- ),
565
- status=mlrun.common.schemas.ModelEndpointStatus(
566
- endpoint_type=mlrun.common.schemas.model_monitoring.EndpointType.NODE_EP
567
- ),
568
- )
619
+ db = mlrun.get_run_db()
620
+ db.create_model_endpoint(
621
+ project=project,
622
+ endpoint_id=uid,
623
+ model_endpoint=model_endpoint.dict(),
624
+ )
569
625
 
626
+ elif model_ep:
627
+ attributes = {}
628
+ old_model_uri = model_ep.spec.model_uri
629
+ mlrun.model_monitoring.helpers.enrich_model_endpoint_with_model_uri(
630
+ model_endpoint=model_ep,
631
+ model_obj=model.model_spec,
632
+ )
633
+ if model_ep.spec.model_uri != old_model_uri:
634
+ attributes["model_uri"] = model_ep.spec.model_uri
635
+ if (
636
+ model_ep.spec.monitoring_mode
637
+ == mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
638
+ ) != model.context.server.track_models:
639
+ attributes["monitoring_mode"] = (
640
+ mlrun.common.schemas.model_monitoring.ModelMonitoringMode.enabled
641
+ if model.context.server.track_models
642
+ else mlrun.common.schemas.model_monitoring.ModelMonitoringMode.disabled
643
+ )
644
+ if attributes:
570
645
  db = mlrun.get_run_db()
571
-
572
- db.create_model_endpoint(
646
+ db.patch_model_endpoint(
573
647
  project=project,
574
648
  endpoint_id=uid,
575
- model_endpoint=model_endpoint.dict(),
649
+ attributes=attributes,
650
+ )
651
+ logger.info(
652
+ "Updating model endpoint attributes",
653
+ attributes=attributes,
654
+ endpoint_id=uid,
576
655
  )
577
-
578
- except Exception as e:
579
- logger.error("Failed to create endpoint record", exc=e)
580
-
581
- except Exception as e:
582
- logger.error("Failed to retrieve model endpoint object", exc=e)
583
656
 
584
657
  return uid
mlrun/track/tracker.py CHANGED
@@ -31,8 +31,9 @@ class Tracker(ABC):
31
31
  * Offline: Manually importing models and artifacts into an MLRun project using the `import_x` methods.
32
32
  """
33
33
 
34
+ @staticmethod
34
35
  @abstractmethod
35
- def is_enabled(self) -> bool:
36
+ def is_enabled() -> bool:
36
37
  """
37
38
  Checks if tracker is enabled.
38
39
 
mlrun/track/trackers/mlflow_tracker.py CHANGED
@@ -442,6 +442,11 @@ class MLFlowTracker(Tracker):
442
442
  # Prepare the archive path:
443
443
  model_uri = pathlib.Path(model_uri)
444
444
  archive_path = pathlib.Path(tmp_path) / f"{model_uri.stem}.zip"
445
+ if not os.path.exists(model_uri):
446
+ local_path = mlflow.artifacts.download_artifacts(
447
+ artifact_uri=str(model_uri)
448
+ )
449
+ model_uri = pathlib.Path(local_path)
445
450
 
446
451
  # TODO add progress bar for the case of large files
447
452
  # Zip the artifact:
mlrun/utils/async_http.py CHANGED
@@ -24,7 +24,7 @@ from aiohttp_retry import ExponentialRetry, RequestParams, RetryClient, RetryOpt
24
24
  from aiohttp_retry.client import _RequestContext
25
25
 
26
26
  from mlrun.config import config
27
- from mlrun.errors import err_to_str
27
+ from mlrun.errors import err_to_str, raise_for_status
28
28
 
29
29
  from .helpers import logger as mlrun_logger
30
30
 
@@ -46,12 +46,21 @@ class AsyncClientWithRetry(RetryClient):
46
46
  *args,
47
47
  **kwargs,
48
48
  ):
49
+ # do not retry on PUT / PATCH as they might have side effects (not truly idempotent)
50
+ blacklisted_methods = (
51
+ blacklisted_methods
52
+ if blacklisted_methods is not None
53
+ else [
54
+ "POST",
55
+ "PUT",
56
+ "PATCH",
57
+ ]
58
+ )
49
59
  super().__init__(
50
60
  *args,
51
61
  retry_options=ExponentialRetryOverride(
52
62
  retry_on_exception=retry_on_exception,
53
- # do not retry on PUT / PATCH as they might have side effects (not truly idempotent)
54
- blacklisted_methods=blacklisted_methods or ["POST", "PUT", "PATCH"],
63
+ blacklisted_methods=blacklisted_methods,
55
64
  attempts=max_retries,
56
65
  statuses=retry_on_status_codes,
57
66
  factor=retry_backoff_factor,
@@ -63,6 +72,12 @@ class AsyncClientWithRetry(RetryClient):
63
72
  **kwargs,
64
73
  )
65
74
 
75
+ def methods_blacklist_update_required(self, new_blacklist: str):
76
+ self._retry_options: ExponentialRetryOverride
77
+ return set(self._retry_options.blacklisted_methods).difference(
78
+ set(new_blacklist)
79
+ )
80
+
66
81
  def _make_requests(
67
82
  self,
68
83
  params_list: list[RequestParams],
@@ -173,7 +188,7 @@ class _CustomRequestContext(_RequestContext):
173
188
  last_attempt = current_attempt == self._retry_options.attempts
174
189
  if self._is_status_code_ok(response.status) or last_attempt:
175
190
  if self._raise_for_status:
176
- response.raise_for_status()
191
+ raise_for_status(response)
177
192
 
178
193
  self._response = response
179
194
  return response
@@ -222,7 +237,7 @@ class _CustomRequestContext(_RequestContext):
222
237
  retry_wait = self._retry_options.get_timeout(
223
238
  attempt=current_attempt, response=None
224
239
  )
225
- self._logger.debug(
240
+ self._logger.warning(
226
241
  "Request failed on retryable exception, retrying",
227
242
  retry_wait_secs=retry_wait,
228
243
  method=params.method,
@@ -275,6 +290,11 @@ class _CustomRequestContext(_RequestContext):
275
290
  if isinstance(exc.os_error, exc_type):
276
291
  return
277
292
  if exc.__cause__:
278
- return self.verify_exception_type(exc.__cause__)
293
+ # If the cause exception is retriable, return, otherwise, raise the original exception
294
+ try:
295
+ self.verify_exception_type(exc.__cause__)
296
+ except Exception:
297
+ raise exc
298
+ return
279
299
  else:
280
300
  raise exc
mlrun/utils/db.py CHANGED
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
+ import abc
15
16
  import pickle
16
17
  from datetime import datetime
17
18
 
@@ -28,12 +29,22 @@ class BaseModel:
28
29
  columns = [column.key for column in mapper.columns if column.key not in exclude]
29
30
 
30
31
  def get_key_value(c):
32
+ # all (never say never) DB classes have "object" defined as "full_object"
33
+ if c == "object":
34
+ c = "full_object"
31
35
  if isinstance(getattr(self, c), datetime):
32
36
  return c, getattr(self, c).isoformat()
33
37
  return c, getattr(self, c)
34
38
 
35
39
  return dict(map(get_key_value, columns))
36
40
 
41
+ @abc.abstractmethod
42
+ def get_identifier_string(self):
43
+ """
44
+ This method must be implemented by any subclass.
45
+ """
46
+ pass
47
+
37
48
 
38
49
  class HasStruct(BaseModel):
39
50
  @property
@@ -51,3 +62,10 @@ class HasStruct(BaseModel):
51
62
  exclude = exclude or []
52
63
  exclude.append("body")
53
64
  return super().to_dict(exclude, strip=strip)
65
+
66
+ @abc.abstractmethod
67
+ def get_identifier_string(self):
68
+ """
69
+ This method must be implemented by any subclass.
70
+ """
71
+ pass