mlrun 1.7.0rc4__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (235) hide show
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -1
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +31 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +196 -0
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +13 -2
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +233 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +387 -119
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +245 -20
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +909 -231
  77. mlrun/db/nopdb.py +279 -14
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1176 -406
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +208 -181
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +54 -24
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/__init__.py +1 -0
  178. mlrun/runtimes/nuclio/api_gateway.py +769 -0
  179. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  180. mlrun/runtimes/nuclio/application/application.py +758 -0
  181. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  182. mlrun/runtimes/nuclio/function.py +188 -68
  183. mlrun/runtimes/nuclio/serving.py +57 -60
  184. mlrun/runtimes/pod.py +191 -58
  185. mlrun/runtimes/remotesparkjob.py +11 -8
  186. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  187. mlrun/runtimes/utils.py +40 -73
  188. mlrun/secrets.py +6 -2
  189. mlrun/serving/__init__.py +8 -1
  190. mlrun/serving/remote.py +2 -3
  191. mlrun/serving/routers.py +89 -64
  192. mlrun/serving/server.py +54 -26
  193. mlrun/serving/states.py +187 -56
  194. mlrun/serving/utils.py +19 -11
  195. mlrun/serving/v2_serving.py +136 -63
  196. mlrun/track/tracker.py +2 -1
  197. mlrun/track/trackers/mlflow_tracker.py +5 -0
  198. mlrun/utils/async_http.py +26 -6
  199. mlrun/utils/db.py +18 -0
  200. mlrun/utils/helpers.py +375 -105
  201. mlrun/utils/http.py +2 -2
  202. mlrun/utils/logger.py +75 -9
  203. mlrun/utils/notifications/notification/__init__.py +14 -10
  204. mlrun/utils/notifications/notification/base.py +48 -0
  205. mlrun/utils/notifications/notification/console.py +2 -0
  206. mlrun/utils/notifications/notification/git.py +24 -1
  207. mlrun/utils/notifications/notification/ipython.py +2 -0
  208. mlrun/utils/notifications/notification/slack.py +96 -21
  209. mlrun/utils/notifications/notification/webhook.py +63 -2
  210. mlrun/utils/notifications/notification_pusher.py +146 -16
  211. mlrun/utils/regex.py +9 -0
  212. mlrun/utils/retryer.py +3 -2
  213. mlrun/utils/v3io_clients.py +2 -3
  214. mlrun/utils/version/version.json +2 -2
  215. mlrun-1.7.2.dist-info/METADATA +390 -0
  216. mlrun-1.7.2.dist-info/RECORD +351 -0
  217. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  218. mlrun/feature_store/retrieval/conversion.py +0 -271
  219. mlrun/kfpops.py +0 -868
  220. mlrun/model_monitoring/application.py +0 -310
  221. mlrun/model_monitoring/batch.py +0 -974
  222. mlrun/model_monitoring/controller_handler.py +0 -37
  223. mlrun/model_monitoring/prometheus.py +0 -216
  224. mlrun/model_monitoring/stores/__init__.py +0 -111
  225. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  226. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  227. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  228. mlrun/model_monitoring/stores/models/base.py +0 -84
  229. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  230. mlrun/platforms/other.py +0 -305
  231. mlrun-1.7.0rc4.dist-info/METADATA +0 -269
  232. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  233. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  234. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  235. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
@@ -15,10 +15,17 @@
15
15
  import asyncio
16
16
  import datetime
17
17
  import os
18
+ import re
18
19
  import traceback
19
20
  import typing
20
21
  from concurrent.futures import ThreadPoolExecutor
21
22
 
23
+ import mlrun_pipelines.common.ops
24
+ import mlrun_pipelines.models
25
+ import mlrun_pipelines.utils
26
+
27
+ import mlrun.common.constants as mlrun_constants
28
+ import mlrun.common.runtimes.constants
22
29
  import mlrun.common.schemas
23
30
  import mlrun.config
24
31
  import mlrun.db.base
@@ -161,6 +168,11 @@ class NotificationPusher(_NotificationPusherBase):
161
168
  logger.warning(
162
169
  "Failed to push notification async",
163
170
  error=mlrun.errors.err_to_str(result),
171
+ traceback=traceback.format_exception(
172
+ etype=type(result),
173
+ value=result,
174
+ tb=result.__traceback__,
175
+ ),
164
176
  )
165
177
 
166
178
  logger.debug(
@@ -233,25 +245,12 @@ class NotificationPusher(_NotificationPusherBase):
233
245
  resource = "Run"
234
246
  runs = [run.to_dict()]
235
247
 
236
- if "workflow" in run.metadata.labels:
237
- resource = "Workflow"
248
+ if mlrun_constants.MLRunInternalLabels.workflow in run.metadata.labels:
249
+ resource = mlrun_constants.MLRunInternalLabels.workflow
238
250
  custom_message = (
239
251
  f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
240
252
  )
241
- db = mlrun.get_run_db()
242
-
243
- workflow_id = run.status.results.get("workflow_id", None)
244
- if workflow_id:
245
- workflow_runs = db.list_runs(
246
- project=run.metadata.project,
247
- labels=f"workflow={workflow_id}",
248
- )
249
- logger.debug(
250
- "Found workflow runs, extending notification runs",
251
- workflow_id=workflow_id,
252
- workflow_runs_amount=len(workflow_runs),
253
- )
254
- runs.extend(workflow_runs)
253
+ runs.extend(self.get_workflow_steps(run))
255
254
 
256
255
  message = (
257
256
  self.messages.get(run.state(), "").format(resource=resource)
@@ -395,6 +394,131 @@ class NotificationPusher(_NotificationPusherBase):
395
394
  mask_params=False,
396
395
  )
397
396
 
397
+ def get_workflow_steps(self, run: mlrun.model.RunObject) -> list:
398
+ steps = []
399
+ db = mlrun.get_run_db()
400
+
401
+ def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
402
+ try:
403
+ _run = db.list_runs(
404
+ project=run.metadata.project,
405
+ labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={_step.node_name}",
406
+ )[0]
407
+ except IndexError:
408
+ _run = {
409
+ "metadata": {
410
+ "name": _step.display_name,
411
+ "project": run.metadata.project,
412
+ },
413
+ }
414
+ _run["step_kind"] = _step.step_type
415
+ if _step.skipped:
416
+ _run.setdefault("status", {})["state"] = (
417
+ mlrun.common.runtimes.constants.RunStates.skipped
418
+ )
419
+ steps.append(_run)
420
+
421
+ def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
422
+ project, name, hash_key = self._extract_function_uri(
423
+ _step.get_annotation("mlrun/function-uri")
424
+ )
425
+ if name:
426
+ try:
427
+ function = db.get_function(
428
+ project=project, name=name, hash_key=hash_key
429
+ )
430
+ except mlrun.errors.MLRunNotFoundError:
431
+ # If the function is not found (if build failed for example), we will create a dummy
432
+ # function object for the notification to display the function name
433
+ function = {
434
+ "metadata": {
435
+ "name": name,
436
+ "project": project,
437
+ "hash_key": hash_key,
438
+ },
439
+ }
440
+ pod_phase = _step.phase
441
+ if _step.skipped:
442
+ state = mlrun.common.schemas.FunctionState.skipped
443
+ else:
444
+ state = mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
445
+ pod_phase
446
+ )
447
+ function["status"] = {"state": state}
448
+ if isinstance(function["metadata"].get("updated"), datetime.datetime):
449
+ function["metadata"]["updated"] = function["metadata"][
450
+ "updated"
451
+ ].isoformat()
452
+ function["step_kind"] = _step.step_type
453
+ steps.append(function)
454
+
455
+ step_methods = {
456
+ mlrun_pipelines.common.ops.PipelineRunType.run: _add_run_step,
457
+ mlrun_pipelines.common.ops.PipelineRunType.build: _add_deploy_function_step,
458
+ mlrun_pipelines.common.ops.PipelineRunType.deploy: _add_deploy_function_step,
459
+ }
460
+
461
+ workflow_id = run.status.results.get("workflow_id", None)
462
+ if not workflow_id:
463
+ return steps
464
+
465
+ workflow_manifest = self._get_workflow_manifest(workflow_id)
466
+ if not workflow_manifest:
467
+ return steps
468
+
469
+ try:
470
+ for step in workflow_manifest.get_steps():
471
+ step_method = step_methods.get(step.step_type)
472
+ if step_method:
473
+ step_method(step)
474
+ return steps
475
+ except Exception:
476
+ # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
477
+ logger.warning(
478
+ "Failed to extract workflow steps from workflow manifest, "
479
+ "returning all runs with the workflow id label",
480
+ workflow_id=workflow_id,
481
+ traceback=traceback.format_exc(),
482
+ )
483
+ return db.list_runs(
484
+ project=run.metadata.project,
485
+ labels=f"workflow={workflow_id}",
486
+ )
487
+
488
+ @staticmethod
489
+ def _get_workflow_manifest(
490
+ workflow_id: str,
491
+ ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
492
+ kfp_client = mlrun_pipelines.utils.get_client(mlrun.mlconf.kfp_url)
493
+
494
+ # arbitrary timeout of 5 seconds, the workflow should be done by now
495
+ kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
496
+ if not kfp_run:
497
+ return None
498
+
499
+ kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
500
+ return kfp_run.workflow_manifest()
501
+
502
+ def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
503
+ """
504
+ Extract the project, name, and hash key from a function uri.
505
+ Examples:
506
+ - "project/name@hash_key" returns project, name, hash_key
507
+ - "project/name" returns project, name, ""
508
+ """
509
+ project, name, hash_key = None, None, None
510
+ hashed_pattern = r"^(.+)/(.+)@(.+)$"
511
+ pattern = r"^(.+)/(.+)$"
512
+ match = re.match(hashed_pattern, function_uri)
513
+ if match:
514
+ project, name, hash_key = match.groups()
515
+ else:
516
+ match = re.match(pattern, function_uri)
517
+ if match:
518
+ project, name = match.groups()
519
+ hash_key = ""
520
+ return project, name, hash_key
521
+
398
522
 
399
523
  class CustomNotificationPusher(_NotificationPusherBase):
400
524
  def __init__(self, notification_types: list[str] = None):
@@ -413,6 +537,12 @@ class CustomNotificationPusher(_NotificationPusherBase):
413
537
  if notification.is_async
414
538
  }
415
539
 
540
+ @property
541
+ def notifications(self):
542
+ notifications = self._sync_notifications.copy()
543
+ notifications.update(self._async_notifications)
544
+ return notifications
545
+
416
546
  def push(
417
547
  self,
418
548
  message: str,
mlrun/utils/regex.py CHANGED
@@ -92,3 +92,12 @@ artifact_key = [r"[^\/\\]+$"]
92
92
  # must be alphanumeric or _
93
93
  # max 256 length
94
94
  v3io_stream_consumer_group = [r"^(?!_)[a-zA-Z0-9_]{1,256}$"]
95
+
96
+ # URI patterns
97
+ run_uri_pattern = r"^(?P<project>.*)@(?P<uid>.*)\#(?P<iteration>.*?)(:(?P<tag>.*))?$"
98
+
99
+ artifact_uri_pattern = r"^((?P<project>.*)/)?(?P<key>.*?)(\#(?P<iteration>.*?))?(:(?P<tag>.*?))?(@(?P<tree>.*))?$"
100
+
101
+ artifact_producer_uri_pattern = (
102
+ r"^((?P<project>.*)/)?(?P<uid>.*?)(\-(?P<iteration>.*?))?$"
103
+ )
mlrun/utils/retryer.py CHANGED
@@ -117,7 +117,7 @@ class Retryer:
117
117
  self._raise_last_exception()
118
118
 
119
119
  def _prepare(self):
120
- self.start_time = time.time()
120
+ self.start_time = time.monotonic()
121
121
  self.last_exception = None
122
122
 
123
123
  # Check if backoff is just a simple interval
@@ -138,6 +138,7 @@ class Retryer:
138
138
  except mlrun.errors.MLRunFatalFailureError as exc:
139
139
  raise exc.original_exception
140
140
  except Exception as exc:
141
+ self.last_exception = exc
141
142
  return (
142
143
  None,
143
144
  self.last_exception,
@@ -172,7 +173,7 @@ class Retryer:
172
173
  ) from self.last_exception
173
174
 
174
175
  def _timeout_exceeded(self, next_interval=None):
175
- now = time.time()
176
+ now = time.monotonic()
176
177
  if next_interval:
177
178
  now = now + next_interval
178
179
  return self.timeout is not None and now >= self.start_time + self.timeout
@@ -11,10 +11,9 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- #
15
14
 
16
15
  from v3io.dataplane import Client as V3IOClient
17
- from v3io_frames import Client as get_client
16
+ from v3io_frames import Client as V3IOFramesClient
18
17
  from v3io_frames.client import ClientBase
19
18
 
20
19
  _v3io_clients: dict[frozenset, V3IOClient] = {}
@@ -25,7 +24,7 @@ def get_frames_client(**kwargs) -> ClientBase:
25
24
  global _frames_clients
26
25
  kw_set = frozenset(kwargs.items())
27
26
  if kw_set not in _frames_clients:
28
- _frames_clients[kw_set] = get_client(**kwargs)
27
+ _frames_clients[kw_set] = V3IOFramesClient(**kwargs)
29
28
 
30
29
  return _frames_clients[kw_set]
31
30
 
@@ -1,4 +1,4 @@
1
1
  {
2
- "git_commit": "cb2750f25e202a321723af3465359944445dfda7",
3
- "version": "1.7.0-rc4"
2
+ "git_commit": "bb224abaed1b5d221269586c9497124a526615e8",
3
+ "version": "1.7.2"
4
4
  }
@@ -0,0 +1,390 @@
1
+ Metadata-Version: 2.2
2
+ Name: mlrun
3
+ Version: 1.7.2
4
+ Summary: Tracking and config of machine learning runs
5
+ Home-page: https://github.com/mlrun/mlrun
6
+ Author: Yaron Haviv
7
+ Author-email: yaronh@iguazio.com
8
+ License: Apache License 2.0
9
+ Keywords: mlrun,mlops,data-science,machine-learning,experiment-tracking
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Operating System :: Microsoft :: Windows
15
+ Classifier: Operating System :: MacOS
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Requires-Python: >=3.9, <3.12
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: urllib3<1.27,>=1.26.9
25
+ Requires-Dist: GitPython>=3.1.41,~=3.1
26
+ Requires-Dist: aiohttp<3.11,>=3.9
27
+ Requires-Dist: aiohttp-retry~=2.8.0
28
+ Requires-Dist: click~=8.1
29
+ Requires-Dist: nest-asyncio~=1.0
30
+ Requires-Dist: ipython~=8.10
31
+ Requires-Dist: nuclio-jupyter~=0.10.4
32
+ Requires-Dist: numpy<1.27.0,>=1.16.5
33
+ Requires-Dist: pandas<2.2,>=1.2
34
+ Requires-Dist: pyarrow<18,>=10.0
35
+ Requires-Dist: pyyaml<7,>=5.4.1
36
+ Requires-Dist: requests~=2.32
37
+ Requires-Dist: tabulate~=0.8.6
38
+ Requires-Dist: v3io~=0.6.9
39
+ Requires-Dist: pydantic<1.10.15,>=1.10.8
40
+ Requires-Dist: mergedeep~=1.3
41
+ Requires-Dist: v3io-frames~=0.10.14
42
+ Requires-Dist: semver~=3.0
43
+ Requires-Dist: dependency-injector~=4.41
44
+ Requires-Dist: fsspec<2024.7,>=2023.9.2
45
+ Requires-Dist: v3iofs~=0.1.17
46
+ Requires-Dist: storey~=1.7.50
47
+ Requires-Dist: inflection~=0.5.0
48
+ Requires-Dist: python-dotenv~=0.17.0
49
+ Requires-Dist: setuptools~=71.0
50
+ Requires-Dist: deprecated~=1.2
51
+ Requires-Dist: jinja2>=3.1.3,~=3.1
52
+ Requires-Dist: orjson<4,>=3.9.15
53
+ Requires-Dist: mlrun-pipelines-kfp-common~=0.1.9
54
+ Requires-Dist: mlrun-pipelines-kfp-v1-8~=0.1.6
55
+ Provides-Extra: s3
56
+ Requires-Dist: boto3<1.36,>=1.28.0; extra == "s3"
57
+ Requires-Dist: aiobotocore<2.16,>=2.5.0; extra == "s3"
58
+ Requires-Dist: s3fs<2024.7,>=2023.9.2; extra == "s3"
59
+ Provides-Extra: azure-blob-storage
60
+ Requires-Dist: msrest~=0.6.21; extra == "azure-blob-storage"
61
+ Requires-Dist: azure-core~=1.24; extra == "azure-blob-storage"
62
+ Requires-Dist: adlfs==2023.9.0; extra == "azure-blob-storage"
63
+ Requires-Dist: pyopenssl>=23; extra == "azure-blob-storage"
64
+ Provides-Extra: azure-key-vault
65
+ Requires-Dist: azure-identity~=1.5; extra == "azure-key-vault"
66
+ Requires-Dist: azure-keyvault-secrets~=4.2; extra == "azure-key-vault"
67
+ Requires-Dist: pyopenssl>=23; extra == "azure-key-vault"
68
+ Provides-Extra: bokeh
69
+ Requires-Dist: bokeh>=2.4.2,~=2.4; extra == "bokeh"
70
+ Provides-Extra: plotly
71
+ Requires-Dist: plotly~=5.23; extra == "plotly"
72
+ Provides-Extra: graphviz
73
+ Requires-Dist: graphviz~=0.20.0; extra == "graphviz"
74
+ Provides-Extra: google-cloud
75
+ Requires-Dist: google-cloud-storage==2.14.0; extra == "google-cloud"
76
+ Requires-Dist: google-cloud-bigquery[bqstorage,pandas]==3.14.1; extra == "google-cloud"
77
+ Requires-Dist: google-cloud-bigquery-storage~=2.17; extra == "google-cloud"
78
+ Requires-Dist: google-cloud==0.34; extra == "google-cloud"
79
+ Requires-Dist: gcsfs<2024.7,>=2023.9.2; extra == "google-cloud"
80
+ Provides-Extra: kafka
81
+ Requires-Dist: kafka-python~=2.0; extra == "kafka"
82
+ Requires-Dist: avro~=1.11; extra == "kafka"
83
+ Provides-Extra: redis
84
+ Requires-Dist: redis~=4.3; extra == "redis"
85
+ Provides-Extra: mlflow
86
+ Requires-Dist: mlflow~=2.8; extra == "mlflow"
87
+ Provides-Extra: databricks-sdk
88
+ Requires-Dist: databricks-sdk~=0.13.0; extra == "databricks-sdk"
89
+ Provides-Extra: sqlalchemy
90
+ Requires-Dist: sqlalchemy~=1.4; extra == "sqlalchemy"
91
+ Provides-Extra: dask
92
+ Requires-Dist: dask~=2023.12.1; extra == "dask"
93
+ Requires-Dist: distributed~=2023.12.1; extra == "dask"
94
+ Provides-Extra: alibaba-oss
95
+ Requires-Dist: ossfs==2023.12.0; extra == "alibaba-oss"
96
+ Requires-Dist: oss2==2.18.1; extra == "alibaba-oss"
97
+ Provides-Extra: tdengine
98
+ Requires-Dist: taos-ws-py==0.3.2; extra == "tdengine"
99
+ Requires-Dist: taoswswrap~=0.2.0; extra == "tdengine"
100
+ Provides-Extra: snowflake
101
+ Requires-Dist: snowflake-connector-python~=3.7; extra == "snowflake"
102
+ Provides-Extra: api
103
+ Requires-Dist: uvicorn~=0.27.1; extra == "api"
104
+ Requires-Dist: dask-kubernetes~=0.11.0; extra == "api"
105
+ Requires-Dist: apscheduler==3.10.3; extra == "api"
106
+ Requires-Dist: objgraph~=3.6; extra == "api"
107
+ Requires-Dist: igz-mgmt~=0.2.0; extra == "api"
108
+ Requires-Dist: humanfriendly~=10.0; extra == "api"
109
+ Requires-Dist: fastapi~=0.110.0; extra == "api"
110
+ Requires-Dist: sqlalchemy~=1.4; extra == "api"
111
+ Requires-Dist: pymysql~=1.0; extra == "api"
112
+ Requires-Dist: alembic~=1.9; extra == "api"
113
+ Requires-Dist: timelength~=1.1; extra == "api"
114
+ Requires-Dist: memray~=1.12; sys_platform != "win32" and extra == "api"
115
+ Provides-Extra: all
116
+ Requires-Dist: adlfs==2023.9.0; extra == "all"
117
+ Requires-Dist: aiobotocore<2.16,>=2.5.0; extra == "all"
118
+ Requires-Dist: avro~=1.11; extra == "all"
119
+ Requires-Dist: azure-core~=1.24; extra == "all"
120
+ Requires-Dist: azure-identity~=1.5; extra == "all"
121
+ Requires-Dist: azure-keyvault-secrets~=4.2; extra == "all"
122
+ Requires-Dist: bokeh>=2.4.2,~=2.4; extra == "all"
123
+ Requires-Dist: boto3<1.36,>=1.28.0; extra == "all"
124
+ Requires-Dist: dask~=2023.12.1; extra == "all"
125
+ Requires-Dist: databricks-sdk~=0.13.0; extra == "all"
126
+ Requires-Dist: distributed~=2023.12.1; extra == "all"
127
+ Requires-Dist: gcsfs<2024.7,>=2023.9.2; extra == "all"
128
+ Requires-Dist: google-cloud-bigquery-storage~=2.17; extra == "all"
129
+ Requires-Dist: google-cloud-bigquery[bqstorage,pandas]==3.14.1; extra == "all"
130
+ Requires-Dist: google-cloud-storage==2.14.0; extra == "all"
131
+ Requires-Dist: google-cloud==0.34; extra == "all"
132
+ Requires-Dist: graphviz~=0.20.0; extra == "all"
133
+ Requires-Dist: kafka-python~=2.0; extra == "all"
134
+ Requires-Dist: mlflow~=2.8; extra == "all"
135
+ Requires-Dist: msrest~=0.6.21; extra == "all"
136
+ Requires-Dist: oss2==2.18.1; extra == "all"
137
+ Requires-Dist: ossfs==2023.12.0; extra == "all"
138
+ Requires-Dist: plotly~=5.23; extra == "all"
139
+ Requires-Dist: pyopenssl>=23; extra == "all"
140
+ Requires-Dist: redis~=4.3; extra == "all"
141
+ Requires-Dist: s3fs<2024.7,>=2023.9.2; extra == "all"
142
+ Requires-Dist: snowflake-connector-python~=3.7; extra == "all"
143
+ Requires-Dist: sqlalchemy~=1.4; extra == "all"
144
+ Requires-Dist: taos-ws-py==0.3.2; extra == "all"
145
+ Requires-Dist: taoswswrap~=0.2.0; extra == "all"
146
+ Provides-Extra: complete
147
+ Requires-Dist: adlfs==2023.9.0; extra == "complete"
148
+ Requires-Dist: aiobotocore<2.16,>=2.5.0; extra == "complete"
149
+ Requires-Dist: avro~=1.11; extra == "complete"
150
+ Requires-Dist: azure-core~=1.24; extra == "complete"
151
+ Requires-Dist: azure-identity~=1.5; extra == "complete"
152
+ Requires-Dist: azure-keyvault-secrets~=4.2; extra == "complete"
153
+ Requires-Dist: boto3<1.36,>=1.28.0; extra == "complete"
154
+ Requires-Dist: dask~=2023.12.1; extra == "complete"
155
+ Requires-Dist: databricks-sdk~=0.13.0; extra == "complete"
156
+ Requires-Dist: distributed~=2023.12.1; extra == "complete"
157
+ Requires-Dist: gcsfs<2024.7,>=2023.9.2; extra == "complete"
158
+ Requires-Dist: google-cloud-bigquery-storage~=2.17; extra == "complete"
159
+ Requires-Dist: google-cloud-bigquery[bqstorage,pandas]==3.14.1; extra == "complete"
160
+ Requires-Dist: google-cloud-storage==2.14.0; extra == "complete"
161
+ Requires-Dist: google-cloud==0.34; extra == "complete"
162
+ Requires-Dist: graphviz~=0.20.0; extra == "complete"
163
+ Requires-Dist: kafka-python~=2.0; extra == "complete"
164
+ Requires-Dist: mlflow~=2.8; extra == "complete"
165
+ Requires-Dist: msrest~=0.6.21; extra == "complete"
166
+ Requires-Dist: oss2==2.18.1; extra == "complete"
167
+ Requires-Dist: ossfs==2023.12.0; extra == "complete"
168
+ Requires-Dist: plotly~=5.23; extra == "complete"
169
+ Requires-Dist: pyopenssl>=23; extra == "complete"
170
+ Requires-Dist: redis~=4.3; extra == "complete"
171
+ Requires-Dist: s3fs<2024.7,>=2023.9.2; extra == "complete"
172
+ Requires-Dist: snowflake-connector-python~=3.7; extra == "complete"
173
+ Requires-Dist: sqlalchemy~=1.4; extra == "complete"
174
+ Requires-Dist: taos-ws-py==0.3.2; extra == "complete"
175
+ Requires-Dist: taoswswrap~=0.2.0; extra == "complete"
176
+ Provides-Extra: complete-api
177
+ Requires-Dist: adlfs==2023.9.0; extra == "complete-api"
178
+ Requires-Dist: aiobotocore<2.16,>=2.5.0; extra == "complete-api"
179
+ Requires-Dist: alembic~=1.9; extra == "complete-api"
180
+ Requires-Dist: apscheduler==3.10.3; extra == "complete-api"
181
+ Requires-Dist: avro~=1.11; extra == "complete-api"
182
+ Requires-Dist: azure-core~=1.24; extra == "complete-api"
183
+ Requires-Dist: azure-identity~=1.5; extra == "complete-api"
184
+ Requires-Dist: azure-keyvault-secrets~=4.2; extra == "complete-api"
185
+ Requires-Dist: boto3<1.36,>=1.28.0; extra == "complete-api"
186
+ Requires-Dist: dask-kubernetes~=0.11.0; extra == "complete-api"
187
+ Requires-Dist: dask~=2023.12.1; extra == "complete-api"
188
+ Requires-Dist: databricks-sdk~=0.13.0; extra == "complete-api"
189
+ Requires-Dist: distributed~=2023.12.1; extra == "complete-api"
190
+ Requires-Dist: fastapi~=0.110.0; extra == "complete-api"
191
+ Requires-Dist: gcsfs<2024.7,>=2023.9.2; extra == "complete-api"
192
+ Requires-Dist: google-cloud-bigquery-storage~=2.17; extra == "complete-api"
193
+ Requires-Dist: google-cloud-bigquery[bqstorage,pandas]==3.14.1; extra == "complete-api"
194
+ Requires-Dist: google-cloud-storage==2.14.0; extra == "complete-api"
195
+ Requires-Dist: google-cloud==0.34; extra == "complete-api"
196
+ Requires-Dist: graphviz~=0.20.0; extra == "complete-api"
197
+ Requires-Dist: humanfriendly~=10.0; extra == "complete-api"
198
+ Requires-Dist: igz-mgmt~=0.2.0; extra == "complete-api"
199
+ Requires-Dist: kafka-python~=2.0; extra == "complete-api"
200
+ Requires-Dist: memray~=1.12; sys_platform != "win32" and extra == "complete-api"
201
+ Requires-Dist: mlflow~=2.8; extra == "complete-api"
202
+ Requires-Dist: msrest~=0.6.21; extra == "complete-api"
203
+ Requires-Dist: objgraph~=3.6; extra == "complete-api"
204
+ Requires-Dist: oss2==2.18.1; extra == "complete-api"
205
+ Requires-Dist: ossfs==2023.12.0; extra == "complete-api"
206
+ Requires-Dist: plotly~=5.23; extra == "complete-api"
207
+ Requires-Dist: pymysql~=1.0; extra == "complete-api"
208
+ Requires-Dist: pyopenssl>=23; extra == "complete-api"
209
+ Requires-Dist: redis~=4.3; extra == "complete-api"
210
+ Requires-Dist: s3fs<2024.7,>=2023.9.2; extra == "complete-api"
211
+ Requires-Dist: snowflake-connector-python~=3.7; extra == "complete-api"
212
+ Requires-Dist: sqlalchemy~=1.4; extra == "complete-api"
213
+ Requires-Dist: taos-ws-py==0.3.2; extra == "complete-api"
214
+ Requires-Dist: taoswswrap~=0.2.0; extra == "complete-api"
215
+ Requires-Dist: timelength~=1.1; extra == "complete-api"
216
+ Requires-Dist: uvicorn~=0.27.1; extra == "complete-api"
217
+ Dynamic: author
218
+ Dynamic: author-email
219
+ Dynamic: classifier
220
+ Dynamic: description
221
+ Dynamic: description-content-type
222
+ Dynamic: home-page
223
+ Dynamic: keywords
224
+ Dynamic: license
225
+ Dynamic: provides-extra
226
+ Dynamic: requires-dist
227
+ Dynamic: requires-python
228
+ Dynamic: summary
229
+
230
+ <a id="top"></a>
231
+ [![Build Status](https://github.com/mlrun/mlrun/actions/workflows/build.yaml/badge.svg?branch=development)](https://github.com/mlrun/mlrun/actions/workflows/build.yaml?query=branch%3Adevelopment)
232
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
233
+ [![PyPI version fury.io](https://badge.fury.io/py/mlrun.svg)](https://pypi.python.org/pypi/mlrun/)
234
+ [![Documentation](https://readthedocs.org/projects/mlrun/badge/?version=latest)](https://mlrun.readthedocs.io/en/latest/?badge=latest)
235
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
236
+ ![GitHub commit activity](https://img.shields.io/github/commit-activity/w/mlrun/mlrun)
237
+ ![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/mlrun/mlrun?sort=semver)
238
+ [![Join MLOps Live](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://mlopslive.slack.com)
239
+
240
+ <p align="left"><img src="https://github.com/mlrun/mlrun/raw/development/docs/_static/images/MLRun-logo.png" alt="MLRun logo" width="150"/></p>
241
+
242
+ # Using MLRun
243
+
244
+ MLRun is an open source AI orchestration platform for quickly building and managing continuous (gen) AI applications across their lifecycle. MLRun integrates into your development and CI/CD environment and automates the delivery of production data, ML pipelines, and online applications.
245
+ MLRun significantly reduces engineering efforts, time to production, and computation resources.
246
+ With MLRun, you can choose any IDE on your local machine or on the cloud. MLRun breaks the silos between data, ML, software, and DevOps/MLOps teams, enabling collaboration and fast continuous improvements.
247
+
248
+ Get started with the MLRun [**Tutorials and Examples**](https://docs.mlrun.org/en/stable/tutorials/index.html) and the [**Installation and setup guide**](https://docs.mlrun.org/en/stable/install.html), or read about the [**MLRun Architecture**](https://docs.mlrun.org/en/stable/architecture.html).
249
+
250
+ This page explains how MLRun addresses the [**gen AI tasks**](#genai-tasks), [**MLOps tasks**](#mlops-tasks), and presents the [**MLRun core components**](#core-components).
251
+
252
+ See the data stores, development tools, services, platforms, etc., supported by MLRun's open architecture in **https://docs.mlrun.org/en/stable/ecosystem.html**.
253
+
254
+ <a id="genai-tasks"></a>
+ ## Gen AI tasks
255
+
256
+ <p align="center"><img src="https://github.com/mlrun/mlrun/raw/development/docs/_static/images/ai-tasks.png" alt="ai-tasks" width="800"/></p><br>
257
+
258
+ Use MLRun to develop, scale, deploy, and monitor your AI model across your enterprise. The [**gen AI development workflow**](https://docs.mlrun.org/en/stable/genai/genai-flow.html)
259
+ section describes the different tasks and stages in detail.
260
+
261
+ ### Data management
262
+
263
+
264
+ MLRun supports batch or realtime data processing at scale, data lineage and versioning, structured and unstructured data, and more.
265
+ Removing inappropriate data at an early stage saves resources that would otherwise be required later on.
266
+
267
+
268
+ **Docs:**
269
+ [Using LLMs to process unstructured data](https://docs.mlrun.org/en/stable/genai/data-mgmt/unstructured-data.html)
270
+ [Vector databases](https://docs.mlrun.org/en/stable/genai/data-mgmt/vector-databases.html)
271
+ [Guardrails for data management](https://docs.mlrun.org/en/stable/genai/data-mgmt/guardrails-data.html)
272
+ **Demo:**
273
+ [Call center demo](https://github.com/mlrun/demo-call-center)
274
+ **Video:**
275
+ [Call center](https://youtu.be/YycMbxRgLBA)
276
+
277
+ ### Development
278
+ Use MLRun to build an automated ML pipeline to: collect data,
279
+ preprocess (prepare) the data, run the training pipeline, and evaluate the model.
280
+
281
+ **Docs:**
282
+ [Working with RAG](https://docs.mlrun.org/en/stable/genai/development/working-with-rag.html), [Evaluating LLMs](https://docs.mlrun.org/en/stable/genai/development/evaluating-llms.html), [Fine tuning LLMs](https://docs.mlrun.org/en/stable/genai/development/fine-tuning-llms.html)
283
+ **Demos:**
284
+ [Call center demo](https://github.com/mlrun/demo-call-center), [Build & deploy custom (fine-tuned) LLM models and applications](https://github.com/mlrun/demo-llm-tuning/blob/main), [Interactive bot demo using LLMs](https://github.com/mlrun/demo-llm-bot/blob/main)
285
+ **Video:**
286
+ [Call center](https://youtu.be/YycMbxRgLBA)
287
+
288
+
289
+ ### Deployment
290
+ MLRun serving can productize the newly trained LLM as a serverless function using real-time auto-scaling Nuclio serverless functions.
291
+ The application pipeline includes all the steps from accepting events or data, contextualizing it with a state, preparing the required model features,
292
+ inferring results using one or more models, and driving actions.
293
+
294
+
295
+ **Docs:**
296
+ [Serving gen AI models](https://docs.mlrun.org/en/stable/genai/deployment/genai_serving.html), [GPU utilization](https://docs.mlrun.org/en/stable/genai/deployment/gpu_utilization.html), [Gen AI realtime serving graph](https://docs.mlrun.org/en/stable/genai/deployment/genai_serving_graph.html)
297
+ **Tutorial:**
298
+ [Deploy LLM using MLRun](https://docs.mlrun.org/en/stable/tutorials/genai_01_basic_tutorial.html)
299
+ **Demos:**
300
+ [Call center demo](https://github.com/mlrun/demo-call-center), [Build & deploy custom (fine-tuned) LLM models and applications](https://github.com/mlrun/demo-llm-tuning/blob/main), [Interactive bot demo using LLMs](https://github.com/mlrun/demo-llm-bot/blob/main)
301
+ **Video:**
302
+ [Call center](https://youtu.be/YycMbxRgLBA)
303
+
304
+
305
+ ### Live Ops
306
+ Monitor all resources, data, model and application metrics to ensure performance. Then identify risks, control costs, and measure business KPIs.
307
+ Collect production data, metadata, and metrics to tune the model and application further, and to enable governance and explainability.
308
+
309
+
310
+ **Docs:**
311
+ [Model monitoring](https://docs.mlrun.org/en/stable/concepts/monitoring.html), [Alerts and notifications](https://docs.mlrun.org/en/stable/concepts/alerts-notifications.html)
312
+ **Tutorials:**
313
+ [Deploy LLM using MLRun](https://docs.mlrun.org/en/stable/tutorials/genai_01_basic_tutorial.html), [Model monitoring using LLM](https://docs.mlrun.org/en/stable/tutorials/genai-02-monitoring-llm.html)
314
+ **Demo:**
315
+ [Build & deploy custom (fine-tuned) LLM models and applications](https://github.com/mlrun/demo-llm-tuning/blob/main)
316
+
317
+
318
+ <a id="mlops-tasks"></a>
319
+ ## MLOps tasks
320
+
321
+ <p align="center"><img src="https://github.com/mlrun/mlrun/raw/development/docs/_static/images/mlops-task.png" alt="mlrun-tasks" width="800"/></p><br>
322
+
323
+ The [**MLOps development workflow**](https://docs.mlrun.org/en/stable/mlops-dev-flow.html) section describes the different tasks and stages in detail.
324
+ MLRun can be used to automate and orchestrate all the different tasks or just specific tasks (and integrate them with what you have already deployed).
325
+
326
+ ### Project management and CI/CD automation
327
+
328
+ In MLRun the assets, metadata, and services (data, functions, jobs, artifacts, models, secrets, etc.) are organized into projects.
329
+ Projects can be imported/exported as a whole, mapped to git repositories or IDE projects (in PyCharm, VSCode, etc.), which enables versioning, collaboration, and CI/CD.
330
+ Project access can be restricted to a set of users and roles.
331
+
332
+ **Docs:** [Projects and Automation](https://docs.mlrun.org/en/stable/projects/project.html), [CI/CD Integration](https://docs.mlrun.org/en/stable/projects/ci-integration.html)
333
+ **Tutorials:** [Quick start](https://docs.mlrun.org/en/stable/tutorials/01-mlrun-basics.html), [Automated ML Pipeline](https://docs.mlrun.org/en/stable/tutorials/04-pipeline.html)
334
+ **Video:** [Quick start](https://youtu.be/xI8KVGLlj7Q).
335
+
336
+ ### Ingest and process data
337
+
338
+ MLRun provides abstract interfaces to various offline and online [**data sources**](https://docs.mlrun.org/en/stable/store/datastore.html), supports batch or realtime data processing at scale, data lineage and versioning, structured and unstructured data, and more.
339
+ In addition, the MLRun [**Feature Store**](https://docs.mlrun.org/en/stable/feature-store/feature-store.html) automates the collection, transformation, storage, catalog, serving, and monitoring of data features across the ML lifecycle and enables feature reuse and sharing.
340
+
341
+ **Docs:** [Ingest and process data](https://docs.mlrun.org/en/stable/data-prep/index.html), [Feature Store](https://docs.mlrun.org/en/stable/feature-store/feature-store.html), [Data & Artifacts](https://docs.mlrun.org/en/stable/concepts/data.html)
342
+ **Tutorials:** [Quick start](https://docs.mlrun.org/en/stable/tutorials/01-mlrun-basics.html), [Feature Store](https://docs.mlrun.org/en/stable/feature-store/basic-demo.html).
343
+
344
+ ### Develop and train models
345
+
346
+ MLRun allows you to easily build ML pipelines that take data from various sources or the Feature Store and process it, train models at scale with multiple parameters, test models, track each experiment, register, version and deploy models, etc. MLRun provides scalable built-in or custom model training services, integrates with any framework and can work with 3rd party training/auto-ML services. You can also bring your own pre-trained model and use it in the pipeline.
347
+
348
+ **Docs:** [Develop and train models](https://docs.mlrun.org/en/stable/development/index.html), [Model Training and Tracking](https://docs.mlrun.org/en/stable/development/model-training-tracking.html), [Batch Runs and Workflows](https://docs.mlrun.org/en/stable/concepts/runs-workflows.html)
349
+ **Tutorials:** [Train, compare, and register models](https://docs.mlrun.org/en/stable/tutorials/02-model-training.html), [Automated ML Pipeline](https://docs.mlrun.org/en/stable/tutorials/04-pipeline.html)
350
+ **Video:** [Train and compare models](https://youtu.be/bZgBsmLMdQo).
351
+
352
+ ### Deploy models and applications
353
+
354
+ MLRun rapidly deploys and manages production-grade real-time or batch application pipelines using elastic and resilient serverless functions. MLRun addresses the entire ML application: intercepting application/user requests, running data processing tasks, inferencing using one or more models, driving actions, and integrating with the application logic.
355
+
356
+ **Docs:** [Deploy models and applications](https://docs.mlrun.org/en/stable/deployment/index.html), [Realtime Pipelines](https://docs.mlrun.org/en/stable/serving/serving-graph.html), [Batch Inference](https://docs.mlrun.org/en/stable/deployment/batch_inference.html)
357
+ **Tutorials:** [Realtime Serving](https://docs.mlrun.org/en/stable/tutorials/03-model-serving.html), [Batch Inference](https://docs.mlrun.org/en/stable/tutorials/07-batch-infer.html), [Advanced Pipeline](https://docs.mlrun.org/en/stable/tutorials/07-batch-infer.html)
358
+ **Video:** [Serving pre-trained models](https://youtu.be/OUjOus4dZfw).
359
+
360
+ ### Model Monitoring
361
+
362
+ Observability is built into the different MLRun objects (data, functions, jobs, models, pipelines, etc.), eliminating the need for complex integrations and code instrumentation. With MLRun, you can observe the application/model resource usage and model behavior (drift, performance, etc.), define custom app metrics, and trigger alerts or retraining jobs.
363
+
364
+ **Docs:** [Model monitoring](https://docs.mlrun.org/en/stable/concepts/model-monitoring.html), [Model Monitoring Overview](https://docs.mlrun.org/en/stable/monitoring/model-monitoring-deployment.html)
365
+ **Tutorials:** [Model Monitoring & Drift Detection](https://docs.mlrun.org/en/stable/tutorials/05-model-monitoring.html).
366
+
367
+
368
+ <a id="core-components"></a>
369
+ ## MLRun core components
370
+
371
+ <p align="center"><img src="https://github.com/mlrun/mlrun/raw/development/docs/_static/images/mlops-core.png" alt="mlrun-core" width="800"/></p><br>
372
+
373
+
374
+ MLRun includes the following major components:
375
+
376
+ [**Project Management:**](https://docs.mlrun.org/en/stable/projects/project.html) A service (API, SDK, DB, UI) that manages the different project assets (data, functions, jobs, workflows, secrets, etc.) and provides a central control and metadata layer.
377
+
378
+ [**Functions:**](https://docs.mlrun.org/en/stable/runtimes/functions.html) automatically deployed software package with one or more methods and runtime-specific attributes (such as image, libraries, command, arguments, resources, etc.).
379
+
380
+ [**Data & Artifacts:**](https://docs.mlrun.org/en/stable/concepts/data.html) Glueless connectivity to various data sources, metadata management, catalog, and versioning for structured/unstructured artifacts.
381
+
382
+ [**Batch Runs & Workflows:**](https://docs.mlrun.org/en/stable/concepts/runs-workflows.html) Execute one or more functions with specific parameters and collect, track, and compare all their results and artifacts.
383
+
384
+ [**Real-Time Serving Pipeline:**](https://docs.mlrun.org/en/stable/serving/serving-graph.html) Rapid deployment of scalable data and ML pipelines using real-time serverless technology, including API handling, data preparation/enrichment, model serving, ensembles, driving and measuring actions, etc.
385
+
386
+ [**Model monitoring:**](https://docs.mlrun.org/en/stable/monitoring/index.html) monitors data, models, resources, and production components and provides a feedback loop for exploring production data, identifying drift, alerting on anomalies or data quality issues, triggering retraining jobs, measuring business impact, etc.
387
+
388
+ [**Alerts and notifications:**](https://docs.mlrun.org/en/stable/concepts/alerts-notifications.html) Use alerts to identify and inform you of possible problem situations. Use notifications to report status on runs and pipelines.
389
+
390
+ [**Feature Store:**](https://docs.mlrun.org/en/stable/feature-store/feature-store.html) automatically collects, prepares, catalogs, and serves production data features for development (offline) and real-time (online) deployment using minimal engineering effort.