mlrun 1.5.0rc1__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (119)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +1 -40
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/functions.py +6 -1
  7. mlrun/api/api/endpoints/logs.py +17 -3
  8. mlrun/api/api/endpoints/pipelines.py +1 -5
  9. mlrun/api/api/endpoints/projects.py +88 -0
  10. mlrun/api/api/endpoints/runs.py +48 -6
  11. mlrun/api/api/endpoints/workflows.py +355 -0
  12. mlrun/api/api/utils.py +1 -1
  13. mlrun/api/crud/__init__.py +1 -0
  14. mlrun/api/crud/client_spec.py +3 -0
  15. mlrun/api/crud/model_monitoring/deployment.py +36 -7
  16. mlrun/api/crud/model_monitoring/grafana.py +1 -1
  17. mlrun/api/crud/model_monitoring/helpers.py +32 -2
  18. mlrun/api/crud/model_monitoring/model_endpoints.py +27 -5
  19. mlrun/api/crud/notifications.py +9 -4
  20. mlrun/api/crud/pipelines.py +4 -9
  21. mlrun/api/crud/runtime_resources.py +4 -3
  22. mlrun/api/crud/secrets.py +21 -0
  23. mlrun/api/crud/workflows.py +352 -0
  24. mlrun/api/db/base.py +16 -1
  25. mlrun/api/db/sqldb/db.py +97 -16
  26. mlrun/api/launcher.py +26 -7
  27. mlrun/api/main.py +3 -4
  28. mlrun/{mlutils → api/rundb}/__init__.py +2 -6
  29. mlrun/{db → api/rundb}/sqldb.py +35 -83
  30. mlrun/api/runtime_handlers/__init__.py +56 -0
  31. mlrun/api/runtime_handlers/base.py +1247 -0
  32. mlrun/api/runtime_handlers/daskjob.py +209 -0
  33. mlrun/api/runtime_handlers/kubejob.py +37 -0
  34. mlrun/api/runtime_handlers/mpijob.py +147 -0
  35. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  36. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  37. mlrun/api/utils/builder.py +1 -4
  38. mlrun/api/utils/clients/chief.py +14 -0
  39. mlrun/api/utils/scheduler.py +98 -15
  40. mlrun/api/utils/singletons/db.py +4 -0
  41. mlrun/artifacts/manager.py +1 -2
  42. mlrun/common/schemas/__init__.py +6 -0
  43. mlrun/common/schemas/auth.py +4 -1
  44. mlrun/common/schemas/client_spec.py +1 -1
  45. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  46. mlrun/common/schemas/model_monitoring/constants.py +11 -0
  47. mlrun/common/schemas/project.py +1 -0
  48. mlrun/common/schemas/runs.py +1 -8
  49. mlrun/common/schemas/schedule.py +1 -8
  50. mlrun/common/schemas/workflow.py +54 -0
  51. mlrun/config.py +42 -40
  52. mlrun/datastore/sources.py +1 -1
  53. mlrun/db/__init__.py +4 -68
  54. mlrun/db/base.py +12 -0
  55. mlrun/db/factory.py +65 -0
  56. mlrun/db/httpdb.py +175 -19
  57. mlrun/db/nopdb.py +4 -2
  58. mlrun/execution.py +4 -2
  59. mlrun/feature_store/__init__.py +1 -0
  60. mlrun/feature_store/api.py +1 -2
  61. mlrun/feature_store/feature_set.py +0 -10
  62. mlrun/feature_store/feature_vector.py +340 -2
  63. mlrun/feature_store/ingestion.py +5 -10
  64. mlrun/feature_store/retrieval/base.py +118 -104
  65. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  66. mlrun/feature_store/retrieval/job.py +4 -1
  67. mlrun/feature_store/retrieval/local_merger.py +18 -18
  68. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  69. mlrun/feature_store/retrieval/storey_merger.py +21 -15
  70. mlrun/kfpops.py +3 -9
  71. mlrun/launcher/base.py +3 -3
  72. mlrun/launcher/client.py +3 -2
  73. mlrun/launcher/factory.py +16 -13
  74. mlrun/lists.py +0 -11
  75. mlrun/model.py +9 -15
  76. mlrun/model_monitoring/helpers.py +15 -25
  77. mlrun/model_monitoring/model_monitoring_batch.py +72 -4
  78. mlrun/model_monitoring/prometheus.py +219 -0
  79. mlrun/model_monitoring/stores/__init__.py +15 -9
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +3 -1
  81. mlrun/model_monitoring/stream_processing.py +181 -29
  82. mlrun/package/packager.py +6 -8
  83. mlrun/package/packagers/default_packager.py +121 -10
  84. mlrun/platforms/__init__.py +0 -2
  85. mlrun/platforms/iguazio.py +0 -56
  86. mlrun/projects/pipelines.py +57 -158
  87. mlrun/projects/project.py +6 -32
  88. mlrun/render.py +1 -1
  89. mlrun/run.py +2 -124
  90. mlrun/runtimes/__init__.py +6 -42
  91. mlrun/runtimes/base.py +26 -1241
  92. mlrun/runtimes/daskjob.py +2 -198
  93. mlrun/runtimes/function.py +16 -5
  94. mlrun/runtimes/kubejob.py +5 -29
  95. mlrun/runtimes/mpijob/__init__.py +2 -2
  96. mlrun/runtimes/mpijob/abstract.py +10 -1
  97. mlrun/runtimes/mpijob/v1.py +0 -76
  98. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  99. mlrun/runtimes/nuclio.py +3 -2
  100. mlrun/runtimes/pod.py +0 -10
  101. mlrun/runtimes/remotesparkjob.py +1 -15
  102. mlrun/runtimes/serving.py +1 -1
  103. mlrun/runtimes/sparkjob/__init__.py +0 -1
  104. mlrun/runtimes/sparkjob/abstract.py +4 -131
  105. mlrun/serving/states.py +1 -1
  106. mlrun/utils/db.py +0 -2
  107. mlrun/utils/helpers.py +19 -13
  108. mlrun/utils/notifications/notification_pusher.py +5 -25
  109. mlrun/utils/regex.py +7 -2
  110. mlrun/utils/version/version.json +2 -2
  111. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +24 -23
  112. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +116 -107
  113. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  114. mlrun/mlutils/data.py +0 -160
  115. mlrun/mlutils/models.py +0 -78
  116. mlrun/mlutils/plots.py +0 -902
  117. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  118. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  119. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py CHANGED
@@ -18,10 +18,10 @@ import tempfile
18
18
  import time
19
19
  import traceback
20
20
  import typing
21
- import warnings
22
21
  from datetime import datetime
23
22
  from os import path, remove
24
23
  from typing import Dict, List, Optional, Union
24
+ from urllib.parse import urlparse
25
25
 
26
26
  import kfp
27
27
  import requests
@@ -30,6 +30,7 @@ import semver
30
30
  import mlrun
31
31
  import mlrun.common.schemas
32
32
  import mlrun.model_monitoring.model_endpoint
33
+ import mlrun.platforms
33
34
  import mlrun.projects
34
35
  from mlrun.errors import MLRunInvalidArgumentError, err_to_str
35
36
 
@@ -106,11 +107,7 @@ class HTTPRunDB(RunDBInterface):
106
107
  r"\/?run\/.+\/.+",
107
108
  ]
108
109
 
109
- def __init__(self, base_url, user="", password="", token=""):
110
- self.base_url = base_url
111
- self.user = user
112
- self.password = password
113
- self.token = token
110
+ def __init__(self, url):
114
111
  self.server_version = ""
115
112
  self.session = None
116
113
  self._wait_for_project_terminal_state_retry_interval = 3
@@ -119,6 +116,33 @@ class HTTPRunDB(RunDBInterface):
119
116
  self.client_version = version.Version().get()["version"]
120
117
  self.python_version = str(version.Version().get_python_version())
121
118
 
119
+ self._enrich_and_validate(url)
120
+
121
+ def _enrich_and_validate(self, url):
122
+ parsed_url = urlparse(url)
123
+ scheme = parsed_url.scheme.lower()
124
+ if scheme not in ("http", "https"):
125
+ raise ValueError(
126
+ f"Invalid URL scheme {scheme} for HTTPRunDB, only http(s) is supported"
127
+ )
128
+
129
+ endpoint = parsed_url.hostname
130
+ if parsed_url.port:
131
+ endpoint += f":{parsed_url.port}"
132
+ base_url = f"{parsed_url.scheme}://{endpoint}{parsed_url.path}"
133
+
134
+ username = parsed_url.username or config.httpdb.user
135
+ password = parsed_url.password or config.httpdb.password
136
+
137
+ username, password, token = mlrun.platforms.add_or_refresh_credentials(
138
+ parsed_url.hostname, username, password, config.httpdb.token
139
+ )
140
+
141
+ self.base_url = base_url
142
+ self.user = username
143
+ self.password = password
144
+ self.token = token
145
+
122
146
  def __repr__(self):
123
147
  cls = self.__class__.__name__
124
148
  return f"{cls}({self.base_url!r})"
@@ -415,6 +439,10 @@ class HTTPRunDB(RunDBInterface):
415
439
  )
416
440
  config.function = server_cfg.get("function") or config.function
417
441
  config.httpdb.logs = server_cfg.get("logs") or config.httpdb.logs
442
+ config.model_endpoint_monitoring.store_type = (
443
+ server_cfg.get("model_endpoint_monitoring_store_type")
444
+ or config.model_endpoint_monitoring.store_type
445
+ )
418
446
 
419
447
  except Exception as exc:
420
448
  logger.warning(
@@ -1352,8 +1380,6 @@ class HTTPRunDB(RunDBInterface):
1352
1380
  namespace=None,
1353
1381
  artifact_path=None,
1354
1382
  ops=None,
1355
- # TODO: deprecated, remove in 1.5.0
1356
- ttl=None,
1357
1383
  cleanup_ttl=None,
1358
1384
  ):
1359
1385
  """Submit a KFP pipeline for execution.
@@ -1366,27 +1392,17 @@ class HTTPRunDB(RunDBInterface):
1366
1392
  :param namespace: Kubernetes namespace to execute the pipeline in.
1367
1393
  :param artifact_path: A path to artifacts used by this pipeline.
1368
1394
  :param ops: Transformers to apply on all ops in the pipeline.
1369
- :param ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the workflow
1370
- and all its resources are deleted) (deprecated, use cleanup_ttl instead)
1371
1395
  :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
1372
1396
  workflow and all its resources are deleted)
1373
1397
  """
1374
1398
 
1375
- if ttl:
1376
- warnings.warn(
1377
- "'ttl' is deprecated, use 'cleanup_ttl' instead. "
1378
- "This will be removed in 1.5.0",
1379
- # TODO: Remove this in 1.5.0
1380
- FutureWarning,
1381
- )
1382
-
1383
1399
  if isinstance(pipeline, str):
1384
1400
  pipe_file = pipeline
1385
1401
  else:
1386
1402
  pipe_file = tempfile.NamedTemporaryFile(suffix=".yaml", delete=False).name
1387
1403
  conf = new_pipe_metadata(
1388
1404
  artifact_path=artifact_path,
1389
- cleanup_ttl=cleanup_ttl or ttl,
1405
+ cleanup_ttl=cleanup_ttl,
1390
1406
  op_transformers=ops,
1391
1407
  )
1392
1408
  kfp.compiler.Compiler().compile(
@@ -3084,6 +3100,146 @@ class HTTPRunDB(RunDBInterface):
3084
3100
  },
3085
3101
  )
3086
3102
 
3103
+ def store_run_notifications(
3104
+ self,
3105
+ notification_objects: typing.List[mlrun.model.Notification],
3106
+ run_uid: str,
3107
+ project: str = None,
3108
+ mask_params: bool = True,
3109
+ ):
3110
+ """
3111
+ For internal use.
3112
+ The notification mechanism may run "locally" for certain runtimes.
3113
+ However, the updates occur in the API so nothing to do here.
3114
+ """
3115
+ pass
3116
+
3117
+ def submit_workflow(
3118
+ self,
3119
+ project: str,
3120
+ name: str,
3121
+ workflow_spec: Union[
3122
+ mlrun.projects.pipelines.WorkflowSpec,
3123
+ mlrun.common.schemas.WorkflowSpec,
3124
+ dict,
3125
+ ],
3126
+ arguments: Optional[Dict] = None,
3127
+ artifact_path: Optional[str] = None,
3128
+ source: Optional[str] = None,
3129
+ run_name: Optional[str] = None,
3130
+ namespace: Optional[str] = None,
3131
+ ):
3132
+ """
3133
+ Submitting workflow for a remote execution.
3134
+
3135
+ :param project: project name
3136
+ :param name: workflow name
3137
+ :param workflow_spec: the workflow spec to execute
3138
+ :param arguments: arguments for the workflow
3139
+ :param artifact_path: artifact target path of the workflow
3140
+ :param source: source url of the project
3141
+ :param run_name: run name to override the default: 'workflow-runner-<workflow name>'
3142
+ :param namespace: kubernetes namespace if other than default
3143
+
3144
+ :returns: :py:class:`~mlrun.common.schemas.WorkflowResponse`.
3145
+ """
3146
+ image = (
3147
+ workflow_spec.image
3148
+ if hasattr(workflow_spec, "image")
3149
+ else workflow_spec.get("image", None)
3150
+ )
3151
+ req = {
3152
+ "arguments": arguments,
3153
+ "artifact_path": artifact_path,
3154
+ "source": source,
3155
+ "run_name": run_name,
3156
+ "namespace": namespace,
3157
+ }
3158
+ if isinstance(workflow_spec, mlrun.common.schemas.WorkflowSpec):
3159
+ req["spec"] = workflow_spec.dict()
3160
+ elif isinstance(workflow_spec, mlrun.projects.pipelines.WorkflowSpec):
3161
+ req["spec"] = workflow_spec.to_dict()
3162
+ else:
3163
+ req["spec"] = workflow_spec
3164
+ req["spec"]["image"] = image
3165
+ response = self.api_call(
3166
+ "POST",
3167
+ f"projects/{project}/workflows/{name}/submit",
3168
+ json=req,
3169
+ )
3170
+ return mlrun.common.schemas.WorkflowResponse(**response.json())
3171
+
3172
+ def get_workflow_id(
3173
+ self,
3174
+ project: str,
3175
+ name: str,
3176
+ run_id: str,
3177
+ engine: str = "",
3178
+ ):
3179
+ """
3180
+ Retrieve workflow id from the uid of the workflow runner.
3181
+
3182
+ :param project: project name
3183
+ :param name: workflow name
3184
+ :param run_id: the id of the workflow runner - the job that runs the workflow
3185
+ :param engine: pipeline runner
3186
+
3187
+ :returns: :py:class:`~mlrun.common.schemas.GetWorkflowResponse`.
3188
+ """
3189
+ params = {}
3190
+ if engine:
3191
+ params["engine"] = engine
3192
+ response = self.api_call(
3193
+ "GET",
3194
+ f"projects/{project}/workflows/{name}/runs/{run_id}",
3195
+ params=params,
3196
+ )
3197
+ return mlrun.common.schemas.GetWorkflowResponse(**response.json())
3198
+
3199
+ def load_project(
3200
+ self,
3201
+ name: str,
3202
+ url: str,
3203
+ secrets: Optional[Dict] = None,
3204
+ save_secrets: bool = True,
3205
+ ) -> str:
3206
+ """
3207
+ Loading a project remotely from the given source.
3208
+ :param name: project name
3209
+ :param url: git or tar.gz or .zip sources archive path e.g.:
3210
+ git://github.com/mlrun/demo-xgb-project.git
3211
+ http://mysite/archived-project.zip
3212
+ The git project should include the project yaml file.
3213
+ :param secrets: Secrets to store in project in order to load it from the provided url.
3214
+ For more information see :py:func:`mlrun.load_project` function.
3215
+ :param save_secrets: Whether to store secrets in the loaded project.
3216
+ Setting to False will cause waiting for the process completion.
3217
+
3218
+ :returns: The terminal state of load project process.
3219
+ """
3220
+ params = {"url": url}
3221
+ body = None
3222
+ if secrets:
3223
+ provider = mlrun.common.schemas.SecretProviderName.kubernetes
3224
+ secrets_input = mlrun.common.schemas.SecretsData(
3225
+ provider=provider, secrets=secrets
3226
+ )
3227
+ body = secrets_input.dict()
3228
+ response = self.api_call(
3229
+ "POST", f"projects/{name}/load", params=params, body=dict_to_json(body)
3230
+ )
3231
+ response = response.json()
3232
+ run = mlrun.RunObject.from_dict(response["data"])
3233
+ state, _ = run.logs()
3234
+
3235
+ if secrets and not save_secrets:
3236
+ self.delete_project_secrets(project=name, secrets=list(secrets.keys()))
3237
+ if state != "completed":
3238
+ logger.error("Load project task failed, deleting project")
3239
+ self.delete_project(name, mlrun.common.schemas.DeletionStrategy.cascade)
3240
+
3241
+ return state
3242
+
3087
3243
 
3088
3244
  def _as_json(obj):
3089
3245
  fn = getattr(obj, "to_json", None)
mlrun/db/nopdb.py CHANGED
@@ -31,7 +31,7 @@ class NopDB(RunDBInterface):
31
31
  def __getattribute__(self, attr):
32
32
  def nop(*args, **kwargs):
33
33
  env_var_message = (
34
- "MLRUN_DBPATH is not set. Set this environment variable to the URL of the API "
34
+ "MLRUN_DBPATH is misconfigured. Set this environment variable to the URL of the API "
35
35
  "server in order to connect"
36
36
  )
37
37
  if config.httpdb.nop_db.raise_error:
@@ -45,7 +45,8 @@ class NopDB(RunDBInterface):
45
45
 
46
46
  return
47
47
 
48
- if attr == "connect":
48
+ # ignore __class__ because __getattribute__ overrides the parent class's method and it spams logs
49
+ if attr in ["connect", "__class__"]:
49
50
  return super().__getattribute__(attr)
50
51
  else:
51
52
  nop()
@@ -93,6 +94,7 @@ class NopDB(RunDBInterface):
93
94
  mlrun.common.schemas.OrderType, str
94
95
  ] = mlrun.common.schemas.OrderType.desc,
95
96
  max_partitions: int = 0,
97
+ with_notifications: bool = False,
96
98
  ):
97
99
  pass
98
100
 
mlrun/execution.py CHANGED
@@ -1038,8 +1038,10 @@ class MLClientCtx(object):
1038
1038
 
1039
1039
  def _update_run(self, commit=False, message=""):
1040
1040
  """
1041
- update the required fields in the run object (using mlrun.utils.helpers.update_in)
1042
- instead of overwriting existing
1041
+ update the required fields in the run object instead of overwriting existing values with empty ones
1042
+
1043
+ :param commit: commit the changes to the DB if autocommit is not set or update the tmpfile alone
1044
+ :param message: commit message
1043
1045
  """
1044
1046
  self._merge_tmpfile()
1045
1047
  if commit or self._autocommit:
mlrun/feature_store/__init__.py CHANGED
@@ -53,6 +53,7 @@ from .feature_set import FeatureSet
53
53
  from .feature_vector import (
54
54
  FeatureVector,
55
55
  FixedWindowType,
56
+ JoinGraph,
56
57
  OfflineVectorResponse,
57
58
  OnlineVectorService,
58
59
  )
mlrun/feature_store/api.py CHANGED
@@ -35,7 +35,6 @@ from ..datastore.targets import (
35
35
  validate_target_list,
36
36
  validate_target_paths_for_engine,
37
37
  )
38
- from ..db import RunDBError
39
38
  from ..model import DataSource, DataTargetBase
40
39
  from ..runtimes import RuntimeKinds
41
40
  from ..runtimes.function_reference import FunctionReference
@@ -417,7 +416,7 @@ def ingest(
417
416
  _, stripped_name = parse_store_uri(featureset)
418
417
  try:
419
418
  featureset = get_feature_set_by_uri(stripped_name)
420
- except RunDBError as exc:
419
+ except mlrun.db.RunDBError as exc:
421
420
  # TODO: this handling is needed because the generic httpdb error handling doesn't raise the correct
422
421
  # error class and doesn't propagate the correct message, until it solved we're manually handling this
423
422
  # case to give better user experience, remove this when the error handling is fixed.
mlrun/feature_store/feature_set.py CHANGED
@@ -418,16 +418,6 @@ class FeatureSet(ModelObj):
418
418
  fullname += ":" + self._metadata.tag
419
419
  return fullname
420
420
 
421
- def _override_run_db(
422
- self,
423
- session,
424
- ):
425
- # Import here, since this method only runs in API context. If this import was global, client would need
426
- # API requirements and would fail.
427
- from ..api.api.utils import get_run_db_instance
428
-
429
- self._run_db = get_run_db_instance(session)
430
-
431
421
  def _get_run_db(self):
432
422
  if self._run_db:
433
423
  return self._run_db