mlrun 1.5.0rc1__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic; see the package registry page for more details.

Files changed (119)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +1 -40
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/functions.py +6 -1
  7. mlrun/api/api/endpoints/logs.py +17 -3
  8. mlrun/api/api/endpoints/pipelines.py +1 -5
  9. mlrun/api/api/endpoints/projects.py +88 -0
  10. mlrun/api/api/endpoints/runs.py +48 -6
  11. mlrun/api/api/endpoints/workflows.py +355 -0
  12. mlrun/api/api/utils.py +1 -1
  13. mlrun/api/crud/__init__.py +1 -0
  14. mlrun/api/crud/client_spec.py +3 -0
  15. mlrun/api/crud/model_monitoring/deployment.py +36 -7
  16. mlrun/api/crud/model_monitoring/grafana.py +1 -1
  17. mlrun/api/crud/model_monitoring/helpers.py +32 -2
  18. mlrun/api/crud/model_monitoring/model_endpoints.py +27 -5
  19. mlrun/api/crud/notifications.py +9 -4
  20. mlrun/api/crud/pipelines.py +4 -9
  21. mlrun/api/crud/runtime_resources.py +4 -3
  22. mlrun/api/crud/secrets.py +21 -0
  23. mlrun/api/crud/workflows.py +352 -0
  24. mlrun/api/db/base.py +16 -1
  25. mlrun/api/db/sqldb/db.py +97 -16
  26. mlrun/api/launcher.py +26 -7
  27. mlrun/api/main.py +3 -4
  28. mlrun/{mlutils → api/rundb}/__init__.py +2 -6
  29. mlrun/{db → api/rundb}/sqldb.py +35 -83
  30. mlrun/api/runtime_handlers/__init__.py +56 -0
  31. mlrun/api/runtime_handlers/base.py +1247 -0
  32. mlrun/api/runtime_handlers/daskjob.py +209 -0
  33. mlrun/api/runtime_handlers/kubejob.py +37 -0
  34. mlrun/api/runtime_handlers/mpijob.py +147 -0
  35. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  36. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  37. mlrun/api/utils/builder.py +1 -4
  38. mlrun/api/utils/clients/chief.py +14 -0
  39. mlrun/api/utils/scheduler.py +98 -15
  40. mlrun/api/utils/singletons/db.py +4 -0
  41. mlrun/artifacts/manager.py +1 -2
  42. mlrun/common/schemas/__init__.py +6 -0
  43. mlrun/common/schemas/auth.py +4 -1
  44. mlrun/common/schemas/client_spec.py +1 -1
  45. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  46. mlrun/common/schemas/model_monitoring/constants.py +11 -0
  47. mlrun/common/schemas/project.py +1 -0
  48. mlrun/common/schemas/runs.py +1 -8
  49. mlrun/common/schemas/schedule.py +1 -8
  50. mlrun/common/schemas/workflow.py +54 -0
  51. mlrun/config.py +42 -40
  52. mlrun/datastore/sources.py +1 -1
  53. mlrun/db/__init__.py +4 -68
  54. mlrun/db/base.py +12 -0
  55. mlrun/db/factory.py +65 -0
  56. mlrun/db/httpdb.py +175 -19
  57. mlrun/db/nopdb.py +4 -2
  58. mlrun/execution.py +4 -2
  59. mlrun/feature_store/__init__.py +1 -0
  60. mlrun/feature_store/api.py +1 -2
  61. mlrun/feature_store/feature_set.py +0 -10
  62. mlrun/feature_store/feature_vector.py +340 -2
  63. mlrun/feature_store/ingestion.py +5 -10
  64. mlrun/feature_store/retrieval/base.py +118 -104
  65. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  66. mlrun/feature_store/retrieval/job.py +4 -1
  67. mlrun/feature_store/retrieval/local_merger.py +18 -18
  68. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  69. mlrun/feature_store/retrieval/storey_merger.py +21 -15
  70. mlrun/kfpops.py +3 -9
  71. mlrun/launcher/base.py +3 -3
  72. mlrun/launcher/client.py +3 -2
  73. mlrun/launcher/factory.py +16 -13
  74. mlrun/lists.py +0 -11
  75. mlrun/model.py +9 -15
  76. mlrun/model_monitoring/helpers.py +15 -25
  77. mlrun/model_monitoring/model_monitoring_batch.py +72 -4
  78. mlrun/model_monitoring/prometheus.py +219 -0
  79. mlrun/model_monitoring/stores/__init__.py +15 -9
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +3 -1
  81. mlrun/model_monitoring/stream_processing.py +181 -29
  82. mlrun/package/packager.py +6 -8
  83. mlrun/package/packagers/default_packager.py +121 -10
  84. mlrun/platforms/__init__.py +0 -2
  85. mlrun/platforms/iguazio.py +0 -56
  86. mlrun/projects/pipelines.py +57 -158
  87. mlrun/projects/project.py +6 -32
  88. mlrun/render.py +1 -1
  89. mlrun/run.py +2 -124
  90. mlrun/runtimes/__init__.py +6 -42
  91. mlrun/runtimes/base.py +26 -1241
  92. mlrun/runtimes/daskjob.py +2 -198
  93. mlrun/runtimes/function.py +16 -5
  94. mlrun/runtimes/kubejob.py +5 -29
  95. mlrun/runtimes/mpijob/__init__.py +2 -2
  96. mlrun/runtimes/mpijob/abstract.py +10 -1
  97. mlrun/runtimes/mpijob/v1.py +0 -76
  98. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  99. mlrun/runtimes/nuclio.py +3 -2
  100. mlrun/runtimes/pod.py +0 -10
  101. mlrun/runtimes/remotesparkjob.py +1 -15
  102. mlrun/runtimes/serving.py +1 -1
  103. mlrun/runtimes/sparkjob/__init__.py +0 -1
  104. mlrun/runtimes/sparkjob/abstract.py +4 -131
  105. mlrun/serving/states.py +1 -1
  106. mlrun/utils/db.py +0 -2
  107. mlrun/utils/helpers.py +19 -13
  108. mlrun/utils/notifications/notification_pusher.py +5 -25
  109. mlrun/utils/regex.py +7 -2
  110. mlrun/utils/version/version.json +2 -2
  111. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +24 -23
  112. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +116 -107
  113. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  114. mlrun/mlutils/data.py +0 -160
  115. mlrun/mlutils/models.py +0 -78
  116. mlrun/mlutils/plots.py +0 -902
  117. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  118. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  119. {mlrun-1.5.0rc1.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
@@ -142,14 +142,14 @@ class Scheduler:
142
142
  self._enrich_schedule_notifications(project, name, scheduled_object)
143
143
 
144
144
  get_db().create_schedule(
145
- db_session,
146
- project,
147
- name,
148
- kind,
149
- scheduled_object,
150
- cron_trigger,
151
- concurrency_limit,
152
- labels,
145
+ session=db_session,
146
+ project=project,
147
+ name=name,
148
+ kind=kind,
149
+ scheduled_object=scheduled_object,
150
+ cron_trigger=cron_trigger,
151
+ concurrency_limit=concurrency_limit,
152
+ labels=labels,
153
153
  )
154
154
  job = self._create_schedule_in_scheduler(
155
155
  project,
@@ -217,13 +217,13 @@ class Scheduler:
217
217
  self._enrich_schedule_notifications(project, name, scheduled_object)
218
218
 
219
219
  get_db().update_schedule(
220
- db_session,
221
- project,
222
- name,
223
- scheduled_object,
224
- cron_trigger,
225
- labels,
226
- concurrency_limit,
220
+ session=db_session,
221
+ project=project,
222
+ name=name,
223
+ scheduled_object=scheduled_object,
224
+ cron_trigger=cron_trigger,
225
+ labels=labels,
226
+ concurrency_limit=concurrency_limit,
227
227
  )
228
228
  db_schedule = get_db().get_schedule(db_session, project, name)
229
229
 
@@ -303,6 +303,89 @@ class Scheduler:
303
303
  )
304
304
  get_db().delete_schedules(db_session, project)
305
305
 
306
+ @mlrun.api.utils.helpers.ensure_running_on_chief
307
+ def store_schedule(
308
+ self,
309
+ db_session: Session,
310
+ auth_info: mlrun.common.schemas.AuthInfo,
311
+ project: str,
312
+ name: str,
313
+ scheduled_object: Union[Dict, Callable] = None,
314
+ cron_trigger: Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
315
+ labels: Dict = None,
316
+ concurrency_limit: int = None,
317
+ kind: mlrun.common.schemas.ScheduleKinds = None,
318
+ ):
319
+ if isinstance(cron_trigger, str):
320
+ cron_trigger = mlrun.common.schemas.ScheduleCronTrigger.from_crontab(
321
+ cron_trigger
322
+ )
323
+
324
+ if cron_trigger is not None:
325
+ self._validate_cron_trigger(cron_trigger)
326
+
327
+ logger.debug(
328
+ "Storing schedule",
329
+ project=project,
330
+ name=name,
331
+ kind=kind,
332
+ scheduled_object=scheduled_object,
333
+ cron_trigger=cron_trigger,
334
+ labels=labels,
335
+ concurrency_limit=concurrency_limit,
336
+ )
337
+
338
+ if not kind:
339
+ # TODO: Need to think of a way to not use `get_schedule`
340
+ # in this function or in `get_db().store_function()` in this flow
341
+ # because we must have kind to ensure that auth info has access key.
342
+ db_schedule = get_db().get_schedule(
343
+ db_session, project, name, raise_on_not_found=False
344
+ )
345
+ kind = db_schedule.kind
346
+
347
+ self._ensure_auth_info_has_access_key(auth_info, kind)
348
+ secret_name = self._store_schedule_secrets_using_auth_secret(auth_info)
349
+ labels = self._append_access_key_secret_to_labels(labels, secret_name)
350
+ self._enrich_schedule_notifications(project, name, scheduled_object)
351
+
352
+ db_schedule = get_db().store_schedule(
353
+ session=db_session,
354
+ project=project,
355
+ name=name,
356
+ kind=kind,
357
+ scheduled_object=scheduled_object,
358
+ cron_trigger=cron_trigger,
359
+ labels=labels,
360
+ concurrency_limit=concurrency_limit,
361
+ )
362
+ if db_schedule:
363
+ updated_schedule = self._transform_and_enrich_db_schedule(
364
+ db_session, db_schedule
365
+ )
366
+
367
+ job = self._update_schedule_in_scheduler(
368
+ project,
369
+ name,
370
+ updated_schedule.kind,
371
+ updated_schedule.scheduled_object,
372
+ updated_schedule.cron_trigger,
373
+ updated_schedule.concurrency_limit,
374
+ auth_info,
375
+ )
376
+ else:
377
+ job = self._create_schedule_in_scheduler(
378
+ project,
379
+ name,
380
+ kind,
381
+ scheduled_object,
382
+ cron_trigger,
383
+ concurrency_limit,
384
+ auth_info,
385
+ )
386
+
387
+ self.update_schedule_next_run_time(db_session, name, project, job)
388
+
306
389
  def _remove_schedule_scheduler_resources(self, db_session: Session, project, name):
307
390
  self._remove_schedule_from_scheduler(project, name)
308
391
  # This is kept for backwards compatibility - if schedule was using the "old" format of storing secrets, then
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
+ import mlrun.db
15
16
  from mlrun.api.db.base import DBInterface
16
17
  from mlrun.api.db.sqldb.db import SQLDB
17
18
  from mlrun.common.db.sql_session import create_session
@@ -34,6 +35,9 @@ def initialize_db(override_db=None):
34
35
  return
35
36
  logger.info("Creating sql db")
36
37
  db = SQLDB(config.httpdb.dsn)
38
+ # set the run db path to the sql db dsn
39
+ mlrun.db.get_or_set_dburl(config.httpdb.dsn)
40
+
37
41
  db_session = None
38
42
  try:
39
43
  db_session = create_session()
@@ -17,7 +17,6 @@ from os.path import isdir
17
17
 
18
18
  import mlrun.config
19
19
 
20
- from ..db import RunDBInterface
21
20
  from ..utils import (
22
21
  is_legacy_artifact,
23
22
  is_relative_path,
@@ -111,7 +110,7 @@ def dict_to_artifact(struct: dict) -> Artifact:
111
110
  class ArtifactManager:
112
111
  def __init__(
113
112
  self,
114
- db: RunDBInterface = None,
113
+ db: "mlrun.db.RunDBInterface" = None,
115
114
  calc_hash=True,
116
115
  ):
117
116
  self.calc_hash = calc_hash
@@ -171,3 +171,9 @@ from .secret import (
171
171
  UserSecretCreationRequest,
172
172
  )
173
173
  from .tag import Tag, TagObjects
174
+ from .workflow import (
175
+ GetWorkflowResponse,
176
+ WorkflowRequest,
177
+ WorkflowResponse,
178
+ WorkflowSpec,
179
+ )
@@ -57,6 +57,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
57
57
  model_endpoint = "model-endpoint"
58
58
  pipeline = "pipeline"
59
59
  hub_source = "hub-source"
60
+ workflow = "workflow"
60
61
 
61
62
  def to_resource_string(
62
63
  self,
@@ -87,8 +88,10 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
87
88
  AuthorizationResourceTypes.pipeline: "/projects/{project_name}/pipelines/{resource_name}",
88
89
  # Hub sources are not project-scoped, and auth is globally on the sources endpoint.
89
90
  # TODO - this was reverted to /marketplace since MLRun needs to be able to run with old igz versions. Once
90
- # we only have support for igz versions that support /hub (>=3.5.4), change this to "/hub/sources".
91
+ # we only have support for igz versions that support /hub (>=3.5.4), change this to "/hub/sources".
91
92
  AuthorizationResourceTypes.hub_source: "/marketplace/sources",
93
+ # workflow define how to run a pipeline and can be considered as the specification of a pipeline.
94
+ AuthorizationResourceTypes.workflow: "/projects/{project_name}/workflows/{resource_name}",
92
95
  }[self].format(project_name=project_name, resource_name=resource_name)
93
96
 
94
97
 
@@ -57,7 +57,7 @@ class ClientSpec(pydantic.BaseModel):
57
57
  redis_url: typing.Optional[str]
58
58
  redis_type: typing.Optional[str]
59
59
  sql_url: typing.Optional[str]
60
-
60
+ model_endpoint_monitoring_store_type: typing.Optional[str]
61
61
  # ce_mode is deprecated, we will use the full ce config instead and ce_mode will be removed in 1.6.0
62
62
  ce_mode: typing.Optional[str]
63
63
  ce: typing.Optional[dict]
@@ -25,6 +25,7 @@ from .constants import (
25
25
  ModelMonitoringMode,
26
26
  ModelMonitoringStoreKinds,
27
27
  ProjectSecretKeys,
28
+ PrometheusMetric,
28
29
  TimeSeriesTarget,
29
30
  VersionedModel,
30
31
  )
@@ -49,6 +49,7 @@ class EventFieldType:
49
49
  ENTITIES = "entities"
50
50
  FIRST_REQUEST = "first_request"
51
51
  LAST_REQUEST = "last_request"
52
+ METRIC = "metric"
52
53
  METRICS = "metrics"
53
54
  TIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
54
55
  BATCH_INTERVALS_DICT = "batch_intervals_dict"
@@ -71,6 +72,7 @@ class EventFieldType:
71
72
  MONITOR_CONFIGURATION = "monitor_configuration"
72
73
  FEATURE_SET_URI = "monitoring_feature_set_uri"
73
74
  ALGORITHM = "algorithm"
75
+ VALUE = "value"
74
76
 
75
77
 
76
78
  class EventLiveStats:
@@ -129,6 +131,15 @@ class EndpointType(enum.IntEnum):
129
131
  LEAF_EP = 3 # end point that is a child of a router
130
132
 
131
133
 
134
+ class PrometheusMetric:
135
+ PREDICTIONS_TOTAL = "predictions_total"
136
+ MODEL_LATENCY_SECONDS = "model_latency_seconds"
137
+ INCOME_FEATURES = "income_features"
138
+ ERRORS_TOTAL = "errors_total"
139
+ DRIFT_METRICS = "drift_metrics"
140
+ DRIFT_STATUS = "drift_status"
141
+
142
+
132
143
  @dataclass
133
144
  class FunctionURI:
134
145
  project: str
@@ -84,6 +84,7 @@ class ProjectSpec(pydantic.BaseModel):
84
84
  origin_url: typing.Optional[str] = None
85
85
  desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
86
86
  custom_packagers: typing.Optional[typing.List[typing.Tuple[str, bool]]] = None
87
+ default_image: typing.Optional[str] = None
87
88
 
88
89
  class Config:
89
90
  extra = pydantic.Extra.allow
@@ -14,17 +14,10 @@
14
14
 
15
15
  import typing
16
16
 
17
- # TODO: When we remove support for python 3.7, we can use Literal from the typing package.
18
- # Remove the following try/except block with import from typing_extensions.
19
- try:
20
- from typing import Literal
21
- except ImportError:
22
- from typing_extensions import Literal
23
-
24
17
  import pydantic
25
18
 
26
19
 
27
20
  class RunIdentifier(pydantic.BaseModel):
28
- kind: Literal["run"] = "run"
21
+ kind: typing.Literal["run"] = "run"
29
22
  uid: typing.Optional[str]
30
23
  iter: typing.Optional[int]
@@ -13,14 +13,7 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from datetime import datetime
16
- from typing import Any, List, Optional, Union
17
-
18
- # TODO: When we remove support for python 3.7, we can use Literal from the typing package.
19
- # Remove the following try/except block with import from typing_extensions.
20
- try:
21
- from typing import Literal
22
- except ImportError:
23
- from typing_extensions import Literal
16
+ from typing import Any, List, Literal, Optional, Union
24
17
 
25
18
  from pydantic import BaseModel
26
19
 
@@ -0,0 +1,54 @@
1
+ # Copyright 2018 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ import typing
16
+
17
+ import pydantic
18
+
19
+ from .schedule import ScheduleCronTrigger
20
+
21
+
22
+ class WorkflowSpec(pydantic.BaseModel):
23
+ name: str
24
+ engine: typing.Optional[str] = None
25
+ code: typing.Optional[str] = None
26
+ path: typing.Optional[str] = None
27
+ args: typing.Optional[dict] = None
28
+ handler: typing.Optional[str] = None
29
+ ttl: typing.Optional[int] = None
30
+ args_schema: typing.Optional[list] = None
31
+ schedule: typing.Union[str, ScheduleCronTrigger] = None
32
+ run_local: typing.Optional[bool] = None
33
+ image: typing.Optional[str] = None
34
+
35
+
36
+ class WorkflowRequest(pydantic.BaseModel):
37
+ spec: typing.Optional[WorkflowSpec] = None
38
+ arguments: typing.Optional[typing.Dict] = None
39
+ artifact_path: typing.Optional[str] = None
40
+ source: typing.Optional[str] = None
41
+ run_name: typing.Optional[str] = None
42
+ namespace: typing.Optional[str] = None
43
+
44
+
45
+ class WorkflowResponse(pydantic.BaseModel):
46
+ project: str = None
47
+ name: str = None
48
+ status: str = None
49
+ run_id: typing.Optional[str] = None
50
+ schedule: typing.Union[str, ScheduleCronTrigger] = None
51
+
52
+
53
+ class GetWorkflowResponse(pydantic.BaseModel):
54
+ workflow_id: str = None
mlrun/config.py CHANGED
@@ -27,8 +27,8 @@ import copy
27
27
  import json
28
28
  import os
29
29
  import typing
30
- import urllib.parse
31
30
  from collections.abc import Mapping
31
+ from datetime import timedelta
32
32
  from distutils.util import strtobool
33
33
  from os.path import expanduser
34
34
  from threading import Lock
@@ -149,7 +149,7 @@ default_config = {
149
149
  "timeout_mode": "enabled",
150
150
  # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
151
151
  "default_timeouts": {
152
- "operations": {"migrations": "3600"},
152
+ "operations": {"migrations": "3600", "load_project": "60"},
153
153
  "runtimes": {"dask": "600"},
154
154
  },
155
155
  },
@@ -417,6 +417,7 @@ default_config = {
417
417
  "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
418
418
  "batch_processing_function_branch": "master",
419
419
  "parquet_batching_max_events": 10000,
420
+ "parquet_batching_timeout_secs": timedelta(minutes=30).total_seconds(),
420
421
  # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
421
422
  "store_type": "v3io-nosql",
422
423
  "endpoint_store_connection": "",
@@ -516,7 +517,11 @@ default_config = {
516
517
  "debug": {
517
518
  "expose_internal_api_endpoints": False,
518
519
  },
519
- "default_workflow_runner_name": "workflow-runner-{}",
520
+ "workflows": {
521
+ "default_workflow_runner_name": "workflow-runner-{}",
522
+ # Default timeout seconds for retrieving workflow id after execution:
523
+ "timeouts": {"local": 120, "kfp": 30},
524
+ },
520
525
  "log_collector": {
521
526
  "address": "localhost:8282",
522
527
  # log collection mode can be one of: "sidecar", "legacy", "best-effort"
@@ -776,7 +781,6 @@ class Config:
776
781
  return semver.VersionInfo.parse(f"{semver_compatible_igz_version}.0")
777
782
 
778
783
  def verify_security_context_enrichment_mode_is_allowed(self):
779
-
780
784
  # TODO: move SecurityContextEnrichmentModes to a different package so that we could use it here without
781
785
  # importing mlrun.api
782
786
  if config.function.spec.security_context.enrichment_mode == "disabled":
@@ -933,36 +937,6 @@ class Config:
933
937
  # when dbpath is set we want to connect to it which will sync configuration from it to the client
934
938
  mlrun.db.get_run_db(value, force_reconnect=True)
935
939
 
936
- @property
937
- def iguazio_api_url(self):
938
- """
939
- we want to be able to run with old versions of the service who runs the API (which doesn't configure this
940
- value) so we're doing best effort to try and resolve it from other configurations
941
- TODO: Remove this hack when 0.6.x is old enough
942
- """
943
- if not self._iguazio_api_url:
944
- if self.httpdb.builder.docker_registry and self.igz_version:
945
- return self._extract_iguazio_api_from_docker_registry_url()
946
- return self._iguazio_api_url
947
-
948
- def _extract_iguazio_api_from_docker_registry_url(self):
949
- docker_registry_url = self.httpdb.builder.docker_registry
950
- # add schema otherwise parsing go wrong
951
- if "://" not in docker_registry_url:
952
- docker_registry_url = f"http://{docker_registry_url}"
953
- parsed_registry_url = urllib.parse.urlparse(docker_registry_url)
954
- registry_hostname = parsed_registry_url.hostname
955
- # replace the first domain section (app service name) with dashboard
956
- first_dot_index = registry_hostname.find(".")
957
- if first_dot_index < 0:
958
- # if not found it's not the format we know - can't resolve the api url from the registry url
959
- return ""
960
- return f"https://dashboard{registry_hostname[first_dot_index:]}"
961
-
962
- @iguazio_api_url.setter
963
- def iguazio_api_url(self, value):
964
- self._iguazio_api_url = value
965
-
966
940
  def is_api_running_on_k8s(self):
967
941
  # determine if the API service is attached to K8s cluster
968
942
  # when there is a cluster the .namespace is set
@@ -1045,6 +1019,40 @@ class Config:
1045
1019
  ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
1046
1020
  )
1047
1021
 
1022
+ def get_s3_storage_options(self) -> typing.Dict[str, typing.Any]:
1023
+ """
1024
+ Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
1025
+ graph uses this method for generating the storage options for S3 parquet target path.
1026
+ :return: A storage options dictionary in which each key-value pair represents a particular configuration,
1027
+ such as endpoint_url or aws access key.
1028
+ """
1029
+ key = mlrun.get_secret_or_env("AWS_ACCESS_KEY_ID")
1030
+ secret = mlrun.get_secret_or_env("AWS_SECRET_ACCESS_KEY")
1031
+
1032
+ force_non_anonymous = mlrun.get_secret_or_env("S3_NON_ANONYMOUS")
1033
+ profile = mlrun.get_secret_or_env("AWS_PROFILE")
1034
+
1035
+ storage_options = dict(
1036
+ anon=not (force_non_anonymous or (key and secret)),
1037
+ key=key,
1038
+ secret=secret,
1039
+ )
1040
+
1041
+ endpoint_url = mlrun.get_secret_or_env("S3_ENDPOINT_URL")
1042
+ if endpoint_url:
1043
+ client_kwargs = {"endpoint_url": endpoint_url}
1044
+ storage_options["client_kwargs"] = client_kwargs
1045
+
1046
+ if profile:
1047
+ storage_options["profile"] = profile
1048
+
1049
+ return storage_options
1050
+
1051
+ def is_explicit_ack(self) -> bool:
1052
+ return self.httpdb.nuclio.explicit_ack == "enabled" and (
1053
+ not self.nuclio_version or self.nuclio_version >= "1.11.20"
1054
+ )
1055
+
1048
1056
 
1049
1057
  # Global configuration
1050
1058
  config = Config.from_dict(default_config)
@@ -1092,12 +1100,6 @@ def _do_populate(env=None, skip_errors=False):
1092
1100
  if data:
1093
1101
  config.update(data, skip_errors=skip_errors)
1094
1102
 
1095
- # HACK to enable config property to both have dynamic default and to use the value from dict/env like other
1096
- # configurations - we just need a key in the dict that is different than the property name, so simply adding prefix
1097
- # underscore
1098
- config._cfg["_iguazio_api_url"] = config._cfg["iguazio_api_url"]
1099
- del config._cfg["iguazio_api_url"]
1100
-
1101
1103
  _validate_config(config)
1102
1104
 
1103
1105
 
@@ -793,7 +793,7 @@ class OnlineSource(BaseSourceDriver):
793
793
  context=context,
794
794
  key_field=self.key_field,
795
795
  full_event=True,
796
- explicit_ack=mlrun.mlconf.httpdb.nuclio.explicit_ack == "enabled",
796
+ explicit_ack=mlrun.mlconf.is_explicit_ack(),
797
797
  **source_args,
798
798
  )
799
799
 
mlrun/db/__init__.py CHANGED
@@ -12,14 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  from os import environ
15
- from urllib.parse import urlparse
16
15
 
17
16
  from ..config import config
18
- from ..platforms import add_or_refresh_credentials
19
- from ..utils import logger
20
17
  from .base import RunDBError, RunDBInterface # noqa
21
- from .nopdb import NopDB
22
- from .sqldb import SQLDB
23
18
 
24
19
 
25
20
  def get_or_set_dburl(default=""):
@@ -29,69 +24,10 @@ def get_or_set_dburl(default=""):
29
24
  return config.dbpath
30
25
 
31
26
 
32
- def get_httpdb_kwargs(host, username, password):
33
- username = username or config.httpdb.user
34
- password = password or config.httpdb.password
35
-
36
- username, password, token = add_or_refresh_credentials(
37
- host, username, password, config.httpdb.token
38
- )
39
-
40
- return {
41
- "user": username,
42
- "password": password,
43
- "token": token,
44
- }
45
-
46
-
47
- _run_db = None
48
- _last_db_url = None
49
-
50
-
51
27
  def get_run_db(url="", secrets=None, force_reconnect=False):
52
28
  """Returns the runtime database"""
53
- global _run_db, _last_db_url
54
-
55
- if not url:
56
- url = get_or_set_dburl("./")
57
-
58
- if (
59
- _last_db_url is not None
60
- and url == _last_db_url
61
- and _run_db
62
- and not force_reconnect
63
- ):
64
- return _run_db
65
- _last_db_url = url
66
-
67
- parsed_url = urlparse(url)
68
- scheme = parsed_url.scheme.lower()
69
- kwargs = {}
70
- if "://" not in str(url) or scheme in ["file", "s3", "v3io", "v3ios"]:
71
- logger.warning(
72
- "Could not detect path to API server, not connected to API server!"
73
- )
74
- logger.warning(
75
- "MLRUN_DBPATH is not set. Set this environment variable to the URL of the API server"
76
- " in order to connect"
77
- )
78
- cls = NopDB
79
-
80
- elif scheme in ("http", "https"):
81
- # import here to avoid circular imports
82
- from .httpdb import HTTPRunDB
83
-
84
- cls = HTTPRunDB
85
- kwargs = get_httpdb_kwargs(
86
- parsed_url.hostname, parsed_url.username, parsed_url.password
87
- )
88
- endpoint = parsed_url.hostname
89
- if parsed_url.port:
90
- endpoint += f":{parsed_url.port}"
91
- url = f"{parsed_url.scheme}://{endpoint}{parsed_url.path}"
92
- else:
93
- cls = SQLDB
29
+ # import here to avoid circular import
30
+ import mlrun.db.factory
94
31
 
95
- _run_db = cls(url, **kwargs)
96
- _run_db.connect(secrets=secrets)
97
- return _run_db
32
+ run_db_factory = mlrun.db.factory.RunDBFactory()
33
+ return run_db_factory.create_run_db(url, secrets, force_reconnect)
mlrun/db/base.py CHANGED
@@ -621,3 +621,15 @@ class RunDBInterface(ABC):
621
621
  notifications: typing.List[mlrun.model.Notification],
622
622
  ):
623
623
  pass
624
+
625
+ def store_run_notifications(
626
+ self,
627
+ notification_objects: typing.List[mlrun.model.Notification],
628
+ run_uid: str,
629
+ project: str = None,
630
+ mask_params: bool = True,
631
+ ):
632
+ pass
633
+
634
+ def watch_log(self, uid, project="", watch=True, offset=0):
635
+ pass
mlrun/db/factory.py ADDED
@@ -0,0 +1,65 @@
1
+ # Copyright 2023 MLRun Authors
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from dependency_injector import containers, providers
15
+
16
+ import mlrun.db
17
+ import mlrun.db.httpdb
18
+ import mlrun.db.nopdb
19
+ import mlrun.utils.singleton
20
+ from mlrun.utils import logger
21
+
22
+
23
+ class RunDBFactory(
24
+ metaclass=mlrun.utils.singleton.AbstractSingleton,
25
+ ):
26
+ def __init__(self):
27
+ self._run_db = None
28
+ self._last_db_url = None
29
+ self._rundb_container = RunDBContainer()
30
+
31
+ def create_run_db(self, url="", secrets=None, force_reconnect=False):
32
+ """Returns the runtime database"""
33
+ if not url:
34
+ url = mlrun.db.get_or_set_dburl("./")
35
+
36
+ if (
37
+ self._last_db_url is not None
38
+ and url == self._last_db_url
39
+ and self._run_db
40
+ and not force_reconnect
41
+ ):
42
+ return self._run_db
43
+
44
+ self._last_db_url = url
45
+
46
+ if "://" not in str(url):
47
+ logger.warning(
48
+ "Could not detect path to API server, not connected to API server!"
49
+ )
50
+ logger.warning(
51
+ "MLRUN_DBPATH is misconfigured. Set this environment variable to the URL of the API server"
52
+ " in order to connect"
53
+ )
54
+ self._run_db = self._rundb_container.nop(url)
55
+
56
+ else:
57
+ self._run_db = self._rundb_container.run_db(url)
58
+
59
+ self._run_db.connect(secrets=secrets)
60
+ return self._run_db
61
+
62
+
63
+ class RunDBContainer(containers.DeclarativeContainer):
64
+ nop = providers.Factory(mlrun.db.nopdb.NopDB)
65
+ run_db = providers.Factory(mlrun.db.httpdb.HTTPRunDB)