mlrun 1.4.0rc25__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (184)
  1. mlrun/__init__.py +2 -35
  2. mlrun/__main__.py +3 -41
  3. mlrun/api/api/api.py +6 -0
  4. mlrun/api/api/endpoints/feature_store.py +0 -4
  5. mlrun/api/api/endpoints/files.py +14 -2
  6. mlrun/api/api/endpoints/frontend_spec.py +2 -1
  7. mlrun/api/api/endpoints/functions.py +95 -59
  8. mlrun/api/api/endpoints/grafana_proxy.py +9 -9
  9. mlrun/api/api/endpoints/logs.py +17 -3
  10. mlrun/api/api/endpoints/model_endpoints.py +3 -2
  11. mlrun/api/api/endpoints/pipelines.py +1 -5
  12. mlrun/api/api/endpoints/projects.py +88 -0
  13. mlrun/api/api/endpoints/runs.py +48 -6
  14. mlrun/api/api/endpoints/submit.py +2 -1
  15. mlrun/api/api/endpoints/workflows.py +355 -0
  16. mlrun/api/api/utils.py +3 -4
  17. mlrun/api/crud/__init__.py +1 -0
  18. mlrun/api/crud/client_spec.py +6 -2
  19. mlrun/api/crud/feature_store.py +5 -0
  20. mlrun/api/crud/model_monitoring/__init__.py +1 -0
  21. mlrun/api/crud/model_monitoring/deployment.py +497 -0
  22. mlrun/api/crud/model_monitoring/grafana.py +96 -42
  23. mlrun/api/crud/model_monitoring/helpers.py +159 -0
  24. mlrun/api/crud/model_monitoring/model_endpoints.py +202 -476
  25. mlrun/api/crud/notifications.py +9 -4
  26. mlrun/api/crud/pipelines.py +6 -11
  27. mlrun/api/crud/projects.py +2 -2
  28. mlrun/api/crud/runtime_resources.py +4 -3
  29. mlrun/api/crud/runtimes/nuclio/helpers.py +5 -1
  30. mlrun/api/crud/secrets.py +21 -0
  31. mlrun/api/crud/workflows.py +352 -0
  32. mlrun/api/db/base.py +16 -1
  33. mlrun/api/db/init_db.py +2 -4
  34. mlrun/api/db/session.py +1 -1
  35. mlrun/api/db/sqldb/db.py +129 -31
  36. mlrun/api/db/sqldb/models/models_mysql.py +15 -1
  37. mlrun/api/db/sqldb/models/models_sqlite.py +16 -2
  38. mlrun/api/launcher.py +38 -6
  39. mlrun/api/main.py +3 -2
  40. mlrun/api/rundb/__init__.py +13 -0
  41. mlrun/{db → api/rundb}/sqldb.py +36 -84
  42. mlrun/api/runtime_handlers/__init__.py +56 -0
  43. mlrun/api/runtime_handlers/base.py +1247 -0
  44. mlrun/api/runtime_handlers/daskjob.py +209 -0
  45. mlrun/api/runtime_handlers/kubejob.py +37 -0
  46. mlrun/api/runtime_handlers/mpijob.py +147 -0
  47. mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
  48. mlrun/api/runtime_handlers/sparkjob.py +148 -0
  49. mlrun/api/schemas/__init__.py +17 -6
  50. mlrun/api/utils/builder.py +1 -4
  51. mlrun/api/utils/clients/chief.py +14 -0
  52. mlrun/api/utils/clients/iguazio.py +33 -33
  53. mlrun/api/utils/clients/nuclio.py +2 -2
  54. mlrun/api/utils/periodic.py +9 -2
  55. mlrun/api/utils/projects/follower.py +14 -7
  56. mlrun/api/utils/projects/leader.py +2 -1
  57. mlrun/api/utils/projects/remotes/nop_follower.py +2 -2
  58. mlrun/api/utils/projects/remotes/nop_leader.py +2 -2
  59. mlrun/api/utils/runtimes/__init__.py +14 -0
  60. mlrun/api/utils/runtimes/nuclio.py +43 -0
  61. mlrun/api/utils/scheduler.py +98 -15
  62. mlrun/api/utils/singletons/db.py +5 -1
  63. mlrun/api/utils/singletons/project_member.py +4 -1
  64. mlrun/api/utils/singletons/scheduler.py +1 -1
  65. mlrun/artifacts/base.py +6 -6
  66. mlrun/artifacts/dataset.py +4 -4
  67. mlrun/artifacts/manager.py +2 -3
  68. mlrun/artifacts/model.py +2 -2
  69. mlrun/artifacts/plots.py +8 -8
  70. mlrun/common/db/__init__.py +14 -0
  71. mlrun/common/helpers.py +37 -0
  72. mlrun/{mlutils → common/model_monitoring}/__init__.py +3 -2
  73. mlrun/common/model_monitoring/helpers.py +69 -0
  74. mlrun/common/schemas/__init__.py +13 -1
  75. mlrun/common/schemas/auth.py +4 -1
  76. mlrun/common/schemas/client_spec.py +1 -1
  77. mlrun/common/schemas/function.py +17 -0
  78. mlrun/common/schemas/model_monitoring/__init__.py +48 -0
  79. mlrun/common/{model_monitoring.py → schemas/model_monitoring/constants.py} +11 -23
  80. mlrun/common/schemas/model_monitoring/grafana.py +55 -0
  81. mlrun/common/schemas/{model_endpoints.py → model_monitoring/model_endpoints.py} +32 -65
  82. mlrun/common/schemas/notification.py +1 -0
  83. mlrun/common/schemas/object.py +4 -0
  84. mlrun/common/schemas/project.py +1 -0
  85. mlrun/common/schemas/regex.py +1 -1
  86. mlrun/common/schemas/runs.py +1 -8
  87. mlrun/common/schemas/schedule.py +1 -8
  88. mlrun/common/schemas/workflow.py +54 -0
  89. mlrun/config.py +45 -42
  90. mlrun/datastore/__init__.py +21 -0
  91. mlrun/datastore/base.py +1 -1
  92. mlrun/datastore/datastore.py +9 -0
  93. mlrun/datastore/dbfs_store.py +168 -0
  94. mlrun/datastore/helpers.py +18 -0
  95. mlrun/datastore/sources.py +1 -0
  96. mlrun/datastore/store_resources.py +2 -5
  97. mlrun/datastore/v3io.py +1 -2
  98. mlrun/db/__init__.py +4 -68
  99. mlrun/db/base.py +12 -0
  100. mlrun/db/factory.py +65 -0
  101. mlrun/db/httpdb.py +175 -20
  102. mlrun/db/nopdb.py +4 -2
  103. mlrun/execution.py +4 -2
  104. mlrun/feature_store/__init__.py +1 -0
  105. mlrun/feature_store/api.py +1 -2
  106. mlrun/feature_store/common.py +2 -1
  107. mlrun/feature_store/feature_set.py +1 -11
  108. mlrun/feature_store/feature_vector.py +340 -2
  109. mlrun/feature_store/ingestion.py +5 -10
  110. mlrun/feature_store/retrieval/base.py +118 -104
  111. mlrun/feature_store/retrieval/dask_merger.py +17 -10
  112. mlrun/feature_store/retrieval/job.py +4 -1
  113. mlrun/feature_store/retrieval/local_merger.py +18 -18
  114. mlrun/feature_store/retrieval/spark_merger.py +21 -14
  115. mlrun/feature_store/retrieval/storey_merger.py +22 -16
  116. mlrun/kfpops.py +3 -9
  117. mlrun/launcher/base.py +57 -53
  118. mlrun/launcher/client.py +5 -4
  119. mlrun/launcher/factory.py +24 -13
  120. mlrun/launcher/local.py +6 -6
  121. mlrun/launcher/remote.py +4 -4
  122. mlrun/lists.py +0 -11
  123. mlrun/model.py +11 -17
  124. mlrun/model_monitoring/__init__.py +2 -22
  125. mlrun/model_monitoring/features_drift_table.py +1 -1
  126. mlrun/model_monitoring/helpers.py +22 -210
  127. mlrun/model_monitoring/model_endpoint.py +1 -1
  128. mlrun/model_monitoring/model_monitoring_batch.py +127 -50
  129. mlrun/model_monitoring/prometheus.py +219 -0
  130. mlrun/model_monitoring/stores/__init__.py +16 -11
  131. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +95 -23
  132. mlrun/model_monitoring/stores/models/mysql.py +47 -29
  133. mlrun/model_monitoring/stores/models/sqlite.py +47 -29
  134. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +31 -19
  135. mlrun/model_monitoring/{stream_processing_fs.py → stream_processing.py} +206 -64
  136. mlrun/model_monitoring/tracking_policy.py +104 -0
  137. mlrun/package/packager.py +6 -8
  138. mlrun/package/packagers/default_packager.py +121 -10
  139. mlrun/package/packagers/numpy_packagers.py +1 -1
  140. mlrun/platforms/__init__.py +0 -2
  141. mlrun/platforms/iguazio.py +0 -56
  142. mlrun/projects/pipelines.py +53 -159
  143. mlrun/projects/project.py +10 -37
  144. mlrun/render.py +1 -1
  145. mlrun/run.py +8 -124
  146. mlrun/runtimes/__init__.py +6 -42
  147. mlrun/runtimes/base.py +29 -1249
  148. mlrun/runtimes/daskjob.py +2 -198
  149. mlrun/runtimes/funcdoc.py +0 -9
  150. mlrun/runtimes/function.py +25 -29
  151. mlrun/runtimes/kubejob.py +5 -29
  152. mlrun/runtimes/local.py +1 -1
  153. mlrun/runtimes/mpijob/__init__.py +2 -2
  154. mlrun/runtimes/mpijob/abstract.py +10 -1
  155. mlrun/runtimes/mpijob/v1.py +0 -76
  156. mlrun/runtimes/mpijob/v1alpha1.py +1 -74
  157. mlrun/runtimes/nuclio.py +3 -2
  158. mlrun/runtimes/pod.py +28 -18
  159. mlrun/runtimes/remotesparkjob.py +1 -15
  160. mlrun/runtimes/serving.py +14 -6
  161. mlrun/runtimes/sparkjob/__init__.py +0 -1
  162. mlrun/runtimes/sparkjob/abstract.py +4 -131
  163. mlrun/runtimes/utils.py +0 -26
  164. mlrun/serving/routers.py +7 -7
  165. mlrun/serving/server.py +11 -8
  166. mlrun/serving/states.py +7 -1
  167. mlrun/serving/v2_serving.py +6 -6
  168. mlrun/utils/helpers.py +23 -42
  169. mlrun/utils/notifications/notification/__init__.py +4 -0
  170. mlrun/utils/notifications/notification/webhook.py +61 -0
  171. mlrun/utils/notifications/notification_pusher.py +5 -25
  172. mlrun/utils/regex.py +7 -2
  173. mlrun/utils/version/version.json +2 -2
  174. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +26 -25
  175. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +180 -158
  176. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
  177. mlrun/mlutils/data.py +0 -160
  178. mlrun/mlutils/models.py +0 -78
  179. mlrun/mlutils/plots.py +0 -902
  180. mlrun/utils/model_monitoring.py +0 -249
  181. /mlrun/{api/db/sqldb/session.py → common/db/sql_session.py} +0 -0
  182. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
  183. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
  184. {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
mlrun/config.py CHANGED
@@ -27,8 +27,8 @@ import copy
 import json
 import os
 import typing
-import urllib.parse
 from collections.abc import Mapping
+from datetime import timedelta
 from distutils.util import strtobool
 from os.path import expanduser
 from threading import Lock
@@ -149,7 +149,7 @@ default_config = {
         "timeout_mode": "enabled",
         # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
         "default_timeouts": {
-            "operations": {"migrations": "3600"},
+            "operations": {"migrations": "3600", "load_project": "60"},
             "runtimes": {"dask": "600"},
         },
     },
@@ -286,6 +286,7 @@ default_config = {
             # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
             # - mlrun.runtimes.function.enrich_function_with_ingress
             "add_templated_ingress_host_mode": "never",
+            "explicit_ack": "enabled",
         },
         "logs": {
             "decode": {
@@ -416,7 +417,8 @@ default_config = {
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
         "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10000,
-        # See mlrun.common.schemas.ModelEndpointStoreType for available options
+        "parquet_batching_timeout_secs": timedelta(minutes=30).total_seconds(),
+        # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
         "store_type": "v3io-nosql",
         "endpoint_store_connection": "",
     },
@@ -456,7 +458,7 @@ default_config = {
         },
         "default_targets": "parquet,nosql",
         "default_job_image": "mlrun/mlrun",
-        "flush_interval": 300,
+        "flush_interval": None,
     },
     "ui": {
         "projects_prefix": "projects",  # The UI link prefix for projects
@@ -515,7 +517,11 @@ default_config = {
     "debug": {
         "expose_internal_api_endpoints": False,
     },
-    "default_workflow_runner_name": "workflow-runner-{}",
+    "workflows": {
+        "default_workflow_runner_name": "workflow-runner-{}",
+        # Default timeout seconds for retrieving workflow id after execution:
+        "timeouts": {"local": 120, "kfp": 30},
+    },
     "log_collector": {
         "address": "localhost:8282",
         # log collection mode can be one of: "sidecar", "legacy", "best-effort"
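Both of the new blocks above (explicit_ack and the workflows timeouts) follow the standard Config override path, so deployments can tune them without code changes. A minimal sketch, assuming the usual MLRUN_ environment-variable mapping (double underscore per nesting level, JSON-encoded values) and the nesting shown in the hunks; the override values are hypothetical:

    import os

    # Assumption: nested default_config keys map to "MLRUN_"-prefixed env vars,
    # with "__" standing in for each nesting level; values may be JSON-encoded.
    os.environ["MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK"] = "disabled"
    os.environ["MLRUN_WORKFLOWS__TIMEOUTS"] = '{"local": 300, "kfp": 60}'

    import mlrun  # the config object is populated from the environment lazily

    print(mlrun.mlconf.httpdb.nuclio.explicit_ack)  # expected: "disabled"
    print(mlrun.mlconf.workflows.timeouts.local)  # expected: 300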
@@ -775,7 +781,6 @@ class Config:
         return semver.VersionInfo.parse(f"{semver_compatible_igz_version}.0")

     def verify_security_context_enrichment_mode_is_allowed(self):
-
         # TODO: move SecurityContextEnrichmentModes to a different package so that we could use it here without
         # importing mlrun.api
         if config.function.spec.security_context.enrichment_mode == "disabled":
@@ -932,36 +937,6 @@ class Config:
         # when dbpath is set we want to connect to it which will sync configuration from it to the client
         mlrun.db.get_run_db(value, force_reconnect=True)

-    @property
-    def iguazio_api_url(self):
-        """
-        we want to be able to run with old versions of the service who runs the API (which doesn't configure this
-        value) so we're doing best effort to try and resolve it from other configurations
-        TODO: Remove this hack when 0.6.x is old enough
-        """
-        if not self._iguazio_api_url:
-            if self.httpdb.builder.docker_registry and self.igz_version:
-                return self._extract_iguazio_api_from_docker_registry_url()
-        return self._iguazio_api_url
-
-    def _extract_iguazio_api_from_docker_registry_url(self):
-        docker_registry_url = self.httpdb.builder.docker_registry
-        # add schema otherwise parsing go wrong
-        if "://" not in docker_registry_url:
-            docker_registry_url = f"http://{docker_registry_url}"
-        parsed_registry_url = urllib.parse.urlparse(docker_registry_url)
-        registry_hostname = parsed_registry_url.hostname
-        # replace the first domain section (app service name) with dashboard
-        first_dot_index = registry_hostname.find(".")
-        if first_dot_index < 0:
-            # if not found it's not the format we know - can't resolve the api url from the registry url
-            return ""
-        return f"https://dashboard{registry_hostname[first_dot_index:]}"
-
-    @iguazio_api_url.setter
-    def iguazio_api_url(self, value):
-        self._iguazio_api_url = value
-
     def is_api_running_on_k8s(self):
         # determine if the API service is attached to K8s cluster
         # when there is a cluster the .namespace is set
@@ -1044,6 +1019,40 @@
             ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
         )

+    def get_s3_storage_options(self) -> typing.Dict[str, typing.Any]:
+        """
+        Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
+        graph uses this method for generating the storage options for S3 parquet target path.
+
+        :return: A storage options dictionary in which each key-value pair represents a particular configuration,
+                 such as endpoint_url or aws access key.
+        """
+        key = mlrun.get_secret_or_env("AWS_ACCESS_KEY_ID")
+        secret = mlrun.get_secret_or_env("AWS_SECRET_ACCESS_KEY")
+
+        force_non_anonymous = mlrun.get_secret_or_env("S3_NON_ANONYMOUS")
+        profile = mlrun.get_secret_or_env("AWS_PROFILE")
+
+        storage_options = dict(
+            anon=not (force_non_anonymous or (key and secret)),
+            key=key,
+            secret=secret,
+        )
+
+        endpoint_url = mlrun.get_secret_or_env("S3_ENDPOINT_URL")
+        if endpoint_url:
+            client_kwargs = {"endpoint_url": endpoint_url}
+            storage_options["client_kwargs"] = client_kwargs
+
+        if profile:
+            storage_options["profile"] = profile
+
+        return storage_options
+
+    def is_explicit_ack(self) -> bool:
+        return self.httpdb.nuclio.explicit_ack == "enabled" and (
+            not self.nuclio_version or self.nuclio_version >= "1.11.20"
+        )
+

 # Global configuration
 config = Config.from_dict(default_config)
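The dictionary returned by get_s3_storage_options is shaped for fsspec's s3fs backend, so it can be passed straight through as storage_options. A usage sketch, assuming the relevant credentials are present as env vars or secrets; the bucket and key are hypothetical:

    import pandas as pd

    import mlrun

    # Assumption: AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (and optionally
    # AWS_PROFILE, S3_ENDPOINT_URL, S3_NON_ANONYMOUS) are set; the path is made up.
    options = mlrun.mlconf.get_s3_storage_options()
    df = pd.read_parquet(
        "s3://my-bucket/model-endpoints/parquet/endpoint.parquet",
        storage_options=options,
    )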
@@ -1091,12 +1100,6 @@ def _do_populate(env=None, skip_errors=False):
     if data:
         config.update(data, skip_errors=skip_errors)

-    # HACK to enable config property to both have dynamic default and to use the value from dict/env like other
-    # configurations - we just need a key in the dict that is different than the property name, so simply adding prefix
-    # underscore
-    config._cfg["_iguazio_api_url"] = config._cfg["iguazio_api_url"]
-    del config._cfg["iguazio_api_url"]
-
     _validate_config(config)
mlrun/datastore/__init__.py CHANGED
@@ -29,8 +29,12 @@ __all__ = [
     "StreamSource",
     "KafkaSource",
     "RedisStore",
+    "DatabricksFileSystemDisableCache",
+    "DatabricksFileBugFixed",
 ]

+import fsspec
+
 import mlrun.datastore.wasbfs

 from ..platforms.iguazio import (
@@ -42,6 +46,7 @@ from ..platforms.iguazio import (
 from ..utils import logger
 from .base import DataItem
 from .datastore import StoreManager, in_memory_store, uri_to_ipython
+from .dbfs_store import DatabricksFileBugFixed, DatabricksFileSystemDisableCache
 from .s3 import parse_s3_bucket_and_key
 from .sources import (
     BigQuerySource,
@@ -62,6 +67,22 @@ from .utils import parse_kafka_url

 store_manager = StoreManager()

+if hasattr(fsspec, "register_implementation"):
+    fsspec.register_implementation(
+        "dbfs", DatabricksFileSystemDisableCache, clobber=True
+    )
+else:
+    from fsspec.registry import known_implementations
+
+    known_implementations["dbfs"] = {
+        "class": "mlrun.datastore.dbfs_store.DatabricksFileSystemDisableCache",
+        "err": "Please make sure your fsspec version supports dbfs",
+    }
+
+    del known_implementations
+
+del fsspec  # clear the module namespace
+

 def set_in_memory_item(key, value):
     item = store_manager.object(f"memory://{key}")
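Because importing mlrun.datastore registers the cache-disabled class under the dbfs protocol, plain fsspec calls pick it up transparently. A small sketch, assuming a hypothetical Databricks workspace host and a valid token in DATABRICKS_TOKEN:

    import os

    import fsspec

    import mlrun.datastore  # noqa: F401 - side effect: registers "dbfs" in fsspec

    fs = fsspec.filesystem(
        "dbfs",
        instance="adb-1234567890123456.7.azuredatabricks.net",  # hypothetical host
        token=os.environ["DATABRICKS_TOKEN"],
    )
    print(fs.ls("/"))  # listing works since the broken _ls_from_cache is disabled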
mlrun/datastore/base.py CHANGED
@@ -261,7 +261,7 @@ class DataStore:
                 updated_args = [f"{base_path}/{filename}"]
                 updated_args.extend(args[1:])
                 dfs.append(df_module.read_csv(*updated_args, **kwargs))
-            return pd.concat(dfs)
+            return df_module.concat(dfs)

         elif (
             file_url.endswith(".parquet")
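The one-line change matters because this reader dispatches on df_module, which may be dask.dataframe rather than pandas, and pd.concat refuses dask frames. A sketch of the distinction:

    import dask.dataframe as dd
    import pandas as pd

    pdf = pd.DataFrame({"a": [1, 2]})
    ddf = dd.from_pandas(pdf, npartitions=1)

    pd.concat([pdf, pdf])  # fine: pandas frames
    dd.concat([ddf, ddf])  # fine: dask frames, combined lazily
    # pd.concat([ddf, ddf]) raises TypeError - pandas rejects non-pandas objects,
    # hence the switch to df_module.concat.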
mlrun/datastore/datastore.py CHANGED
@@ -86,6 +86,10 @@ def schema_to_store(schema):
             "Google cloud storage packages are missing, use pip install mlrun[google-cloud-storage]"
         )
         return GoogleCloudStorageStore
+    elif schema == "dbfs":
+        from .dbfs_store import DBFSStore
+
+        return DBFSStore
     else:
         raise ValueError(f"unsupported store scheme ({schema})")

@@ -175,6 +179,11 @@ class StoreManager:
         )

         store, subpath = self.get_or_create_store(url, secrets=secrets)
+        schema, endpoint, parsed_url = parse_url(url)
+        # TODO: Modify the URL replacement to be outside of the dataitem. Dataitem class should
+        # be implemented as a generic class.
+        if endpoint and schema == "dbfs":
+            url = url.replace(endpoint, "", 1)
         return DataItem(key, store, subpath, url, meta=meta, artifact_url=artifact_url)

     def get_or_create_store(self, url, secrets: dict = None) -> (DataStore, str):
mlrun/datastore/dbfs_store.py ADDED
@@ -0,0 +1,168 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+
+import fsspec
+from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
+
+import mlrun.errors
+
+from .base import DataStore, FileStats
+
+
+class DatabricksFileBugFixed(DatabricksFile):
+    """Overrides DatabricksFile to add the following fix: https://github.com/fsspec/filesystem_spec/pull/1278"""
+
+    def _upload_chunk(self, final=False):
+        """Internal function to add a chunk of data to a started upload"""
+        self.buffer.seek(0)
+        data = self.buffer.getvalue()
+
+        data_chunks = [
+            data[start:end] for start, end in self._to_sized_blocks(end=len(data))
+        ]
+
+        for data_chunk in data_chunks:
+            self.fs._add_data(handle=self.handle, data=data_chunk)
+
+        if final:
+            self.fs._close_handle(handle=self.handle)
+            return True
+
+    def _fetch_range(self, start, end):
+        """Internal function to download a block of data"""
+        return_buffer = b""
+        for chunk_start, chunk_end in self._to_sized_blocks(start, end):
+            return_buffer += self.fs._get_data(
+                path=self.path, start=chunk_start, end=chunk_end
+            )
+
+        return return_buffer
+
+    def _to_sized_blocks(self, start=0, end=100):
+        """Helper function to split a range from 0 to total_length into blocksizes"""
+        for data_chunk in range(start, end, self.blocksize):
+            data_start = data_chunk
+            data_end = min(end, data_chunk + self.blocksize)
+            yield data_start, data_end
+
+
+class DatabricksFileSystemDisableCache(DatabricksFileSystem):
+    root_marker = "/"
+    protocol = "dbfs"
+
+    def _open(self, path, mode="rb", block_size="default", **kwargs):
+        """
+        Overwrite the base class method to make sure to create a DBFile.
+        All arguments are copied from the base method.
+
+        Only the default blocksize is allowed.
+        """
+        return DatabricksFileBugFixed(
+            self, path, mode=mode, block_size=block_size, **kwargs
+        )
+
+    # _ls_from_cache is not working properly, so we disable it.
+    def _ls_from_cache(self, path):
+        pass
+
+
+# dbfs objects will be represented with the following URL: dbfs://<path>
+class DBFSStore(DataStore):
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets=secrets)
+        self.get_filesystem(silent=False)
+
+    def get_filesystem(self, silent=True):
+        """return fsspec file system object, if supported"""
+        if not self._filesystem:
+            self._filesystem = fsspec.filesystem("dbfs", **self.get_storage_options())
+        return self._filesystem
+
+    def get_storage_options(self):
+        return dict(
+            token=self._get_secret_or_env("DATABRICKS_TOKEN"), instance=self.endpoint
+        )
+
+    def _verify_filesystem_and_key(self, key: str):
+        if not self._filesystem:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Performing actions on data-item without a valid filesystem"
+            )
+        if not key.startswith("/"):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Invalid key parameter - key must start with '/'"
+            )
+
+    def get(self, key: str, size=None, offset=0) -> bytes:
+        self._verify_filesystem_and_key(key)
+        if size is not None and size <= 0:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "size cannot be negative or zero"
+            )
+        start = offset or None
+        end = offset + size if size is not None else None
+        return self._filesystem.cat_file(key, start=start, end=end)
+
+    def put(self, key, data, append=False):
+        self._verify_filesystem_and_key(key)
+        if append:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Append mode not supported for Databricks file system"
+            )
+        # can not use append mode because it overrides data.
+        mode = "w"
+        if isinstance(data, bytes):
+            mode += "b"
+        elif not isinstance(data, str):
+            raise TypeError(f"Unknown data type {type(data)}")
+        with self._filesystem.open(key, mode) as f:
+            f.write(data)
+
+    def upload(self, key: str, src_path: str):
+        self._verify_filesystem_and_key(key)
+        self._filesystem.put_file(src_path, key, overwrite=True)
+
+    def stat(self, key: str):
+        self._verify_filesystem_and_key(key)
+        file = self._filesystem.stat(key)
+        if file["type"] == "file":
+            size = file["size"]
+        elif file["type"] == "directory":
+            raise FileNotFoundError("Operation expects a file not a directory!")
+        return FileStats(size, None)
+
+    def listdir(self, key: str):
+        """
+        Basic ls of file/dir - without recursion.
+        """
+        self._verify_filesystem_and_key(key)
+        if self._filesystem.isfile(key):
+            return key
+        remote_path = f"{key}/*"
+        files = self._filesystem.glob(remote_path)
+        # Get only the files and directories under key path, without the key path itself.
+        # for example in a filesystem that has this path: /test_mlrun_dbfs_objects/test.txt
+        # listdir with the input /test_mlrun_dbfs_objects as a key will return ['test.txt'].
+        files = [pathlib.Path(file).name for file in files if "/" in file]
+        return files
+
+    def rm(self, path, recursive=False, maxdepth=None):
+        if maxdepth:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "dbfs file system does not support maxdepth option in rm function"
+            )
+        self.get_filesystem().rm(path=path, recursive=recursive)
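Combined with the StoreManager change above (which strips the endpoint out of the URL before it becomes the item key), a dbfs data item could be used as follows; the workspace host and path are hypothetical, and DATABRICKS_TOKEN is assumed to be available as an env var or secret:

    import os

    import mlrun

    os.environ["DATABRICKS_TOKEN"] = "<personal-access-token>"  # placeholder

    # The endpoint is embedded in the URL as dbfs://<host>/<path>, so DBFSStore
    # receives a key that starts with "/" after the endpoint is stripped.
    item = mlrun.get_dataitem(
        "dbfs://adb-1234567890123456.7.azuredatabricks.net/test_mlrun_dbfs_objects/test.txt"
    )
    print(item.get().decode())  # DBFSStore.get -> fsspec cat_file under the hood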
mlrun/datastore/helpers.py ADDED
@@ -0,0 +1,18 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+ONE_GB = 1024 * 1024 * 1024
+ONE_MB = 1024 * 1024
mlrun/datastore/sources.py CHANGED
@@ -793,6 +793,7 @@ class OnlineSource(BaseSourceDriver):
             context=context,
             key_field=self.key_field,
             full_event=True,
+            explicit_ack=mlrun.mlconf.is_explicit_ack(),
             **source_args,
         )
mlrun/datastore/store_resources.py CHANGED
@@ -16,12 +16,9 @@

 import mlrun
 from mlrun.config import config
-from mlrun.utils.helpers import (
-    is_legacy_artifact,
-    parse_artifact_uri,
-    parse_versioned_object_uri,
-)
+from mlrun.utils.helpers import is_legacy_artifact, parse_artifact_uri

+from ..common.helpers import parse_versioned_object_uri
 from ..platforms.iguazio import parse_path
 from ..utils import DB_SCHEMA, StorePrefix
 from .targets import get_online_target
mlrun/datastore/v3io.py CHANGED
@@ -22,6 +22,7 @@ import fsspec
 import v3io.dataplane

 import mlrun
+from mlrun.datastore.helpers import ONE_GB, ONE_MB

 from ..platforms.iguazio import parse_path, split_path
 from .base import (
@@ -36,8 +37,6 @@ from .base import (
 )

 V3IO_LOCAL_ROOT = "v3io"
-ONE_GB = 1024 * 1024 * 1024
-ONE_MB = 1024 * 1024


 class V3ioStore(DataStore):
mlrun/db/__init__.py CHANGED
@@ -12,14 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from os import environ
-from urllib.parse import urlparse

 from ..config import config
-from ..platforms import add_or_refresh_credentials
-from ..utils import logger
 from .base import RunDBError, RunDBInterface  # noqa
-from .nopdb import NopDB
-from .sqldb import SQLDB


 def get_or_set_dburl(default=""):
@@ -29,69 +24,10 @@ def get_or_set_dburl(default=""):
     return config.dbpath


-def get_httpdb_kwargs(host, username, password):
-    username = username or config.httpdb.user
-    password = password or config.httpdb.password
-
-    username, password, token = add_or_refresh_credentials(
-        host, username, password, config.httpdb.token
-    )
-
-    return {
-        "user": username,
-        "password": password,
-        "token": token,
-    }
-
-
-_run_db = None
-_last_db_url = None
-
-
 def get_run_db(url="", secrets=None, force_reconnect=False):
     """Returns the runtime database"""
-    global _run_db, _last_db_url
-
-    if not url:
-        url = get_or_set_dburl("./")
-
-    if (
-        _last_db_url is not None
-        and url == _last_db_url
-        and _run_db
-        and not force_reconnect
-    ):
-        return _run_db
-    _last_db_url = url
-
-    parsed_url = urlparse(url)
-    scheme = parsed_url.scheme.lower()
-    kwargs = {}
-    if "://" not in str(url) or scheme in ["file", "s3", "v3io", "v3ios"]:
-        logger.warning(
-            "Could not detect path to API server, not connected to API server!"
-        )
-        logger.warning(
-            "MLRUN_DBPATH is not set. Set this environment variable to the URL of the API server"
-            " in order to connect"
-        )
-        cls = NopDB
-
-    elif scheme in ("http", "https"):
-        # import here to avoid circular imports
-        from .httpdb import HTTPRunDB
-
-        cls = HTTPRunDB
-        kwargs = get_httpdb_kwargs(
-            parsed_url.hostname, parsed_url.username, parsed_url.password
-        )
-        endpoint = parsed_url.hostname
-        if parsed_url.port:
-            endpoint += f":{parsed_url.port}"
-        url = f"{parsed_url.scheme}://{endpoint}{parsed_url.path}"
-    else:
-        cls = SQLDB
+    # import here to avoid circular import
+    import mlrun.db.factory

-    _run_db = cls(url, **kwargs)
-    _run_db.connect(secrets=secrets)
-    return _run_db
+    run_db_factory = mlrun.db.factory.RunDBFactory()
+    return run_db_factory.create_run_db(url, secrets, force_reconnect)
mlrun/db/base.py CHANGED
@@ -621,3 +621,15 @@ class RunDBInterface(ABC):
         notifications: typing.List[mlrun.model.Notification],
     ):
         pass
+
+    def store_run_notifications(
+        self,
+        notification_objects: typing.List[mlrun.model.Notification],
+        run_uid: str,
+        project: str = None,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def watch_log(self, uid, project="", watch=True, offset=0):
+        pass
mlrun/db/factory.py ADDED
@@ -0,0 +1,65 @@
+# Copyright 2023 MLRun Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dependency_injector import containers, providers
+
+import mlrun.db
+import mlrun.db.httpdb
+import mlrun.db.nopdb
+import mlrun.utils.singleton
+from mlrun.utils import logger
+
+
+class RunDBFactory(
+    metaclass=mlrun.utils.singleton.AbstractSingleton,
+):
+    def __init__(self):
+        self._run_db = None
+        self._last_db_url = None
+        self._rundb_container = RunDBContainer()
+
+    def create_run_db(self, url="", secrets=None, force_reconnect=False):
+        """Returns the runtime database"""
+        if not url:
+            url = mlrun.db.get_or_set_dburl("./")
+
+        if (
+            self._last_db_url is not None
+            and url == self._last_db_url
+            and self._run_db
+            and not force_reconnect
+        ):
+            return self._run_db
+
+        self._last_db_url = url
+
+        if "://" not in str(url):
+            logger.warning(
+                "Could not detect path to API server, not connected to API server!"
+            )
+            logger.warning(
+                "MLRUN_DBPATH is misconfigured. Set this environment variable to the URL of the API server"
+                " in order to connect"
+            )
+            self._run_db = self._rundb_container.nop(url)
+
+        else:
+            self._run_db = self._rundb_container.run_db(url)
+
+        self._run_db.connect(secrets=secrets)
+        return self._run_db
+
+
+class RunDBContainer(containers.DeclarativeContainer):
+    nop = providers.Factory(mlrun.db.nopdb.NopDB)
+    run_db = providers.Factory(mlrun.db.httpdb.HTTPRunDB)
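With the factory in place, mlrun.db.get_run_db becomes a thin delegating wrapper: a scheme-less URL yields the NopDB stub (plus the warnings above), while any URL containing "://" is routed to HTTPRunDB, since the SQL path moved to mlrun.api.rundb. A short sketch; the API address is hypothetical:

    import mlrun.db

    # Scheme-less URL -> NopDB stub and the MLRUN_DBPATH warnings.
    offline_db = mlrun.db.get_run_db("./")

    # Hypothetical in-cluster API address; any "://" URL is handed to HTTPRunDB,
    # which connects (and syncs config) inside create_run_db.
    api_db = mlrun.db.get_run_db("http://mlrun-api:8080", force_reconnect=True)
    print(type(api_db).__name__)  # HTTPRunDB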