mlrun 1.7.0rc3__py3-none-any.whl → 1.7.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (76)
  1. mlrun/artifacts/manager.py +6 -1
  2. mlrun/common/constants.py +2 -0
  3. mlrun/common/model_monitoring/helpers.py +12 -6
  4. mlrun/common/schemas/__init__.py +11 -0
  5. mlrun/common/schemas/api_gateway.py +85 -0
  6. mlrun/common/schemas/auth.py +2 -2
  7. mlrun/common/schemas/client_spec.py +1 -0
  8. mlrun/common/schemas/common.py +40 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +4 -1
  10. mlrun/common/schemas/project.py +2 -0
  11. mlrun/config.py +31 -17
  12. mlrun/datastore/azure_blob.py +22 -9
  13. mlrun/datastore/base.py +15 -25
  14. mlrun/datastore/datastore.py +19 -8
  15. mlrun/datastore/datastore_profile.py +47 -5
  16. mlrun/datastore/google_cloud_storage.py +10 -6
  17. mlrun/datastore/hdfs.py +51 -0
  18. mlrun/datastore/redis.py +4 -0
  19. mlrun/datastore/s3.py +4 -0
  20. mlrun/datastore/sources.py +29 -43
  21. mlrun/datastore/targets.py +59 -53
  22. mlrun/datastore/utils.py +2 -49
  23. mlrun/datastore/v3io.py +4 -0
  24. mlrun/db/base.py +50 -0
  25. mlrun/db/httpdb.py +121 -50
  26. mlrun/db/nopdb.py +13 -0
  27. mlrun/execution.py +3 -3
  28. mlrun/feature_store/feature_vector.py +2 -2
  29. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  30. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  31. mlrun/k8s_utils.py +10 -5
  32. mlrun/kfpops.py +19 -10
  33. mlrun/model.py +5 -0
  34. mlrun/model_monitoring/api.py +3 -3
  35. mlrun/model_monitoring/application.py +1 -1
  36. mlrun/model_monitoring/applications/__init__.py +13 -0
  37. mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
  38. mlrun/model_monitoring/batch.py +9 -111
  39. mlrun/model_monitoring/controller.py +73 -55
  40. mlrun/model_monitoring/controller_handler.py +13 -5
  41. mlrun/model_monitoring/features_drift_table.py +62 -53
  42. mlrun/model_monitoring/helpers.py +30 -21
  43. mlrun/model_monitoring/metrics/__init__.py +13 -0
  44. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  45. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
  46. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  47. mlrun/package/packagers/pandas_packagers.py +3 -3
  48. mlrun/package/utils/_archiver.py +3 -1
  49. mlrun/platforms/iguazio.py +8 -65
  50. mlrun/projects/pipelines.py +21 -11
  51. mlrun/projects/project.py +180 -42
  52. mlrun/run.py +1 -1
  53. mlrun/runtimes/base.py +25 -2
  54. mlrun/runtimes/kubejob.py +5 -3
  55. mlrun/runtimes/local.py +2 -2
  56. mlrun/runtimes/mpijob/abstract.py +6 -6
  57. mlrun/runtimes/nuclio/__init__.py +1 -0
  58. mlrun/runtimes/nuclio/api_gateway.py +300 -0
  59. mlrun/runtimes/nuclio/function.py +9 -9
  60. mlrun/runtimes/nuclio/serving.py +3 -3
  61. mlrun/runtimes/pod.py +3 -3
  62. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  63. mlrun/serving/remote.py +4 -2
  64. mlrun/serving/server.py +2 -8
  65. mlrun/utils/async_http.py +3 -3
  66. mlrun/utils/helpers.py +27 -5
  67. mlrun/utils/http.py +3 -3
  68. mlrun/utils/logger.py +2 -2
  69. mlrun/utils/notifications/notification_pusher.py +6 -6
  70. mlrun/utils/version/version.json +2 -2
  71. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/METADATA +13 -16
  72. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/RECORD +76 -68
  73. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/WHEEL +1 -1
  74. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/LICENSE +0 -0
  75. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/entry_points.txt +0 -0
  76. {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc5.dist-info}/top_level.txt +0 -0
mlrun/artifacts/manager.py CHANGED
@@ -17,7 +17,11 @@ from os.path import exists, isdir
 from urllib.parse import urlparse
 
 import mlrun.config
-from mlrun.utils.helpers import get_local_file_schema, template_artifact_path
+from mlrun.utils.helpers import (
+    get_local_file_schema,
+    template_artifact_path,
+    validate_inline_artifact_body_size,
+)
 
 from ..utils import (
     is_legacy_artifact,
@@ -212,6 +216,7 @@ class ArtifactManager:
         target_path = target_path or item.target_path
 
         validate_artifact_key_name(key, "artifact.key")
+        validate_inline_artifact_body_size(item.spec.inline)
         src_path = local_path or item.src_path  # TODO: remove src_path
         self.ensure_artifact_source_file_exists(item=item, path=src_path, body=body)
         if format == "html" or (src_path and pathlib.Path(src_path).suffix == "html"):
mlrun/common/constants.py CHANGED
@@ -13,3 +13,5 @@
 # limitations under the License.
 #
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
+MLRUN_CREATED_LABEL = "mlrun-created"
+MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
mlrun/common/model_monitoring/helpers.py CHANGED
@@ -16,6 +16,7 @@ import sys
 import typing
 
 import mlrun.common
+import mlrun.common.schemas.model_monitoring.constants as mm_constants
 from mlrun.common.schemas.model_monitoring import (
     EndpointUID,
     FunctionURI,
@@ -64,7 +65,7 @@ def parse_model_endpoint_store_prefix(store_prefix: str):
 
 
 def parse_monitoring_stream_path(
-    stream_uri: str, project: str, application_name: str = None
+    stream_uri: str, project: str, function_name: str = None
 ):
     if stream_uri.startswith("kafka://"):
         if "?topic" in stream_uri:
@@ -72,23 +73,28 @@ def parse_monitoring_stream_path(
                 "Custom kafka topic is not allowed"
             )
         # Add topic to stream kafka uri
-        if application_name is None:
+        if (
+            function_name is None
+            or function_name == mm_constants.MonitoringFunctionNames.STREAM
+        ):
             stream_uri += f"?topic=monitoring_stream_{project}"
         else:
-            stream_uri += f"?topic=monitoring_stream_{project}_{application_name}"
+            stream_uri += f"?topic=monitoring_stream_{project}_{function_name}"
 
     elif stream_uri.startswith("v3io://") and mlrun.mlconf.is_ce_mode():
         # V3IO is not supported in CE mode, generating a default http stream path
-        if application_name is None:
+        if function_name is None:
             stream_uri = (
                 mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                    project=project
+                    project=project, namespace=mlrun.mlconf.namespace
                 )
             )
         else:
            stream_uri = (
                 mlrun.mlconf.model_endpoint_monitoring.default_http_sink_app.format(
-                    project=project, application_name=application_name
+                    project=project,
+                    application_name=function_name,
+                    namespace=mlrun.mlconf.namespace,
                 )
             )
     return stream_uri
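
For illustration only (not part of the diff), a minimal sketch of how the renamed function_name argument affects the Kafka topic produced by parse_monitoring_stream_path; the broker address, project, and application names are hypothetical:

    from mlrun.common.model_monitoring.helpers import parse_monitoring_stream_path

    # No function name (or the dedicated stream function) -> project-level topic
    parse_monitoring_stream_path("kafka://broker:9092", "my-project")
    # -> "kafka://broker:9092?topic=monitoring_stream_my-project"

    # A monitoring application function name is appended to the topic
    parse_monitoring_stream_path("kafka://broker:9092", "my-project", "my-app")
    # -> "kafka://broker:9092?topic=monitoring_stream_my-project_my-app"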
mlrun/common/schemas/__init__.py CHANGED
@@ -14,6 +14,16 @@
 #
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
+from .api_gateway import (
+    APIGateway,
+    APIGatewayAuthenticationMode,
+    APIGatewayBasicAuth,
+    APIGatewayMetadata,
+    APIGatewaysOutput,
+    APIGatewaySpec,
+    APIGatewayStatus,
+    APIGatewayUpstream,
+)
 from .artifact import (
     Artifact,
     ArtifactCategories,
@@ -43,6 +53,7 @@ from .clusterization_spec import (
     ClusterizationSpec,
     WaitForChiefToReachOnlineStateFeatureFlag,
 )
+from .common import ImageBuilder
 from .constants import (
     APIStates,
     ClusterizationRole,
mlrun/common/schemas/api_gateway.py ADDED
@@ -0,0 +1,85 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import typing
+from typing import Optional
+
+import pydantic
+
+import mlrun.common.types
+
+
+class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
+    basic = "basicAuth"
+    none = "none"
+
+    @classmethod
+    def from_str(cls, authentication_mode: str):
+        if authentication_mode == "none":
+            return cls.none
+        elif authentication_mode == "basicAuth":
+            return cls.basic
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Authentication mode `{authentication_mode}` is not supported",
+            )
+
+
+class _APIGatewayBaseModel(pydantic.BaseModel):
+    class Config:
+        extra = pydantic.Extra.allow
+
+
+class APIGatewayMetadata(_APIGatewayBaseModel):
+    name: str
+    namespace: Optional[str]
+    labels: Optional[dict] = {}
+
+
+class APIGatewayBasicAuth(_APIGatewayBaseModel):
+    username: str
+    password: str
+
+
+class APIGatewayUpstream(_APIGatewayBaseModel):
+    kind: Optional[str] = "nucliofunction"
+    nucliofunction: dict[str, str]
+    percentage: Optional[int] = 0
+
+
+class APIGatewaySpec(_APIGatewayBaseModel):
+    name: str
+    description: Optional[str]
+    path: Optional[str] = "/"
+    authenticationMode: Optional[APIGatewayAuthenticationMode] = (
+        APIGatewayAuthenticationMode.none
+    )
+    upstreams: list[APIGatewayUpstream]
+    authentication: Optional[dict[str, Optional[APIGatewayBasicAuth]]]
+    host: Optional[str]
+
+
+class APIGatewayStatus(_APIGatewayBaseModel):
+    name: Optional[str]
+    state: Optional[str]
+
+
+class APIGateway(_APIGatewayBaseModel):
+    metadata: APIGatewayMetadata
+    spec: APIGatewaySpec
+    status: Optional[APIGatewayStatus]
+
+
+class APIGatewaysOutput(_APIGatewayBaseModel):
+    api_gateways: typing.Optional[dict[str, APIGateway]] = {}
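
As a rough, illustrative sketch (not part of the diff), the new API gateway schemas are ordinary pydantic models and can be constructed directly; the gateway and function names below are hypothetical:

    from mlrun.common.schemas import (
        APIGateway,
        APIGatewayMetadata,
        APIGatewaySpec,
        APIGatewayUpstream,
    )

    # An API gateway routing all traffic to a single nuclio function
    gateway = APIGateway(
        metadata=APIGatewayMetadata(name="my-gateway"),
        spec=APIGatewaySpec(
            name="my-gateway",
            path="/",
            upstreams=[
                APIGatewayUpstream(
                    nucliofunction={"name": "my-function"}, percentage=100
                )
            ],
        ),
    )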
mlrun/common/schemas/auth.py CHANGED
@@ -59,7 +59,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     hub_source = "hub-source"
     workflow = "workflow"
     datastore_profile = "datastore-profile"
-    api_gateways = "api-gateways"
+    api_gateway = "api-gateway"
 
     def to_resource_string(
         self,
@@ -94,7 +94,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
             AuthorizationResourceTypes.hub_source: "/marketplace/sources",
             # workflow define how to run a pipeline and can be considered as the specification of a pipeline.
             AuthorizationResourceTypes.workflow: "/projects/{project_name}/workflows/{resource_name}",
-            AuthorizationResourceTypes.api_gateways: "/projects/{project_name}/api-gateways",
+            AuthorizationResourceTypes.api_gateway: "/projects/{project_name}/api-gateways/{resource_name}",
         }[self].format(project_name=project_name, resource_name=resource_name)
 
 
mlrun/common/schemas/client_spec.py CHANGED
@@ -29,6 +29,7 @@ class ClientSpec(pydantic.BaseModel):
     ui_url: typing.Optional[str]
     artifact_path: typing.Optional[str]
     feature_store_data_prefixes: typing.Optional[dict[str, str]]
+    feature_store_default_targets: typing.Optional[str]
     spark_app_image: typing.Optional[str]
     spark_app_image_tag: typing.Optional[str]
     spark_history_server_path: typing.Optional[str]
mlrun/common/schemas/common.py ADDED
@@ -0,0 +1,40 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import typing
+
+import pydantic
+
+
+class ImageBuilder(pydantic.BaseModel):
+    functionSourceCode: typing.Optional[str] = None
+    codeEntryType: typing.Optional[str] = None
+    codeEntryAttributes: typing.Optional[str] = None
+    source: typing.Optional[str] = None
+    code_origin: typing.Optional[str] = None
+    origin_filename: typing.Optional[str] = None
+    image: typing.Optional[str] = None
+    base_image: typing.Optional[str] = None
+    commands: typing.Optional[list] = None
+    extra: typing.Optional[str] = None
+    extra_args: typing.Optional[dict] = None
+    builder_env: typing.Optional[dict] = None
+    secret: typing.Optional[str] = None
+    registry: typing.Optional[str] = None
+    load_source_on_run: typing.Optional[bool] = None
+    with_mlrun: typing.Optional[bool] = None
+    auto_build: typing.Optional[bool] = None
+    build_pod: typing.Optional[str] = None
+    requirements: typing.Optional[list] = None
+    source_code_target_dir: typing.Optional[str] = None
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -181,7 +181,7 @@ class MonitoringFunctionNames:
     WRITER = "model-monitoring-writer"
     BATCH = "model-monitoring-batch"
     APPLICATION_CONTROLLER = "model-monitoring-controller"
-    STREAM = None
+    STREAM = "model-monitoring-stream"
 
     @staticmethod
     def all():
@@ -289,3 +289,6 @@ class ModelMonitoringAppLabel:
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"
+
+
+MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME = "histogram-data-drift"
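
A short illustrative note (not part of the diff): STREAM is now a concrete function name rather than None, so callers can compare against it directly, e.g.:

    from mlrun.common.schemas.model_monitoring.constants import MonitoringFunctionNames

    MonitoringFunctionNames.STREAM  # "model-monitoring-stream"
    # parse_monitoring_stream_path above treats this name like the default stream function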
mlrun/common/schemas/project.py CHANGED
@@ -19,6 +19,7 @@ import pydantic
 
 import mlrun.common.types
 
+from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 
@@ -85,6 +86,7 @@ class ProjectSpec(pydantic.BaseModel):
     desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
     custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
+    build: typing.Optional[ImageBuilder] = None
 
     class Config:
         extra = pydantic.Extra.allow
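
A minimal sketch (not part of the diff) of the new project-level build configuration; every ImageBuilder field is optional, and the values below are hypothetical:

    from mlrun.common.schemas import ImageBuilder

    build = ImageBuilder(
        base_image="mlrun/mlrun",
        commands=["pip install scikit-learn"],
        requirements=["pandas"],
        with_mlrun=True,
    )
    # The new ProjectSpec.build field (typing.Optional[ImageBuilder]) accepts such an object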
mlrun/config.py CHANGED
@@ -287,6 +287,12 @@ default_config = {
         "state": "online",
         "retry_api_call_on_exception": "enabled",
         "http_connection_timeout_keep_alive": 11,
+        # http client used by httpdb
+        "http": {
+            # when True, the client will verify the server's TLS
+            # set to False for backwards compatibility.
+            "verify": False,
+        },
         "db": {
             "commit_retry_timeout": 30,
             "commit_retry_interval": 3,
@@ -484,8 +490,8 @@ default_config = {
         "offline_storage_path": "model-endpoints/{kind}",
         # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
         # when the user is working in CE environment and has not provided any stream path.
-        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
-        "default_http_sink_app": "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080",
+        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
+        "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
         "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
@@ -605,7 +611,7 @@ default_config = {
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
         # Default timeout seconds for retrieving workflow id after execution:
-        "timeouts": {"local": 120, "kfp": 30, "remote": 30},
+        "timeouts": {"local": 120, "kfp": 30, "remote": 90},
     },
     "log_collector": {
         "address": "localhost:8282",
@@ -957,10 +963,10 @@ class Config:
             with_gpu = (
                 with_gpu_requests if requirement == "requests" else with_gpu_limits
             )
-            resources[
-                requirement
-            ] = self.get_default_function_pod_requirement_resources(
-                requirement, with_gpu
+            resources[requirement] = (
+                self.get_default_function_pod_requirement_resources(
+                    requirement, with_gpu
+                )
             )
         return resources
 
@@ -1053,7 +1059,7 @@
         kind: str = "",
         target: str = "online",
         artifact_path: str = None,
-        application_name: str = None,
+        function_name: str = None,
     ) -> str:
         """Get the full path from the configuration based on the provided project and kind.
 
@@ -1068,7 +1074,7 @@
                                  artifact path instead.
         :param artifact_path:    Optional artifact path that will be used as a relative path. If not provided, the
                                  relative artifact path will be taken from the global MLRun artifact path.
-        :param application_name: Application name, None for model_monitoring_stream.
+        :param function_name:    Application name, None for model_monitoring_stream.
 
         :return: Full configured path for the provided kind.
         """
@@ -1082,20 +1088,19 @@
             return store_prefix_dict[kind].format(project=project)
 
         if (
-            application_name
+            function_name
+            and function_name
            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
         ):
             return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                 project=project,
                 kind=kind
-                if application_name is None
-                else f"{kind}-{application_name.lower()}",
+                if function_name is None
+                else f"{kind}-{function_name.lower()}",
             )
         return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
             project=project,
-            kind=kind
-            if application_name is None
-            else f"{kind}-{application_name.lower()}",
+            kind=kind,
         )
 
     # Get the current offline path from the configuration
@@ -1343,12 +1348,21 @@ def read_env(env=None, prefix=env_prefix):
     if igz_domain:
         config["ui_url"] = f"https://mlrun-ui.{igz_domain}"
 
-    if config.get("log_level"):
+    if log_level := config.get("log_level"):
         import mlrun.utils.logger
 
         # logger created (because of imports mess) before the config is loaded (in tests), therefore we're changing its
         # level manually
-        mlrun.utils.logger.set_logger_level(config["log_level"])
+        mlrun.utils.logger.set_logger_level(log_level)
+
+    if log_formatter_name := config.get("log_formatter"):
+        import mlrun.utils.logger
+
+        log_formatter = mlrun.utils.create_formatter_instance(
+            mlrun.utils.FormatterKinds(log_formatter_name)
+        )
+        mlrun.utils.logger.get_handler("default").setFormatter(log_formatter)
+
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
     _convert_resources_to_str(config)
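
A hedged sketch (not part of the diff) of the new client TLS-verification flag; it assumes the http block sits under the httpdb section, as the surrounding keys suggest, and that the usual MLRUN_ double-underscore environment-variable nesting applies:

    import mlrun

    # Defaults to False for backwards compatibility
    print(mlrun.mlconf.httpdb.http.verify)

    # Can be flipped in code, or via an env var such as MLRUN_HTTPDB__HTTP__VERIFY=true
    mlrun.mlconf.httpdb.http.verify = True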
mlrun/datastore/azure_blob.py CHANGED
@@ -175,9 +175,9 @@ class AzureBlobStore(DataStore):
 
         if "client_secret" in st or "client_id" in st or "tenant_id" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[
-                f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"
-            ] = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
+            res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
+            )
             if "client_id" in st:
                 res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
                     "client_id"
@@ -188,14 +188,27 @@ class AzureBlobStore(DataStore):
                 ]
             if "tenant_id" in st:
                 tenant_id = st["tenant_id"]
-                res[
-                    f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"
-                ] = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
+                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+                )
 
         if "sas_token" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[
-                f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"
-            ] = "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
+            res[f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
+            )
             res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
         return res
+
+    @property
+    def spark_url(self):
+        spark_options = self.get_spark_options()
+        url = f"wasbs://{self.endpoint}"
+        prefix = "spark.hadoop.fs.azure.account.key."
+        if spark_options:
+            for key in spark_options:
+                if key.startswith(prefix):
+                    account_key = key[len(prefix) :]
+                    url += f"@{account_key}"
+                    break
+        return url
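
For illustration (not part of the diff), the new spark_url property derives the wasbs URL from the account-key spark option; the account and container names here are hypothetical:

    # Mirrors the loop in AzureBlobStore.spark_url shown above
    spark_options = {
        "spark.hadoop.fs.azure.account.key.myaccount.blob.core.windows.net": "<key>"
    }
    prefix = "spark.hadoop.fs.azure.account.key."
    url = "wasbs://mycontainer"  # f"wasbs://{self.endpoint}"
    for key in spark_options:
        if key.startswith(prefix):
            url += f"@{key[len(prefix):]}"
            break
    print(url)  # wasbs://mycontainer@myaccount.blob.core.windows.net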
mlrun/datastore/base.py CHANGED
@@ -147,6 +147,10 @@ class DataStore:
     def url(self):
         return f"{self.kind}://{self.endpoint}"
 
+    @property
+    def spark_url(self):
+        return self.url
+
     def get(self, key, size=None, offset=0):
         pass
 
@@ -320,31 +324,17 @@
             raise Exception(f"File type unhandled {url}")
 
         if file_system:
-            if (
-                self.supports_isdir()
-                and file_system.isdir(file_url)
-                or self._is_dd(df_module)
-            ):
-                storage_options = self.get_storage_options()
-                if url.startswith("ds://"):
-                    parsed_url = urllib.parse.urlparse(url)
-                    url = parsed_url.path
-                    if self.using_bucket:
-                        url = url[1:]
-                    # Pass the underlying file system
-                    kwargs["filesystem"] = file_system
-                elif storage_options:
-                    kwargs["storage_options"] = storage_options
-                df = reader(url, **kwargs)
-            else:
-                file = url
-                # Workaround for ARROW-12472 affecting pyarrow 3.x and 4.x.
-                if file_system.protocol != "file":
-                    # If not dir, use file_system.open() to avoid regression when pandas < 1.2 and does not
-                    # support the storage_options parameter.
-                    file = file_system.open(url)
-
-                df = reader(file, **kwargs)
+            storage_options = self.get_storage_options()
+            if url.startswith("ds://"):
+                parsed_url = urllib.parse.urlparse(url)
+                url = parsed_url.path
+                if self.using_bucket:
+                    url = url[1:]
+                # Pass the underlying file system
+                kwargs["filesystem"] = file_system
+            elif storage_options:
+                kwargs["storage_options"] = storage_options
+            df = reader(url, **kwargs)
         else:
             temp_file = tempfile.NamedTemporaryFile(delete=False)
             self.download(self._join(subpath), temp_file.name)
mlrun/datastore/datastore.py CHANGED
@@ -94,6 +94,10 @@ def schema_to_store(schema):
         from .dbfs_store import DBFSStore
 
         return DBFSStore
+    elif schema == "hdfs":
+        from .hdfs import HdfsStore
+
+        return HdfsStore
     else:
         raise ValueError(f"unsupported store scheme ({schema})")
 
@@ -170,7 +174,7 @@ class StoreManager:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 f"resource {url} does not have a valid/persistent offline target"
             )
-        return resource, target
+        return resource, target or ""
 
     def object(
         self, url, key="", project="", allow_empty_resources=None, secrets: dict = None
@@ -182,14 +186,21 @@
             url, project, allow_empty_resources, secrets
         )
 
-        store, subpath = self.get_or_create_store(
+        store, subpath, url = self.get_or_create_store(
             url, secrets=secrets, project_name=project
         )
-        return DataItem(key, store, subpath, url, meta=meta, artifact_url=artifact_url)
+        return DataItem(
+            key,
+            store,
+            subpath,
+            url,
+            meta=meta,
+            artifact_url=artifact_url,
+        )
 
     def get_or_create_store(
         self, url, secrets: dict = None, project_name=""
-    ) -> (DataStore, str):
+    ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
         store_key = f"{schema}://{endpoint}"
@@ -206,17 +217,17 @@
 
         if schema == "memory":
             subpath = url[len("memory://") :]
-            return in_memory_store, subpath
+            return in_memory_store, subpath, url
 
         if not schema and endpoint:
             if endpoint in self._stores.keys():
-                return self._stores[endpoint], subpath
+                return self._stores[endpoint], subpath, url
             else:
                 raise ValueError(f"no such store ({endpoint})")
 
         if not secrets and not mlrun.config.is_running_as_api():
             if store_key in self._stores.keys():
-                return self._stores[store_key], subpath
+                return self._stores[store_key], subpath, url
 
         # support u/p embedding in url (as done in redis) by setting netloc as the "endpoint" parameter
         # when running on server we don't cache the datastore, because there are multiple users and we don't want to
@@ -227,7 +238,7 @@
         if not secrets and not mlrun.config.is_running_as_api():
             self._stores[store_key] = store
         # in file stores in windows path like c:\a\b the drive letter is dropped from the path, so we return the url
-        return store, url if store.kind == "file" else subpath
+        return store, url if store.kind == "file" else subpath, url

    def reset_secrets(self):
        self._secrets = {}
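
A small sketch (not part of the diff) of the new three-element return value, assuming the module-level store_manager singleton in mlrun.datastore; the memory:// URL avoids any credential setup:

    import mlrun.datastore

    # get_or_create_store now returns (store, subpath, url) instead of (store, subpath)
    store, subpath, url = mlrun.datastore.store_manager.get_or_create_store(
        "memory://results/df.csv"
    )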