mlrun 1.6.0rc26__py3-none-any.whl → 1.6.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/artifacts/manager.py +6 -0
- mlrun/artifacts/model.py +28 -22
- mlrun/common/db/sql_session.py +3 -0
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +43 -17
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +22 -44
- mlrun/datastore/datastore.py +7 -3
- mlrun/datastore/datastore_profile.py +15 -3
- mlrun/datastore/google_cloud_storage.py +7 -7
- mlrun/datastore/sources.py +17 -4
- mlrun/datastore/targets.py +3 -1
- mlrun/datastore/utils.py +11 -1
- mlrun/datastore/v3io.py +70 -46
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +41 -36
- mlrun/execution.py +3 -3
- mlrun/feature_store/api.py +133 -132
- mlrun/feature_store/feature_set.py +89 -0
- mlrun/feature_store/feature_vector.py +120 -0
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +56 -0
- mlrun/kfpops.py +19 -10
- mlrun/model.py +6 -0
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/model_monitoring/stream_processing.py +52 -38
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +29 -12
- mlrun/projects/project.py +100 -61
- mlrun/run.py +2 -0
- mlrun/runtimes/base.py +24 -1
- mlrun/runtimes/function.py +14 -15
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/serving.py +7 -14
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/serving/routers.py +14 -8
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +59 -3
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/regex.py +5 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/METADATA +21 -23
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/RECORD +66 -65
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/WHEEL +1 -1
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc26.dist-info → mlrun-1.6.3rc1.dist-info}/top_level.txt +0 -0
mlrun/artifacts/manager.py
CHANGED

@@ -132,6 +132,12 @@ class ArtifactManager:
         # ModelArtifact is a directory.
         if isinstance(item, ModelArtifact):
             return
+        # Could happen in the import artifact scenario - that path is None.
+        if item.target_path:
+            return
+        # in DatasetArtifact
+        if hasattr(item, "df") and item.df is not None:
+            return
         parsed_url = urlparse(path)
         schema = parsed_url.scheme
         # we are not checking remote paths yet.
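The added guards short-circuit the local-path check when the artifact already points at a target path or carries an in-memory dataframe. A minimal standalone sketch of the same idea (the helper name is illustrative, not the actual mlrun method):

    def _should_skip_path_validation(item) -> bool:
        # imported artifacts may have no local source path but do carry a target_path
        if getattr(item, "target_path", None):
            return True
        # dataset artifacts can hold their data in memory as a DataFrame
        if getattr(item, "df", None) is not None:
            return True
        return False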
mlrun/artifacts/model.py
CHANGED

@@ -13,8 +13,9 @@
 # limitations under the License.
 import tempfile
 from os import path
-from typing import
+from typing import Any
 
+import pandas as pd
 import yaml
 from deprecated import deprecated
 

@@ -68,8 +69,8 @@ class ModelArtifactSpec(ArtifactSpec):
         model_file=None,
         metrics=None,
         paraemeters=None,
-        inputs:
-        outputs:
+        inputs: list[Feature] = None,
+        outputs: list[Feature] = None,
         framework=None,
         algorithm=None,
         feature_vector=None,

@@ -91,8 +92,8 @@ class ModelArtifactSpec(ArtifactSpec):
         self.model_file = model_file
         self.metrics = metrics or {}
         self.parameters = paraemeters or {}
-        self.inputs:
-        self.outputs:
+        self.inputs: list[Feature] = inputs or []
+        self.outputs: list[Feature] = outputs or []
         self.framework = framework
         self.algorithm = algorithm
         self.feature_vector = feature_vector

@@ -101,21 +102,21 @@ class ModelArtifactSpec(ArtifactSpec):
         self.model_target_file = model_target_file
 
     @property
-    def inputs(self) ->
+    def inputs(self) -> list[Feature]:
         """input feature list"""
         return self._inputs
 
     @inputs.setter
-    def inputs(self, inputs:
+    def inputs(self, inputs: list[Feature]):
         self._inputs = ObjectList.from_list(Feature, inputs)
 
     @property
-    def outputs(self) ->
+    def outputs(self) -> list[Feature]:
         """output feature list"""
         return self._outputs
 
     @outputs.setter
-    def outputs(self, outputs:
+    def outputs(self, outputs: list[Feature]):
         self._outputs = ObjectList.from_list(Feature, outputs)
 
 

@@ -175,22 +176,22 @@ class ModelArtifact(Artifact):
         self._spec = self._verify_dict(spec, "spec", ModelArtifactSpec)
 
     @property
-    def inputs(self) ->
+    def inputs(self) -> list[Feature]:
         """input feature list"""
         return self.spec.inputs
 
     @inputs.setter
-    def inputs(self, inputs:
+    def inputs(self, inputs: list[Feature]):
         """input feature list"""
         self.spec.inputs = inputs
 
     @property
-    def outputs(self) ->
+    def outputs(self) -> list[Feature]:
         """input feature list"""
         return self.spec.outputs
 
     @outputs.setter
-    def outputs(self, outputs:
+    def outputs(self, outputs: list[Feature]):
         """input feature list"""
         self.spec.outputs = outputs
 

@@ -260,6 +261,7 @@ class ModelArtifact(Artifact):
         """
         subset = df
         inferer = get_infer_interface(subset)
+        numeric_columns = self._extract_numeric_features(df)
         if label_columns:
             if not isinstance(label_columns, list):
                 label_columns = [label_columns]

@@ -273,9 +275,13 @@ class ModelArtifact(Artifact):
         )
         if with_stats:
             self.spec.feature_stats = inferer.get_stats(
-                df, options=InferOptions.Histogram, num_bins=num_bins
+                df[numeric_columns], options=InferOptions.Histogram, num_bins=num_bins
             )
 
+    @staticmethod
+    def _extract_numeric_features(df: pd.DataFrame) -> list[Any]:
+        return [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
+
     @property
     def is_dir(self):
         return True

@@ -445,8 +451,8 @@ class LegacyModelArtifact(LegacyArtifact):
         self.model_file = model_file
         self.parameters = parameters or {}
         self.metrics = metrics or {}
-        self.inputs:
-        self.outputs:
+        self.inputs: list[Feature] = inputs or []
+        self.outputs: list[Feature] = outputs or []
         self.extra_data = extra_data or {}
         self.framework = framework
         self.algorithm = algorithm

@@ -456,21 +462,21 @@ class LegacyModelArtifact(LegacyArtifact):
         self.model_target_file = model_target_file
 
     @property
-    def inputs(self) ->
+    def inputs(self) -> list[Feature]:
         """input feature list"""
         return self._inputs
 
     @inputs.setter
-    def inputs(self, inputs:
+    def inputs(self, inputs: list[Feature]):
         self._inputs = ObjectList.from_list(Feature, inputs)
 
     @property
-    def outputs(self) ->
+    def outputs(self) -> list[Feature]:
         """output feature list"""
         return self._outputs
 
     @outputs.setter
-    def outputs(self, outputs:
+    def outputs(self, outputs: list[Feature]):
         self._outputs = ObjectList.from_list(Feature, outputs)
 
     def infer_from_df(self, df, label_columns=None, with_stats=True, num_bins=None):

@@ -642,8 +648,8 @@ def update_model(
     parameters: dict = None,
     metrics: dict = None,
     extra_data: dict = None,
-    inputs:
-    outputs:
+    inputs: list[Feature] = None,
+    outputs: list[Feature] = None,
     feature_vector: str = None,
     feature_weights: list = None,
     key_prefix: str = "",
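Stats inference now computes histograms only over numeric columns, so a mixed-type dataframe no longer trips up feature-stats collection. The underlying pandas pattern, standalone and with illustrative data:

    import pandas as pd

    df = pd.DataFrame({"age": [31, 47], "name": ["a", "b"], "score": [0.2, 0.9]})
    numeric_columns = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
    # numeric_columns == ["age", "score"]; stats are then computed on df[numeric_columns]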
mlrun/common/db/sql_session.py
CHANGED

@@ -63,9 +63,12 @@ def _init_engine(dsn=None):
     max_overflow = config.httpdb.db.connections_pool_max_overflow
     if max_overflow is None:
         max_overflow = config.httpdb.max_workers
+
     kwargs = {
         "pool_size": pool_size,
         "max_overflow": max_overflow,
+        "pool_pre_ping": config.httpdb.db.connections_pool_pre_ping,
+        "pool_recycle": config.httpdb.db.connections_pool_recycle,
     }
     engine = create_engine(dsn, **kwargs)
     _engines[dsn] = engine
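pool_pre_ping and pool_recycle are standard SQLAlchemy engine options: the first issues a lightweight liveness check whenever a connection is taken from the pool, the second discards connections older than the given number of seconds. A minimal sketch with illustrative values:

    from sqlalchemy import create_engine

    engine = create_engine(
        "mysql+pymysql://user:pass@db:3306/mlrun",  # illustrative DSN
        pool_size=8,
        max_overflow=16,
        pool_pre_ping=True,    # test connections for liveness upon checkout
        pool_recycle=60 * 60,  # recycle connections after one hour
    )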
mlrun/common/model_monitoring/helpers.py
CHANGED

@@ -82,13 +82,15 @@ parse_monitoring_stream_path(
     if application_name is None:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink.format(
-                project=project
+                project=project, namespace=mlrun.mlconf.namespace
            )
         )
     else:
         stream_uri = (
             mlrun.mlconf.model_endpoint_monitoring.default_http_sink_app.format(
-                project=project,
+                project=project,
+                application_name=application_name,
+                namespace=mlrun.mlconf.namespace,
             )
         )
     return stream_uri
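The default HTTP sink templates (see the config.py diff below) now carry a {namespace} placeholder, so the generated URI targets the Nuclio service in the correct Kubernetes namespace. Roughly, with illustrative values:

    template = "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080"
    stream_uri = template.format(project="my-project", namespace="default-tenant")
    # -> "http://nuclio-my-project-model-monitoring-stream.default-tenant.svc.cluster.local:8080"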
mlrun/common/schemas/__init__.py
CHANGED

@@ -43,6 +43,7 @@ from .clusterization_spec import (
     ClusterizationSpec,
     WaitForChiefToReachOnlineStateFeatureFlag,
 )
+from .common import ImageBuilder
 from .constants import (
     APIStates,
     ClusterizationRole,

@@ -113,6 +114,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
mlrun/common/schemas/common.py
ADDED

@@ -0,0 +1,40 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import typing
+
+import pydantic
+
+
+class ImageBuilder(pydantic.BaseModel):
+    functionSourceCode: typing.Optional[str] = None
+    codeEntryType: typing.Optional[str] = None
+    codeEntryAttributes: typing.Optional[str] = None
+    source: typing.Optional[str] = None
+    code_origin: typing.Optional[str] = None
+    origin_filename: typing.Optional[str] = None
+    image: typing.Optional[str] = None
+    base_image: typing.Optional[str] = None
+    commands: typing.Optional[list] = None
+    extra: typing.Optional[str] = None
+    extra_args: typing.Optional[dict] = None
+    builder_env: typing.Optional[dict] = None
+    secret: typing.Optional[str] = None
+    registry: typing.Optional[str] = None
+    load_source_on_run: typing.Optional[bool] = None
+    with_mlrun: typing.Optional[bool] = None
+    auto_build: typing.Optional[bool] = None
+    build_pod: typing.Optional[str] = None
+    requirements: typing.Optional[list] = None
+    source_code_target_dir: typing.Optional[str] = None
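ImageBuilder mirrors the function build spec inside the project schema (ProjectSpec gains a matching build field in project.py below); as a pydantic model, unset fields stay None. A hedged usage sketch with illustrative values:

    from mlrun.common.schemas import ImageBuilder

    build = ImageBuilder(
        base_image="mlrun/mlrun",
        commands=["pip install scikit-learn"],
        requirements=["lightgbm"],
    )
    print(build.dict(exclude_none=True))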
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -77,6 +77,26 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"
 
 
+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"

@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"
 
 
-class WriterEvent(
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"

@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"
 
-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
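MonitoringStrEnum just factors the list() helper out of WriterEvent so any monitoring enum can expose its member values. For example (output assumes the underlying EventFieldType strings are "latency", "error_count" and "metrics"):

    from mlrun.common.schemas import FeatureSetFeatures

    FeatureSetFeatures.list()
    # -> ["latency", "error_count", "metrics"]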
mlrun/common/schemas/project.py
CHANGED

@@ -19,6 +19,7 @@ import pydantic
 
 import mlrun.common.types
 
+from .common import ImageBuilder
 from .object import ObjectKind, ObjectStatus
 
 

@@ -85,6 +86,7 @@ class ProjectSpec(pydantic.BaseModel):
     desired_state: typing.Optional[ProjectDesiredState] = ProjectDesiredState.online
     custom_packagers: typing.Optional[typing.List[typing.Tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
+    build: typing.Optional[ImageBuilder] = None
 
     class Config:
         extra = pydantic.Extra.allow
mlrun/config.py
CHANGED

@@ -109,7 +109,10 @@ default_config = {
         "runs": {
             # deleting runs is a heavy operation that includes deleting runtime resources, therefore we do it in chunks
             "batch_delete_runs_chunk_size": 10,
-        }
+        },
+        "resources": {
+            "delete_crd_resources_timeout": "5 minutes",
+        },
     },
     # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
     # before deleting them (4 hours)

@@ -285,6 +288,12 @@ default_config = {
         "state": "online",
         "retry_api_call_on_exception": "enabled",
         "http_connection_timeout_keep_alive": 11,
+        # http client used by httpdb
+        "http": {
+            # when True, the client will verify the server's TLS
+            # set to False for backwards compatibility.
+            "verify": False,
+        },
         "db": {
             "commit_retry_timeout": 30,
             "commit_retry_interval": 3,

@@ -303,7 +312,11 @@ default_config = {
             # default is 16MB, max 1G, for more info https://dev.mysql.com/doc/refman/8.0/en/packet-too-large.html
             "max_allowed_packet": 64000000,  # 64MB
         },
-        #
+        # tests connections for liveness upon each checkout
+        "connections_pool_pre_ping": True,
+        # this setting causes the pool to recycle connections after the given number of seconds has passed
+        "connections_pool_recycle": 60 * 60,
+        # None defaults to httpdb.max_workers
         "connections_pool_size": None,
         "connections_pool_max_overflow": None,
         # below is a db-specific configuration

@@ -408,6 +421,8 @@ default_config = {
         "iguazio_access_key": "",
         "iguazio_list_projects_default_page_size": 200,
         "iguazio_client_job_cache_ttl": "20 minutes",
+        "nuclio_project_deletion_verification_timeout": "300 seconds",
+        "nuclio_project_deletion_verification_interval": "5 seconds",
     },
     # The API needs to know what is its k8s svc url so it could enrich it in the jobs it creates
     "api_url": "",

@@ -429,7 +444,7 @@ default_config = {
        # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
        # git+https://github.com/mlrun/mlrun@development. by default uses the version
        "mlrun_version_specifier": "",
-       "kaniko_image": "gcr.io/kaniko-project/executor:v1.
+       "kaniko_image": "gcr.io/kaniko-project/executor:v1.21.1",  # kaniko builder image
        "kaniko_init_container_image": "alpine:3.18",
        # image for kaniko init container when docker registry is ECR
        "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",

@@ -476,8 +491,8 @@ default_config = {
        "offline_storage_path": "model-endpoints/{kind}",
        # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
        # when the user is working in CE environment and has not provided any stream path.
-       "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.
-       "default_http_sink_app": "http://nuclio-{project}-{application_name}.
+       "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
+       "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
        "batch_processing_function_branch": "master",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),

@@ -596,8 +611,9 @@ default_config = {
     },
     "workflows": {
         "default_workflow_runner_name": "workflow-runner-{}",
-        # Default timeout seconds for retrieving workflow id after execution
-
+        # Default timeout seconds for retrieving workflow id after execution
+        # Remote workflow timeout is the maximum between remote and the inner engine timeout
+        "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
     },
     "log_collector": {
         "address": "localhost:8282",

@@ -949,10 +965,10 @@ class Config:
             with_gpu = (
                 with_gpu_requests if requirement == "requests" else with_gpu_limits
             )
-            resources[
-
-
-
+            resources[requirement] = (
+                self.get_default_function_pod_requirement_resources(
+                    requirement, with_gpu
+                )
             )
         return resources
 

@@ -1146,11 +1162,12 @@ class Config:
 
         return storage_options
 
-    def is_explicit_ack(self) -> bool:
+    def is_explicit_ack(self, version=None) -> bool:
+        if not version:
+            version = self.nuclio_version
         return self.httpdb.nuclio.explicit_ack == "enabled" and (
-            not
-            or semver.VersionInfo.parse(
-            >= semver.VersionInfo.parse("1.12.10")
+            not version
+            or semver.VersionInfo.parse(version) >= semver.VersionInfo.parse("1.12.10")
         )
 
 

@@ -1334,12 +1351,21 @@ def read_env(env=None, prefix=env_prefix):
     if igz_domain:
         config["ui_url"] = f"https://mlrun-ui.{igz_domain}"
 
-    if config.get("log_level"):
+    if log_level := config.get("log_level"):
         import mlrun.utils.logger
 
         # logger created (because of imports mess) before the config is loaded (in tests), therefore we're changing its
         # level manually
-        mlrun.utils.logger.set_logger_level(
+        mlrun.utils.logger.set_logger_level(log_level)
+
+    if log_formatter_name := config.get("log_formatter"):
+        import mlrun.utils.logger
+
+        log_formatter = mlrun.utils.create_formatter_instance(
+            mlrun.utils.FormatterKinds(log_formatter_name)
+        )
+        mlrun.utils.logger.get_handler("default").setFormatter(log_formatter)
+
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
     _convert_resources_to_str(config)
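is_explicit_ack() can now be evaluated against an explicitly passed Nuclio version rather than only the cached self.nuclio_version; the check itself is a plain semver comparison. Roughly (illustrative version strings):

    import semver

    def explicit_ack_supported(version: str) -> bool:
        # explicit ack requires Nuclio >= 1.12.10 (or an unknown/empty version)
        return not version or semver.VersionInfo.parse(version) >= semver.VersionInfo.parse("1.12.10")

    explicit_ack_supported("1.12.9")   # False
    explicit_ack_supported("1.13.0")   # True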
mlrun/data_types/data_types.py
CHANGED

@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"

@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"
 
 
 def pd_schema_to_value_type(value):

@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }
 
     if type_name in type_map:
mlrun/datastore/azure_blob.py
CHANGED

@@ -175,9 +175,9 @@ class AzureBlobStore(DataStore):
 
         if "client_secret" in st or "client_id" in st or "tenant_id" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[
-
-
+            res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
+            )
             if "client_id" in st:
                 res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
                     "client_id"

@@ -188,14 +188,14 @@ class AzureBlobStore(DataStore):
                 ]
             if "tenant_id" in st:
                 tenant_id = st["tenant_id"]
-                res[
-                    f"
-
+                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
+                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
+                )
 
         if "sas_token" in st:
             res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[
-
-
+            res[f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"] = (
+                "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
+            )
             res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
         return res
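These hunks only re-wrap the same assignments (newer code style), so the resulting Spark options are unchanged. For an OAuth-configured storage account the relevant entries end up roughly as:

    host = "myaccount.dfs.core.windows.net"  # illustrative storage account host
    spark_options = {
        f"spark.hadoop.fs.azure.account.auth.type.{host}": "OAuth",
        f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}": (
            "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
        ),
    }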
mlrun/datastore/base.py
CHANGED

@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated
 
+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger

@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df
 
-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
 
 class FileStats:
     def __init__(self, size, modified, content_type=None):

@@ -643,45 +640,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}
 
 
-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-
-def http_head(url, headers=None, auth=None):
-    try:
-        response = requests.head(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.headers
-
-
-def http_put(url, data, headers=None, auth=None, session=None):
-    try:
-        put_api = session.put if session else requests.put
-        response = put_api(
-            url, data=data, headers=headers, auth=auth, verify=verify_ssl
-        )
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}") from exc
-
-    mlrun.errors.raise_for_status(response)
-
-
-def http_upload(url, file_path, headers=None, auth=None):
-    with open(file_path, "rb") as data:
-        http_put(url, data, headers, auth)
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)

@@ -709,7 +667,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")
 
     def get(self, key, size=None, offset=0):
-        data =
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:

@@ -729,6 +687,26 @@ class HttpStore(DataStore):
             f"schema as it is not secure and is not recommended."
         )
 
+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+
 
 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
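TLS verification for HttpStore reads is now taken from the mlrun config instead of a hard-coded module-level verify_ssl = False flag, and the old module-level http_get/http_head/http_put/http_upload helpers were dropped in favor of the method above. A hedged sketch of opting in to verification from client code:

    import mlrun

    # defaults to False for backwards compatibility (see the config.py diff above)
    mlrun.mlconf.httpdb.http.verify = True

    item = mlrun.get_dataitem("https://example.com/data.csv")  # illustrative URL
    content = item.get()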
mlrun/datastore/datastore.py
CHANGED

@@ -182,16 +182,20 @@ class StoreManager:
             url, project, allow_empty_resources, secrets
         )
 
-        store, subpath = self.get_or_create_store(
+        store, subpath = self.get_or_create_store(
+            url, secrets=secrets, project_name=project
+        )
         return DataItem(key, store, subpath, url, meta=meta, artifact_url=artifact_url)
 
-    def get_or_create_store(
+    def get_or_create_store(
+        self, url, secrets: dict = None, project_name=""
+    ) -> (DataStore, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
         store_key = f"{schema}://{endpoint}"
 
         if schema == "ds":
-            datastore_profile = datastore_profile_read(url)
+            datastore_profile = datastore_profile_read(url, project_name, secrets)
             if secrets and datastore_profile.secrets():
                 secrets = merge(secrets, datastore_profile.secrets())
             else: