mlrun 1.7.0rc5__py3-none-any.whl → 1.7.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/base.py +2 -1
- mlrun/artifacts/plots.py +9 -5
- mlrun/common/constants.py +6 -0
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +35 -18
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/types.py +7 -1
- mlrun/config.py +19 -6
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +22 -16
- mlrun/datastore/datastore.py +4 -0
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/sources.py +7 -7
- mlrun/db/base.py +14 -6
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +61 -56
- mlrun/db/nopdb.py +3 -0
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/model.py +1 -0
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +104 -301
- mlrun/model_monitoring/application.py +21 -21
- mlrun/model_monitoring/applications/histogram_data_drift.py +130 -40
- mlrun/model_monitoring/controller.py +26 -33
- mlrun/model_monitoring/db/__init__.py +16 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +45 -6
- mlrun/model_monitoring/stream_processing.py +43 -9
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +4 -36
- mlrun/projects/pipelines.py +13 -1
- mlrun/projects/project.py +279 -117
- mlrun/run.py +72 -74
- mlrun/runtimes/__init__.py +35 -0
- mlrun/runtimes/base.py +7 -1
- mlrun/runtimes/nuclio/api_gateway.py +188 -61
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +283 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +87 -0
- mlrun/runtimes/nuclio/function.py +53 -1
- mlrun/runtimes/nuclio/serving.py +28 -32
- mlrun/runtimes/pod.py +27 -1
- mlrun/serving/server.py +4 -6
- mlrun/serving/states.py +41 -33
- mlrun/utils/helpers.py +34 -0
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/METADATA +14 -5
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/RECORD +71 -64
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/mysql.py +0 -34
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc5.dist-info → mlrun-1.7.0rc7.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py
CHANGED
@@ -88,9 +88,10 @@ class ArtifactSpec(ModelObj):
         "db_key",
         "extra_data",
         "unpackaging_instructions",
+        "producer",
     ]

-    _extra_fields = ["annotations", "…
+    _extra_fields = ["annotations", "sources", "license", "encoding"]
     _exclude_fields_from_uid_hash = [
         # if the artifact is first created, it will not have a db_key,
         # exclude it so further updates of the artifacts will have the same hash
mlrun/artifacts/plots.py
CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import base64
+import typing
 from io import BytesIO

 from deprecated import deprecated
@@ -21,6 +22,9 @@ import mlrun
 from ..utils import dict_to_json
 from .base import Artifact, LegacyArtifact

+if typing.TYPE_CHECKING:
+    from plotly.graph_objs import Figure
+

 class PlotArtifact(Artifact):
     kind = "plot"
@@ -207,10 +211,10 @@ class PlotlyArtifact(Artifact):

     def __init__(
         self,
-        figure=None,
-        key: str = None,
-        target_path: str = None,
-    ):
+        figure: typing.Optional["Figure"] = None,
+        key: typing.Optional[str] = None,
+        target_path: typing.Optional[str] = None,
+    ) -> None:
         """
         Initialize a Plotly artifact with the given figure.

@@ -247,7 +251,7 @@ class PlotlyArtifact(Artifact):
         self._figure = figure
         self.spec.format = "html"

-    def get_body(self):
+    def get_body(self) -> str:
         """
         Get the artifact's body - the Plotly figure's html code.
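The annotations above only formalize what the class already did. A rough usage sketch (the plotly calls and the import path are illustrative, not taken from this diff):

    import plotly.graph_objs as go
    from mlrun.artifacts import PlotlyArtifact

    fig = go.Figure(data=go.Scatter(y=[1, 3, 2]))
    artifact = PlotlyArtifact(figure=fig, key="my-plot")
    html = artifact.get_body()  # annotated as -> str in this release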
mlrun/common/constants.py
CHANGED
@@ -14,4 +14,10 @@
 #
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
 MLRUN_CREATED_LABEL = "mlrun-created"
+MLRUN_MODEL_CONF = "model-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
+MLRUN_SERVING_SPEC_PATH = (
+    f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
+)
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
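Composed, the new constant resolves as:

    >>> from mlrun.common.constants import MLRUN_SERVING_SPEC_PATH
    >>> MLRUN_SERVING_SPEC_PATH
    '/tmp/mlrun/model-conf/serving_spec.json'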
mlrun/common/schemas/__init__.py
CHANGED
@@ -124,6 +124,7 @@ from .model_monitoring import (
     EventFieldType,
     EventKeyMetrics,
     Features,
+    FeatureSetFeatures,
     FeatureValues,
     GrafanaColumn,
     GrafanaDataPoint,
@@ -139,6 +140,7 @@ from .model_monitoring import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PrometheusEndpoints,
     TimeSeriesTarget,
 )
 from .notification import (

mlrun/common/schemas/model_monitoring/__init__.py
CHANGED

@@ -22,6 +22,7 @@ from .constants import (
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
+    FeatureSetFeatures,
     FileTargetKind,
     FunctionURI,
     ModelEndpointTarget,
@@ -29,9 +30,12 @@ from .constants import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     ProjectSecretKeys,
+    PrometheusEndpoints,
     PrometheusMetric,
+    SchedulingKeys,
     TimeSeriesTarget,
     VersionedModel,
+    WriterEvent,
 )
 from .grafana import (
     GrafanaColumn,

mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -21,6 +21,12 @@ import mlrun.common.helpers
 from mlrun.common.types import StrEnum


+class MonitoringStrEnum(StrEnum):
+    @classmethod
+    def list(cls):
+        return list(map(lambda c: c.value, cls))
+
+
 class EventFieldType:
     FUNCTION_URI = "function_uri"
     FUNCTION = "function"
@@ -77,6 +83,20 @@ class EventFieldType:
     SAMPLE_PARQUET_PATH = "sample_parquet_path"


+class FeatureSetFeatures(MonitoringStrEnum):
+    LATENCY = EventFieldType.LATENCY
+    ERROR_COUNT = EventFieldType.ERROR_COUNT
+    METRICS = EventFieldType.METRICS
+
+    @classmethod
+    def time_stamp(cls):
+        return EventFieldType.TIMESTAMP
+
+    @classmethod
+    def entity(cls):
+        return EventFieldType.ENDPOINT_ID
+
+
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
     CURRENT_STATS = "current_stats"
@@ -89,7 +109,7 @@ class ApplicationEvent:
     OUTPUT_STREAM_URI = "output_stream_uri"


-class WriterEvent(StrEnum):
+class WriterEvent(MonitoringStrEnum):
     APPLICATION_NAME = "application_name"
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
@@ -101,10 +121,6 @@ class WriterEvent(StrEnum):
     RESULT_EXTRA_DATA = "result_extra_data"
     CURRENT_STATS = "current_stats"

-    @classmethod
-    def list(cls):
-        return list(map(lambda c: c.value, cls))
-

 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -146,6 +162,9 @@ class ModelMonitoringStoreKinds:

 class SchedulingKeys:
     LAST_ANALYZED = "last_analyzed"
+    ENDPOINT_ID = "endpoint_id"
+    APPLICATION_NAME = "application_name"
+    UID = "uid"


 class FileTargetKind:
@@ -155,6 +174,8 @@ class FileTargetKind:
     PARQUET = "parquet"
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
+    APP_RESULTS = "app_results"
+    MONITORING_SCHEDULES = "monitoring_schedules"


 class ModelMonitoringMode(str, Enum):
@@ -177,20 +198,16 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"


-class …
-…
-…
-…
-…
+class PrometheusEndpoints(MonitoringStrEnum):
+    MODEL_MONITORING_METRICS = "/model-monitoring-metrics"
+    MONITORING_BATCH_METRICS = "/monitoring-batch-metrics"
+    MONITORING_DRIFT_STATUS = "/monitoring-drift-status"
+

-…
-…
-…
-…
-        MonitoringFunctionNames.STREAM,
-        MonitoringFunctionNames.BATCH,
-        MonitoringFunctionNames.APPLICATION_CONTROLLER,
-    ]
+class MonitoringFunctionNames(MonitoringStrEnum):
+    STREAM = "model-monitoring-stream"
+    APPLICATION_CONTROLLER = "model-monitoring-controller"
+    WRITER = "model-monitoring-writer"


 @dataclass
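The practical effect of MonitoringStrEnum is that every monitoring enum now exposes the list() helper that WriterEvent previously defined only for itself. A minimal illustrative sketch:

    from mlrun.common.schemas.model_monitoring.constants import MonitoringFunctionNames

    # list() returns the members' string values in definition order
    MonitoringFunctionNames.list()
    # ['model-monitoring-stream', 'model-monitoring-controller', 'model-monitoring-writer']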
mlrun/common/schemas/project.py
CHANGED
@@ -87,6 +87,7 @@ class ProjectSpec(pydantic.BaseModel):
     custom_packagers: typing.Optional[list[tuple[str, bool]]] = None
     default_image: typing.Optional[str] = None
     build: typing.Optional[ImageBuilder] = None
+    default_function_node_selector: typing.Optional[dict] = {}

     class Config:
         extra = pydantic.Extra.allow
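Assuming the client-side project spec mirrors this API schema field (mlrun/projects/project.py is also reworked in this release), a project-wide node selector might be set like this; the attribute path is an assumption, not shown in this diff:

    import mlrun

    project = mlrun.get_or_create_project("my-project")
    # hypothetical: applied by default to functions that do not set their own selector
    project.spec.default_function_node_selector = {"disktype": "ssd"}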
mlrun/common/types.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 import enum

@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):

     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
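Since HTTPMethod subclasses StrEnum (which mixes in str), its members compare equal to plain strings; a minimal sketch:

    from mlrun.common.types import HTTPMethod

    assert HTTPMethod.GET == "GET"
    assert HTTPMethod.POST == "POST"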
mlrun/config.py
CHANGED
@@ -324,7 +324,13 @@ default_config = {
             # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
             #
             # if set to "nil" or "none", nothing would be set
-            "modes": …
+            "modes": (
+                "STRICT_TRANS_TABLES"
+                ",NO_ZERO_IN_DATE"
+                ",NO_ZERO_DATE"
+                ",ERROR_FOR_DIVISION_BY_ZERO"
+                ",NO_ENGINE_SUBSTITUTION",
+            )
         },
     },
     "jobs": {
@@ -356,6 +362,8 @@ default_config = {
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 4096,
     },
     "logs": {
         "decode": {
@@ -443,7 +451,7 @@ default_config = {
        # pip install <requirement_specifier>, e.g. mlrun==0.5.4, mlrun~=0.5,
        # git+https://github.com/mlrun/mlrun@development. by default uses the version
        "mlrun_version_specifier": "",
-       "kaniko_image": "gcr.io/kaniko-project/executor:v1.…
+       "kaniko_image": "gcr.io/kaniko-project/executor:v1.21.1",  # kaniko builder image
        "kaniko_init_container_image": "alpine:3.18",
        # image for kaniko init container when docker registry is ECR
        "kaniko_aws_cli_image": "amazon/aws-cli:2.7.10",
@@ -473,6 +481,11 @@ default_config = {
            # if set to true, will log a warning for trying to use run db functionality while in nop db mode
            "verbose": True,
        },
+       "pagination_cache": {
+           "interval": 60,
+           "ttl": 3600,
+           "max_size": 10000,
+       },
    },
    "model_endpoint_monitoring": {
        "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -492,10 +505,9 @@ default_config = {
        # when the user is working in CE environment and has not provided any stream path.
        "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
        "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-       "batch_processing_function_branch": "master",
        "parquet_batching_max_events": 10_000,
        "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-       # See mlrun.model_monitoring.stores.…
+       # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
        "store_type": "v3io-nosql",
        "endpoint_store_connection": "",
    },
@@ -610,8 +622,9 @@ default_config = {
    },
    "workflows": {
        "default_workflow_runner_name": "workflow-runner-{}",
-       # Default timeout seconds for retrieving workflow id after execution
-       …
+       # Default timeout seconds for retrieving workflow id after execution
+       # Remote workflow timeout is the maximum between remote and the inner engine timeout
+       "timeouts": {"local": 120, "kfp": 60, "remote": 60 * 5},
    },
    "log_collector": {
        "address": "localhost:8282",
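These defaults are reachable through mlrun's global config object; a minimal sketch, assuming the usual mlconf attribute-access pattern:

    import mlrun

    # per this diff, the remote workflow timeout default is 60 * 5 = 300 seconds
    mlrun.mlconf.workflows.timeouts.remote  # -> 300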
mlrun/data_types/data_types.py
CHANGED
@@ -41,6 +41,7 @@ class ValueType(str, Enum):
     BYTES = "bytes"
     STRING = "str"
     DATETIME = "datetime"
+    LIST = "List"
     BYTES_LIST = "List[bytes]"
     STRING_LIST = "List[string]"
     INT32_LIST = "List[int32]"
@@ -48,6 +49,7 @@ class ValueType(str, Enum):
     DOUBLE_LIST = "List[float]"
     FLOAT_LIST = "List[float32]"
     BOOL_LIST = "List[bool]"
+    Tuple = "Tuple"


 def pd_schema_to_value_type(value):
@@ -102,6 +104,8 @@ def python_type_to_value_type(value_type):
         "datetime64[ns]": ValueType.INT64,
         "datetime64[ns, tz]": ValueType.INT64,
         "category": ValueType.STRING,
+        "list": ValueType.LIST,
+        "tuple": ValueType.Tuple,
     }

     if type_name in type_map:
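A minimal sketch of the new value types (names taken from the diff above; note that LIST follows the existing upper-case member convention while Tuple does not):

    from mlrun.data_types.data_types import ValueType

    ValueType.LIST.value   # "List"
    ValueType.Tuple.value  # "Tuple"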
mlrun/datastore/alibaba_oss.py
ADDED

@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
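A rough usage sketch for the new store. The oss:// scheme mapping is an assumption based on the accompanying registration change in mlrun/datastore/datastore.py; the credentials, endpoint, and bucket/key values are placeholders:

    import os

    import mlrun

    os.environ["ALIBABA_ACCESS_KEY_ID"] = "<access-key-id>"
    os.environ["ALIBABA_SECRET_ACCESS_KEY"] = "<secret>"
    os.environ["ALIBABA_ENDPOINT_URL"] = "https://oss-cn-hangzhou.aliyuncs.com"

    # OSSStore raises MLRunInvalidArgumentError if any of the three
    # ALIBABA_* variables above is missing
    item = mlrun.get_dataitem("oss://my-bucket/path/to/file.csv")
    content = item.get()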
mlrun/datastore/azure_blob.py
CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
             st[key] = parsed_value

         account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
         if primary_url:
             if primary_url.startswith("http://"):
                 primary_url = primary_url[len("http://") :]
             if primary_url.startswith("https://"):
                 primary_url = primary_url[len("https://") :]
             host = primary_url
-
+        elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
         if "account_key" in st:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
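Net effect: when neither a primary URL nor an account name can be determined, get_spark_options now returns the options collected so far instead of raising MLRunInvalidArgumentError.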
mlrun/datastore/base.py
CHANGED
@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated

+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df

-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-

 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -633,17 +630,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}


-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -671,7 +657,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")

     def get(self, key, size=None, offset=0):
-        data = …
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -691,6 +677,26 @@ class HttpStore(DataStore):
                 f"schema as it is not secure and is not recommended."
             )

+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+

 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
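SSL verification for HTTP(S) data items is therefore no longer hardwired to False at module import; it now follows the httpdb.http.verify setting. A minimal sketch, assuming the standard mlconf access pattern:

    import mlrun

    # _http_get passes this value to requests.get(..., verify=...)
    mlrun.mlconf.httpdb.http.verify = True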
mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -132,7 +132,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
-        res = …
+        res = {}
         st = self.get_storage_options()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
mlrun/datastore/sources.py
CHANGED
@@ -204,11 +204,11 @@ class CSVSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        store, path, …
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
-                "path": …
+                "path": store.spark_url + path,
                 "format": "csv",
                 "header": "true",
                 "inferSchema": "true",
@@ -357,7 +357,7 @@ class ParquetSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-        store, path, …
+        store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
         spark_options.update(
             {
@@ -794,7 +794,8 @@ class OnlineSource(BaseSourceDriver):
         explicit_ack = (
             is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
         )
-        …
+        # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
+        src_class = storey.SyncEmitSource(
             context=context,
             key_field=self.key_field or key_field,
             full_event=True,
@@ -853,12 +854,11 @@ class StreamSource(OnlineSource):
         super().__init__(name, attributes=attrs, **kwargs)

     def add_nuclio_trigger(self, function):
-        store, …
+        store, _, url = mlrun.store_manager.get_or_create_store(self.path)
         if store.kind != "v3io":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Only profiles that reference the v3io datastore can be used with StreamSource"
             )
-        path = "v3io:/" + path
         storage_options = store.get_storage_options()
         access_key = storage_options.get("v3io_access_key")
         endpoint, stream_path = parse_path(url)
@@ -882,7 +882,7 @@ class StreamSource(OnlineSource):
         kwargs["worker_allocation_mode"] = "static"

         function.add_v3io_stream_trigger(
-            …
+            url,
             self.name,
             self.attributes["group"],
             self.attributes["seek_to"],
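A minimal sketch of the revised StreamSource path handling; the URL below is a placeholder and the kwarg plumbing (path forwarded through OnlineSource) is an assumption:

    from mlrun.datastore.sources import StreamSource

    # add_nuclio_trigger now resolves the path through store_manager and
    # rejects stores whose kind is not "v3io"
    source = StreamSource(name="my-stream", path="v3io:///projects/demo/streams/events")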
mlrun/db/base.py
CHANGED
@@ -17,7 +17,7 @@ from abc import ABC, abstractmethod
 from typing import Optional, Union

 import mlrun.common.schemas
-import mlrun.model_monitoring
+import mlrun.model_monitoring


 class RunDBError(Exception):
@@ -509,9 +509,7 @@ class RunDBInterface(ABC):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: Union[mlrun.model_monitoring.ModelEndpoint, dict],
     ):
         pass

@@ -632,6 +630,10 @@ class RunDBInterface(ABC):
     def get_api_gateway(self, name, project=None) -> mlrun.common.schemas.APIGateway:
         pass

+    @abstractmethod
+    def delete_api_gateway(self, name, project=None):
+        pass
+
     def get_builder_status(
         self,
         func: "mlrun.runtimes.BaseRuntime",
@@ -724,5 +726,11 @@ class RunDBInterface(ABC):
         project: str,
         base_period: int = 10,
         image: str = "mlrun/mlrun",
-        …
-        …
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        raise NotImplementedError
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError