mlrun 1.7.0rc9__py3-none-any.whl → 1.7.0rc12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (68)
  1. mlrun/__init__.py +1 -0
  2. mlrun/artifacts/manager.py +17 -6
  3. mlrun/artifacts/model.py +29 -25
  4. mlrun/common/schemas/__init__.py +1 -0
  5. mlrun/common/schemas/alert.py +122 -0
  6. mlrun/common/schemas/auth.py +4 -0
  7. mlrun/common/schemas/client_spec.py +1 -0
  8. mlrun/common/schemas/model_monitoring/constants.py +3 -1
  9. mlrun/config.py +6 -3
  10. mlrun/datastore/__init__.py +4 -3
  11. mlrun/datastore/base.py +6 -5
  12. mlrun/datastore/sources.py +9 -4
  13. mlrun/datastore/targets.py +11 -3
  14. mlrun/datastore/utils.py +16 -0
  15. mlrun/datastore/v3io.py +27 -50
  16. mlrun/db/base.py +44 -2
  17. mlrun/db/httpdb.py +192 -20
  18. mlrun/db/nopdb.py +36 -1
  19. mlrun/execution.py +21 -14
  20. mlrun/feature_store/api.py +6 -3
  21. mlrun/feature_store/feature_set.py +39 -23
  22. mlrun/feature_store/feature_vector.py +2 -1
  23. mlrun/feature_store/steps.py +30 -19
  24. mlrun/features.py +4 -13
  25. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  26. mlrun/frameworks/lgbm/__init__.py +1 -1
  27. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  28. mlrun/frameworks/lgbm/model_handler.py +1 -1
  29. mlrun/frameworks/pytorch/__init__.py +2 -2
  30. mlrun/frameworks/sklearn/__init__.py +1 -1
  31. mlrun/frameworks/tf_keras/__init__.py +1 -1
  32. mlrun/frameworks/xgboost/__init__.py +1 -1
  33. mlrun/model.py +2 -2
  34. mlrun/model_monitoring/application.py +11 -2
  35. mlrun/model_monitoring/applications/histogram_data_drift.py +3 -3
  36. mlrun/model_monitoring/controller.py +2 -3
  37. mlrun/model_monitoring/stream_processing.py +0 -1
  38. mlrun/model_monitoring/writer.py +32 -0
  39. mlrun/package/packagers_manager.py +1 -0
  40. mlrun/platforms/__init__.py +1 -1
  41. mlrun/platforms/other.py +1 -1
  42. mlrun/projects/operations.py +11 -4
  43. mlrun/projects/project.py +168 -62
  44. mlrun/run.py +72 -40
  45. mlrun/runtimes/mpijob/abstract.py +8 -8
  46. mlrun/runtimes/nuclio/function.py +9 -5
  47. mlrun/runtimes/nuclio/serving.py +12 -14
  48. mlrun/runtimes/pod.py +3 -3
  49. mlrun/secrets.py +6 -2
  50. mlrun/serving/routers.py +3 -1
  51. mlrun/serving/states.py +9 -35
  52. mlrun/serving/v2_serving.py +4 -4
  53. mlrun/utils/helpers.py +1 -1
  54. mlrun/utils/notifications/notification/base.py +12 -0
  55. mlrun/utils/notifications/notification/console.py +2 -0
  56. mlrun/utils/notifications/notification/git.py +3 -1
  57. mlrun/utils/notifications/notification/ipython.py +2 -0
  58. mlrun/utils/notifications/notification/slack.py +41 -13
  59. mlrun/utils/notifications/notification/webhook.py +11 -1
  60. mlrun/utils/retryer.py +2 -2
  61. mlrun/utils/version/version.json +2 -2
  62. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/METADATA +1 -1
  63. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/RECORD +67 -67
  64. mlrun/datastore/helpers.py +0 -18
  65. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/LICENSE +0 -0
  66. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/WHEEL +0 -0
  67. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/entry_points.txt +0 -0
  68. {mlrun-1.7.0rc9.dist-info → mlrun-1.7.0rc12.dist-info}/top_level.txt +0 -0
mlrun/__init__.py CHANGED
@@ -97,6 +97,7 @@ def set_environment(
  example::

  from os import path
+
  project_name, artifact_path = set_environment()
  set_environment("http://localhost:8080", artifact_path="./")
  set_environment(env_file="mlrun.env")
mlrun/artifacts/manager.py CHANGED
@@ -180,11 +180,13 @@ class ArtifactManager:
  upload=None,
  labels=None,
  db_key=None,
+ project=None,
+ is_retained_producer=None,
  **kwargs,
  ) -> Artifact:
  """
  Log an artifact to the DB and upload it to the artifact store.
- :param producer: The producer of the artifact, the producer depends from where the artifact is being logged.
+ :param producer: The producer of the artifact, the producer depends on where the artifact is being logged.
  :param item: The artifact to log.
  :param body: The body of the artifact.
  :param target_path: The target path of the artifact. (cannot be a relative path)
@@ -202,6 +204,9 @@ class ArtifactManager:
  :param labels: Labels to add to the artifact.
  :param db_key: The key to use when logging the artifact to the DB.
  If not provided, will generate a key based on the producer name and the artifact key.
+ :param project: The project to log the artifact to. If not provided, will use the producer's project.
+ :param is_retained_producer: Whether the producer is retained or not. Relevant to register artifacts flow
+ where a project may log artifacts which were produced by another producer.
  :param kwargs: Arguments to pass to the artifact class.
  :return: The logged artifact.
  """
@@ -226,7 +231,7 @@ class ArtifactManager:

  if db_key is None:
  # set the default artifact db key
- if producer.kind == "run":
+ if producer.kind == "run" and not is_retained_producer:
  # When the producer's type is "run,"
  # we generate a different db_key than the one we obtained in the request.
  # As a result, a new artifact for the requested key will be created,
@@ -251,8 +256,11 @@ class ArtifactManager:
  item.labels.update({"workflow-id": item.producer.get("workflow")})

  item.iter = producer.iteration
- project = producer.project
+ project = project or producer.project
  item.project = project
+ if is_retained_producer:
+ # if the producer is retained, we want to use the original target path
+ target_path = target_path or item.target_path

  # if target_path is provided and not relative, then no need to upload the artifact as it already exists
  if target_path:
@@ -260,7 +268,8 @@ class ArtifactManager:
  raise ValueError(
  f"target_path ({target_path}) param cannot be relative"
  )
- upload = False
+ if upload is None:
+ upload = False

  # if target_path wasn't provided, but src_path is not relative, then no need to upload the artifact as it
  # already exists. In this case set the target_path to the src_path and set upload to False
@@ -287,7 +296,9 @@ class ArtifactManager:

  if target_path and item.is_dir and not target_path.endswith("/"):
  target_path += "/"
- target_path = template_artifact_path(artifact_path=target_path, project=project)
+ target_path = template_artifact_path(
+ artifact_path=target_path, project=producer.project
+ )
  item.target_path = target_path

  item.before_log()
@@ -303,7 +314,7 @@ class ArtifactManager:
  item.upload(artifact_path=artifact_path)

  if db_key:
- self._log_to_db(db_key, producer.project, producer.inputs, item)
+ self._log_to_db(db_key, project, producer.inputs, item)
  size = str(item.size) or "?"
  db_str = "Y" if (self.artifact_db and db_key) else "N"
  logger.debug(
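
For orientation, a minimal sketch of how the new parameters might be passed, assuming an ArtifactManager instance ("manager") and a producer object are already in hand; the key, path, and project names below are made up:

    from mlrun.artifacts import Artifact

    artifact = manager.log_artifact(
        producer,                      # e.g. a run or project producer
        Artifact(key="my-data", body=b"some bytes"),
        target_path="v3io:///projects/other-project/artifacts/my-data",
        upload=None,              # None now means "decide automatically" rather than always forcing False
        project="other-project",  # DB record is written to this project instead of producer.project
        is_retained_producer=True,  # register flow: keep the requested db_key and the item's target path
    )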
mlrun/artifacts/model.py CHANGED
@@ -11,9 +11,10 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
  import tempfile
  from os import path
- from typing import Any
+ from typing import Any, Optional

  import pandas as pd
  import yaml
@@ -69,8 +70,8 @@ class ModelArtifactSpec(ArtifactSpec):
  model_file=None,
  metrics=None,
  paraemeters=None,
- inputs: list[Feature] = None,
- outputs: list[Feature] = None,
+ inputs: Optional[list[Feature]] = None,
+ outputs: Optional[list[Feature]] = None,
  framework=None,
  algorithm=None,
  feature_vector=None,
@@ -92,8 +93,8 @@ class ModelArtifactSpec(ArtifactSpec):
  self.model_file = model_file
  self.metrics = metrics or {}
  self.parameters = paraemeters or {}
- self.inputs: list[Feature] = inputs or []
- self.outputs: list[Feature] = outputs or []
+ self.inputs = inputs or []
+ self.outputs = outputs or []
  self.framework = framework
  self.algorithm = algorithm
  self.feature_vector = feature_vector
@@ -102,21 +103,21 @@ class ModelArtifactSpec(ArtifactSpec):
  self.model_target_file = model_target_file

  @property
- def inputs(self) -> list[Feature]:
+ def inputs(self) -> ObjectList:
  """input feature list"""
  return self._inputs

  @inputs.setter
- def inputs(self, inputs: list[Feature]):
+ def inputs(self, inputs: list[Feature]) -> None:
  self._inputs = ObjectList.from_list(Feature, inputs)

  @property
- def outputs(self) -> list[Feature]:
+ def outputs(self) -> ObjectList:
  """output feature list"""
  return self._outputs

  @outputs.setter
- def outputs(self, outputs: list[Feature]):
+ def outputs(self, outputs: list[Feature]) -> None:
  self._outputs = ObjectList.from_list(Feature, outputs)


@@ -176,22 +177,22 @@ class ModelArtifact(Artifact):
  self._spec = self._verify_dict(spec, "spec", ModelArtifactSpec)

  @property
- def inputs(self) -> list[Feature]:
+ def inputs(self) -> ObjectList:
  """input feature list"""
  return self.spec.inputs

  @inputs.setter
- def inputs(self, inputs: list[Feature]):
+ def inputs(self, inputs: list[Feature]) -> None:
  """input feature list"""
  self.spec.inputs = inputs

  @property
- def outputs(self) -> list[Feature]:
+ def outputs(self) -> ObjectList:
  """input feature list"""
  return self.spec.outputs

  @outputs.setter
- def outputs(self, outputs: list[Feature]):
+ def outputs(self, outputs: list[Feature]) -> None:
  """input feature list"""
  self.spec.outputs = outputs

@@ -445,14 +446,14 @@ class LegacyModelArtifact(LegacyArtifact):
  **kwargs,
  ):
  super().__init__(key, body, format=format, target_path=target_path, **kwargs)
- self._inputs: ObjectList = None
- self._outputs: ObjectList = None
+ self._inputs: Optional[ObjectList] = None
+ self._outputs: Optional[ObjectList] = None

  self.model_file = model_file
  self.parameters = parameters or {}
  self.metrics = metrics or {}
- self.inputs: list[Feature] = inputs or []
- self.outputs: list[Feature] = outputs or []
+ self.inputs = inputs or []
+ self.outputs = outputs or []
  self.extra_data = extra_data or {}
  self.framework = framework
  self.algorithm = algorithm
@@ -462,21 +463,21 @@ class LegacyModelArtifact(LegacyArtifact):
  self.model_target_file = model_target_file

  @property
- def inputs(self) -> list[Feature]:
+ def inputs(self) -> Optional[ObjectList]:
  """input feature list"""
  return self._inputs

  @inputs.setter
- def inputs(self, inputs: list[Feature]):
+ def inputs(self, inputs: list[Feature]) -> None:
  self._inputs = ObjectList.from_list(Feature, inputs)

  @property
- def outputs(self) -> list[Feature]:
+ def outputs(self) -> Optional[ObjectList]:
  """output feature list"""
  return self._outputs

  @outputs.setter
- def outputs(self, outputs: list[Feature]):
+ def outputs(self, outputs: list[Feature]) -> None:
  self._outputs = ObjectList.from_list(Feature, outputs)

  def infer_from_df(self, df, label_columns=None, with_stats=True, num_bins=None):
@@ -552,9 +553,9 @@ def get_model(model_dir, suffix=""):

  example::

- model_file, model_artifact, extra_data = get_model(models_path, suffix='.pkl')
+ model_file, model_artifact, extra_data = get_model(models_path, suffix=".pkl")
  model = load(open(model_file, "rb"))
- categories = extra_data['categories'].as_df()
+ categories = extra_data["categories"].as_df()

  :param model_dir: model dir or artifact path (store://..) or DataItem
  :param suffix: model filename suffix (when using a dir)
@@ -663,8 +664,11 @@ def update_model(

  example::

- update_model(model_path, metrics={'speed': 100},
- extra_data={'my_data': b'some text', 'file': 's3://mybucket/..'})
+ update_model(
+ model_path,
+ metrics={"speed": 100},
+ extra_data={"my_data": b"some text", "file": "s3://mybucket/.."},
+ )

  :param model_artifact: model artifact object or path (store://..) or DataItem
  :param parameters: parameters dict
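
The annotation changes above make the signatures match the runtime behavior: the getters return an ObjectList keyed by feature name, while the setters still accept a plain list of Feature objects. A rough sketch (feature names are illustrative):

    from mlrun.artifacts.model import ModelArtifact
    from mlrun.features import Feature

    model = ModelArtifact(key="my-model")
    model.inputs = [Feature(value_type="float", name="sepal_length")]  # setter takes list[Feature]
    print(type(model.inputs))                        # ObjectList, not a plain list
    print(model.inputs["sepal_length"].value_type)   # keyed access by feature name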
mlrun/common/schemas/__init__.py CHANGED
@@ -14,6 +14,7 @@
  #
  # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx

+ from .alert import AlertActiveState, AlertConfig, Event
  from .api_gateway import (
  APIGateway,
  APIGatewayAuthenticationMode,
mlrun/common/schemas/alert.py ADDED
@@ -0,0 +1,122 @@
+ # Copyright 2023 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ from datetime import datetime
+ from typing import Annotated, Optional, Union
+
+ import pydantic
+
+ from mlrun.common.schemas.notification import Notification
+ from mlrun.common.types import StrEnum
+
+
+ class EventEntityKind(StrEnum):
+ MODEL = "model"
+ JOB = "job"
+
+
+ class EventEntity(pydantic.BaseModel):
+ kind: EventEntityKind
+ project: str
+ id: str
+
+
+ class EventKind(StrEnum):
+ DRIFT_DETECTED = "drift_detected"
+ DRIFT_SUSPECTED = "drift_suspected"
+ FAILED = "failed"
+
+
+ _event_kind_entity_map = {
+ EventKind.DRIFT_SUSPECTED: [EventEntityKind.MODEL],
+ EventKind.DRIFT_DETECTED: [EventEntityKind.MODEL],
+ EventKind.FAILED: [EventEntityKind.JOB],
+ }
+
+
+ class Event(pydantic.BaseModel):
+ kind: EventKind
+ timestamp: Union[str, datetime] = None # occurrence time
+ entity: EventEntity
+ value: Optional[Union[float, str]] = None
+
+ def is_valid(self):
+ return self.entity.kind in _event_kind_entity_map[self.kind]
+
+
+ class AlertActiveState(StrEnum):
+ ACTIVE = "active"
+ INACTIVE = "inactive"
+
+
+ class AlertSeverity(StrEnum):
+ LOW = "low"
+ MEDIUM = "medium"
+ HIGH = "high"
+
+
+ # what should trigger the alert. must be either event (at least 1), or prometheus query
+ class AlertTrigger(pydantic.BaseModel):
+ events: list[EventKind] = []
+ prometheus_alert: str = None
+
+
+ class AlertCriteria(pydantic.BaseModel):
+ count: Annotated[
+ int,
+ pydantic.Field(
+ description="Number of events to wait until notification is sent"
+ ),
+ ] = 0
+ period: Annotated[
+ str,
+ pydantic.Field(
+ description="Time period during which event occurred. e.g. 1d, 3h, 5m, 15s"
+ ),
+ ] = None
+
+
+ class ResetPolicy(StrEnum):
+ MANUAL = "manual"
+ AUTO = "auto"
+
+
+ class AlertConfig(pydantic.BaseModel):
+ project: str
+ id: int = None
+ name: str
+ description: Optional[str] = ""
+ summary: Annotated[
+ str,
+ pydantic.Field(
+ description=(
+ "String to be sent in the notifications generated."
+ "e.g. 'Model {{ $project }}/{{ $entity }} is drifting.'"
+ )
+ ),
+ ]
+ created: Union[str, datetime] = None
+ severity: AlertSeverity
+ entity: EventEntity
+ trigger: AlertTrigger
+ criteria: Optional[AlertCriteria]
+ reset_policy: ResetPolicy = ResetPolicy.MANUAL
+ notifications: pydantic.conlist(Notification, min_items=1)
+ state: AlertActiveState = AlertActiveState.INACTIVE
+ count: Optional[int] = 0
+
+
+ class AlertsModes(StrEnum):
+ enabled = "enabled"
+ disabled = "disabled"
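
As a quick illustration of the new schema (all values are made up), an Event is only considered valid when its kind matches the allowed entity kinds in the internal map, and AlertConfig ties an entity, a trigger, and at least one notification together:

    from mlrun.common.schemas.alert import Event, EventEntity, EventEntityKind, EventKind

    event = Event(
        kind=EventKind.DRIFT_DETECTED,
        entity=EventEntity(kind=EventEntityKind.MODEL, project="my-project", id="endpoint-1"),
        value=0.82,
    )
    assert event.is_valid()  # drift events are only valid for model entities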
mlrun/common/schemas/auth.py CHANGED
@@ -58,6 +58,8 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
  pipeline = "pipeline"
  hub_source = "hub-source"
  workflow = "workflow"
+ alert = "alert"
+ event = "event"
  datastore_profile = "datastore-profile"
  api_gateway = "api-gateway"

@@ -83,6 +85,8 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
  AuthorizationResourceTypes.schedule: "/projects/{project_name}/schedules/{resource_name}",
  AuthorizationResourceTypes.secret: "/projects/{project_name}/secrets/{resource_name}",
  AuthorizationResourceTypes.run: "/projects/{project_name}/runs/{resource_name}",
+ AuthorizationResourceTypes.event: "/projects/{project_name}/events/{resource_name}",
+ AuthorizationResourceTypes.alert: "/projects/{project_name}/alerts/{resource_name}",
  # runtime resource doesn't have an identifier, we don't need any auth granularity behind project level
  AuthorizationResourceTypes.runtime_resource: "/projects/{project_name}/runtime-resources",
  AuthorizationResourceTypes.model_endpoint: "/projects/{project_name}/model-endpoints/{resource_name}",
mlrun/common/schemas/client_spec.py CHANGED
@@ -66,3 +66,4 @@ class ClientSpec(pydantic.BaseModel):
  logs: typing.Optional[dict]
  packagers: typing.Optional[dict]
  external_platform_tracking: typing.Optional[dict]
+ alerts_mode: typing.Optional[str]
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -308,4 +308,6 @@ class ControllerPolicy:
  BASE_PERIOD = "base_period"


- MLRUN_HISTOGRAM_DATA_DRIFT_APP_NAME = "histogram-data-drift"
+ class HistogramDataDriftApplicationConstants:
+ NAME = "histogram-data-drift"
+ GENERAL_RESULT_NAME = "general_drift"
mlrun/config.py CHANGED
@@ -549,10 +549,9 @@ default_config = {
  "feature_store": {
  "data_prefixes": {
  "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
- "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
+ "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
  # "authority" is optional and generalizes [userinfo "@"] host [":" port]
- "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
- "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
+ "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
  },
  "default_targets": "parquet,nosql",
  "default_job_image": "mlrun/mlrun",
@@ -688,6 +687,10 @@ default_config = {
  "access_key": "",
  },
  "grafana_url": "",
+ "alerts": {
+ # supported modes: "enabled", "disabled".
+ "mode": "disabled"
+ },
  }

  _is_running_as_api = None
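
A small sketch of how the new setting would be read on the client side, assuming the usual mlrun.mlconf accessor (which exposes nested config sections as attributes):

    import mlrun

    # "disabled" by default; the server may override it through the client spec's alerts_mode field
    print(mlrun.mlconf.alerts.mode)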
mlrun/datastore/__init__.py CHANGED
@@ -64,7 +64,7 @@ from .store_resources import (
  parse_store_uri,
  )
  from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
- from .utils import parse_kafka_url
+ from .utils import get_kafka_brokers_from_dict, parse_kafka_url

  store_manager = StoreManager()

@@ -107,8 +107,9 @@ def get_stream_pusher(stream_path: str, **kwargs):
  :param stream_path: path/url of stream
  """

- if stream_path.startswith("kafka://") or "kafka_brokers" in kwargs:
- topic, brokers = parse_kafka_url(stream_path, kwargs.get("kafka_brokers"))
+ kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+ if stream_path.startswith("kafka://") or kafka_brokers:
+ topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
  return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
  elif stream_path.startswith("http://") or stream_path.startswith("https://"):
  return HTTPOutputStream(stream_path=stream_path)
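
A minimal sketch of what the new lookup allows in get_stream_pusher (the topic and broker address are placeholders): both keyword spellings route to the Kafka pusher, with the old one emitting a FutureWarning:

    from mlrun.datastore import get_stream_pusher

    # preferred keyword
    pusher = get_stream_pusher("kafka://my-topic", kafka_brokers="broker-1:9092")

    # deprecated keyword: still accepted during the deprecation window, emits a FutureWarning
    legacy_pusher = get_stream_pusher(
        "kafka://my-topic", kafka_bootstrap_servers="broker-1:9092"
    )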
mlrun/datastore/base.py CHANGED
@@ -389,14 +389,15 @@ class DataItem:


  # reading run results using DataItem (run.artifact())
- train_run = train_iris_func.run(inputs={'dataset': dataset},
- params={'label_column': 'label'})
+ train_run = train_iris_func.run(
+ inputs={"dataset": dataset}, params={"label_column": "label"}
+ )

- train_run.artifact('confusion-matrix').show()
- test_set = train_run.artifact('test_set').as_df()
+ train_run.artifact("confusion-matrix").show()
+ test_set = train_run.artifact("test_set").as_df()

  # create and use DataItem from uri
- data = mlrun.get_dataitem('http://xyz/data.json').get()
+ data = mlrun.get_dataitem("http://xyz/data.json").get()
  """

  def __init__(
mlrun/datastore/sources.py CHANGED
@@ -406,12 +406,17 @@ class BigQuerySource(BaseSourceDriver):

  # use sql query
  query_string = "SELECT * FROM `the-psf.pypi.downloads20210328` LIMIT 5000"
- source = BigQuerySource("bq1", query=query_string,
- gcp_project="my_project",
- materialization_dataset="dataviews")
+ source = BigQuerySource(
+ "bq1",
+ query=query_string,
+ gcp_project="my_project",
+ materialization_dataset="dataviews",
+ )

  # read a table
- source = BigQuerySource("bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project")
+ source = BigQuerySource(
+ "bq2", table="the-psf.pypi.downloads20210328", gcp_project="my_project"
+ )


  :parameter name: source name
mlrun/datastore/targets.py CHANGED
@@ -1532,15 +1532,23 @@ class KafkaTarget(BaseStoreTarget):
  **kwargs,
  ):
  attrs = {}
+
+ # TODO: Remove this in 1.9.0
  if bootstrap_servers:
+ if brokers:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
+ "'bootstrap_servers' parameter. Please use 'brokers' only."
+ )
  warnings.warn(
  "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
  "use 'brokers' instead.",
- # TODO: Remove this in 1.9.0
  FutureWarning,
  )
- if bootstrap_servers is not None:
- attrs["brokers"] = brokers or bootstrap_servers
+ brokers = bootstrap_servers
+
+ if brokers:
+ attrs["brokers"] = brokers
  if producer_options is not None:
  attrs["producer_options"] = producer_options
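
A short sketch of how callers are affected (the topic path and broker address are illustrative):

    from mlrun.datastore.targets import KafkaTarget

    # preferred: pass broker addresses via 'brokers'
    target = KafkaTarget(path="kafka://my-topic", brokers="broker-1:9092")

    # deprecated: 'bootstrap_servers' still works but emits a FutureWarning and maps to 'brokers';
    # passing both 'brokers' and 'bootstrap_servers' now raises MLRunInvalidArgumentError
    legacy = KafkaTarget(path="kafka://my-topic", bootstrap_servers="broker-1:9092")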
 
mlrun/datastore/utils.py CHANGED
@@ -15,6 +15,7 @@
  import tarfile
  import tempfile
  import typing
+ import warnings
  from urllib.parse import parse_qs, urlparse

  import pandas as pd
@@ -164,3 +165,18 @@ def _generate_sql_query_with_time_filter(
  query = query.filter(getattr(table.c, time_column) <= end_time)

  return query, parse_dates
+
+
+ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str]:
+ get_or_pop = options.pop if pop else options.get
+ kafka_brokers = get_or_pop("kafka_brokers", None)
+ if kafka_brokers:
+ return kafka_brokers
+ kafka_bootstrap_servers = get_or_pop("kafka_bootstrap_servers", None)
+ if kafka_bootstrap_servers:
+ warnings.warn(
+ "The 'kafka_bootstrap_servers' parameter is deprecated and will be removed in "
+ "1.9.0. Please pass the 'kafka_brokers' parameter instead.",
+ FutureWarning,
+ )
+ return kafka_bootstrap_servers
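
A minimal usage sketch of the new helper (dictionary contents are made up); with pop=True the consumed key is removed so it is not forwarded twice:

    from mlrun.datastore.utils import get_kafka_brokers_from_dict

    options = {"kafka_bootstrap_servers": "broker-1:9092", "other_option": 1}

    brokers = get_kafka_brokers_from_dict(options, pop=True)  # emits a FutureWarning for the old key
    assert brokers == "broker-1:9092"
    assert "kafka_bootstrap_servers" not in options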
mlrun/datastore/v3io.py CHANGED
@@ -12,8 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- import mmap
- import os
  import time
  from datetime import datetime

@@ -22,7 +20,6 @@ import v3io
  from v3io.dataplane.response import HttpResponseError

  import mlrun
- from mlrun.datastore.helpers import ONE_GB, ONE_MB

  from ..platforms.iguazio import parse_path, split_path
  from .base import (
@@ -32,6 +29,7 @@ from .base import (
  )

  V3IO_LOCAL_ROOT = "v3io"
+ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100


  class V3ioStore(DataStore):
@@ -98,46 +96,28 @@ class V3ioStore(DataStore):
  )
  return self._sanitize_storage_options(res)

- def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
+ def _upload(
+ self,
+ key: str,
+ src_path: str,
+ max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+ ):
  """helper function for upload method, allows for controlling max_chunk_size in testing"""
  container, path = split_path(self._join(key))
- file_size = os.path.getsize(src_path) # in bytes
- if file_size <= ONE_MB:
- with open(src_path, "rb") as source_file:
- data = source_file.read()
- self._do_object_request(
- self.object.put,
- container=container,
- path=path,
- body=data,
- append=False,
- )
- return
- # chunk must be a multiple of the ALLOCATIONGRANULARITY
- # https://docs.python.org/3/library/mmap.html
- if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
- # round down to the nearest multiple of ALLOCATIONGRANULARITY
- max_chunk_size -= residue
-
  with open(src_path, "rb") as file_obj:
- file_offset = 0
- while file_offset < file_size:
- chunk_size = min(file_size - file_offset, max_chunk_size)
- with mmap.mmap(
- file_obj.fileno(),
- length=chunk_size,
- access=mmap.ACCESS_READ,
- offset=file_offset,
- ) as mmap_obj:
- append = file_offset != 0
- self._do_object_request(
- self.object.put,
- container=container,
- path=path,
- body=mmap_obj,
- append=append,
- )
- file_offset += chunk_size
+ append = False
+ while True:
+ data = memoryview(file_obj.read(max_chunk_size))
+ if not data:
+ break
+ self._do_object_request(
+ self.object.put,
+ container=container,
+ path=path,
+ body=data,
+ append=append,
+ )
+ append = True

  def upload(self, key, src_path):
  return self._upload(key, src_path)
@@ -152,19 +132,16 @@ class V3ioStore(DataStore):
  num_bytes=size,
  ).body

- def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
+ def _put(
+ self,
+ key,
+ data,
+ append=False,
+ max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
+ ):
  """helper function for put method, allows for controlling max_chunk_size in testing"""
  container, path = split_path(self._join(key))
  buffer_size = len(data) # in bytes
- if buffer_size <= ONE_MB:
- self._do_object_request(
- self.object.put,
- container=container,
- path=path,
- body=data,
- append=append,
- )
- return
  buffer_offset = 0
  try:
  data = memoryview(data)