mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/inmem.py CHANGED
@@ -17,6 +17,7 @@ from io import BytesIO, StringIO
 import pandas as pd
 
 import mlrun
+import mlrun.utils.helpers
 
 from .base import DataStore, FileStats
 
@@ -35,7 +36,9 @@ class InMemoryStore(DataStore):
 
     def _get_item(self, key):
         if key not in self._items:
-            raise ValueError(f"item {key} not found in memory store")
+            raise mlrun.errors.MLRunNotFoundError(
+                f"item {key} not found in memory store"
+            )
         return self._items[key]
 
     def get(self, key, size=None, offset=0):
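
Note on the change above: a missing key in the in-memory store now surfaces as MLRun's standard not-found error rather than a bare ValueError, so callers can treat "not found" uniformly across datastores. A minimal sketch of code relying on the new behavior (the helper and its return-None policy are illustrative, not part of mlrun):

    import mlrun.errors

    def read_item_or_none(store, key):
        # mlrun 1.8.0 raises MLRunNotFoundError for a missing key;
        # 1.7.x raised ValueError here, so update any except clauses.
        try:
            return store._get_item(key)
        except mlrun.errors.MLRunNotFoundError:
            return None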
mlrun/datastore/redis.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
 from urllib.parse import urlparse
 
 import redis
@@ -30,7 +31,9 @@ class RedisStore(DataStore):
     - key and value sizes are limited to 512MB
     """
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
mlrun/datastore/s3.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import time
+from typing import Optional
 
 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -20,13 +21,15 @@ from fsspec.registry import get_filesystem_class
 
 import mlrun.errors
 
-from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitizer
+from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 
 
 class S3Store(DataStore):
     using_bucket = True
 
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
         self._temp_credentials = None
@@ -105,6 +108,13 @@
             "choose-signer.s3.*", disable_signing
         )
 
+    @staticmethod
+    def get_range(size, offset):
+        byterange = f"bytes={offset}-"
+        if size:
+            byterange += str(offset + size - 1)
+        return byterange
+
     def get_spark_options(self):
         res = {}
         st = self.get_storage_options()
@@ -155,6 +165,7 @@
             key=access_key_id,
             secret=secret,
             token=token,
+            use_listings_cache=False,
         )
 
         if endpoint_url:
@@ -182,7 +193,7 @@
         bucket, key = self.get_bucket_and_key(key)
         obj = self.s3.Object(bucket, key)
         if size or offset:
-            return obj.get(Range=get_range(size, offset))["Body"].read()
+            return obj.get(Range=S3Store.get_range(size, offset))["Body"].read()
         return obj.get()["Body"].read()
 
     def put(self, key, data, append=False):
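
The get_range helper that S3Store previously imported from .base is now a static method on the class. It builds the inclusive HTTP Range header used for partial object reads: offset is zero-based and the end byte is offset + size - 1, while a falsy size leaves the range open-ended. A quick standalone check of that arithmetic (the assertions are ours, not mlrun's):

    def get_range(size, offset):
        # Mirrors S3Store.get_range: inclusive byte range per the HTTP spec
        byterange = f"bytes={offset}-"
        if size:
            byterange += str(offset + size - 1)
        return byterange

    assert get_range(100, 50) == "bytes=50-149"  # 100 bytes starting at offset 50
    assert get_range(None, 50) == "bytes=50-"    # from offset 50 to end of object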
mlrun/datastore/sources.py CHANGED
@@ -18,7 +18,7 @@ import warnings
 from base64 import b64encode
 from copy import copy
 from datetime import datetime
-from typing import Optional, Union
+from typing import Any, Optional, Union
 
 import pandas as pd
 import semver
@@ -34,6 +34,7 @@ from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 from mlrun.utils import logger
 
+from ..common.schemas.function import Function
 from ..model import DataSource
 from ..platforms.iguazio import parse_path
 from ..utils import get_class, is_explicit_ack_supported
@@ -181,10 +182,10 @@ class CSVSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         parse_dates: Union[None, int, str, list[int], list[str]] = None,
         **kwargs,
     ):
@@ -308,11 +309,11 @@ class ParquetSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
         additional_filters: Optional[list[Union[tuple, list]]] = None,
@@ -392,7 +393,9 @@
         )
 
     @classmethod
-    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+    def from_dict(
+        cls, struct=None, fields=None, deprecated_fields: Optional[dict] = None
+    ):
         new_obj = super().from_dict(
             struct=struct, fields=fields, deprecated_fields=deprecated_fields
         )
@@ -564,18 +567,18 @@ class BigQuerySource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        table: str = None,
-        max_results_for_table: int = None,
-        query: str = None,
-        materialization_dataset: str = None,
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        table: Optional[str] = None,
+        max_results_for_table: Optional[int] = None,
+        query: Optional[str] = None,
+        materialization_dataset: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        gcp_project: str = None,
-        spark_options: dict = None,
+        gcp_project: Optional[str] = None,
+        spark_options: Optional[dict] = None,
         **kwargs,
     ):
         if query and table:
@@ -776,27 +779,27 @@ class SnowflakeSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        key_field: str = None,
-        attributes: dict[str, object] = None,
-        time_field: str = None,
-        schedule: str = None,
+        key_field: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time=None,
         end_time=None,
-        query: str = None,
-        url: str = None,
-        user: str = None,
-        database: str = None,
-        schema: str = None,
-        db_schema: str = None,
-        warehouse: str = None,
+        query: Optional[str] = None,
+        url: Optional[str] = None,
+        user: Optional[str] = None,
+        database: Optional[str] = None,
+        schema: Optional[str] = None,
+        db_schema: Optional[str] = None,
+        warehouse: Optional[str] = None,
         **kwargs,
     ):
-        # TODO: Remove in 1.9.0
+        # TODO: Remove in 1.10.0
         if schema:
             warnings.warn(
-                "schema is deprecated in 1.7.0, and will be removed in 1.9.0, please use db_schema"
+                "schema is deprecated in 1.7.0, and will be removed in 1.10.0, please use db_schema"
             )
-        db_schema = db_schema or schema  # TODO: Remove in 1.9.0
+        db_schema = db_schema or schema  # TODO: Remove in 1.10.0
 
         attributes = attributes or {}
         if url:
@@ -850,9 +853,9 @@ class CustomSource(BaseSourceDriver):
 
     def __init__(
         self,
-        class_name: str = None,
+        class_name: Optional[str] = None,
         name: str = "",
-        schedule: str = None,
+        schedule: Optional[str] = None,
         **attributes,
     ):
         attributes = attributes or {}
@@ -930,12 +933,12 @@ class OnlineSource(BaseSourceDriver):
 
     def __init__(
        self,
-        name: str = None,
-        path: str = None,
-        attributes: dict[str, object] = None,
-        key_field: str = None,
-        time_field: str = None,
-        workers: int = None,
+        name: Optional[str] = None,
+        path: Optional[str] = None,
+        attributes: Optional[dict[str, object]] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        workers: Optional[int] = None,
     ):
         super().__init__(name, path, attributes, key_field, time_field)
         self.online = True
@@ -949,8 +952,7 @@
             is_explicit_ack_supported(context)
             and mlrun.mlconf.is_explicit_ack_enabled()
         )
-        # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
-        src_class = storey.SyncEmitSource(
+        src_class = storey.AsyncEmitSource(
            context=context,
            key_field=self.key_field or key_field,
            full_event=True,
@@ -965,6 +967,21 @@
             "This source type is not supported with ingestion service yet"
         )
 
+    @staticmethod
+    def set_explicit_ack_mode(function: Function, **extra_arguments) -> dict[str, Any]:
+        extra_arguments = extra_arguments or {}
+        engine = "sync"
+        if function.spec and hasattr(function.spec, "graph"):
+            engine = getattr(function.spec.graph, "engine", None) or engine
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
+            extra_arguments["explicit_ack_mode"] = extra_arguments.get(
+                "explicit_ack_mode", "explicitOnly"
+            )
+            extra_arguments["worker_allocation_mode"] = extra_arguments.get(
+                "worker_allocation_mode", "static"
+            )
+        return extra_arguments
+
 
 class HttpSource(OnlineSource):
     kind = "http"
@@ -986,7 +1003,7 @@ class StreamSource(OnlineSource):
         seek_to="earliest",
         shards=1,
         retention_in_hours=24,
-        extra_attributes: dict = None,
+        extra_attributes: Optional[dict] = None,
         **kwargs,
     ):
         """
@@ -1027,15 +1044,7 @@
             raise_for_status=v3io.dataplane.RaiseForStatus.never,
         )
         res.raise_for_status([409, 204])
-
-        kwargs = {}
-        engine = "async"
-        if hasattr(function.spec, "graph") and function.spec.graph.engine:
-            engine = function.spec.graph.engine
-
-        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
-            kwargs["explicit_ack_mode"] = "explicitOnly"
-            kwargs["worker_allocation_mode"] = "static"
+        kwargs = self.set_explicit_ack_mode(function=function)
 
         function.add_v3io_stream_trigger(
             url,
@@ -1086,12 +1095,9 @@ class KafkaSource(OnlineSource):
         attributes["initial_offset"] = initial_offset
         if partitions is not None:
             attributes["partitions"] = partitions
-        sasl = attributes.pop("sasl", {})
-        if sasl_user and sasl_pass:
-            sasl["enabled"] = True
-            sasl["user"] = sasl_user
-            sasl["password"] = sasl_pass
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=sasl_user, pwd=sasl_pass
+        ):
             attributes["sasl"] = sasl
         super().__init__(attributes=attributes, **kwargs)
 
@@ -1116,18 +1122,15 @@
         else:
             extra_attributes = copy(self.attributes)
         partitions = extra_attributes.pop("partitions", None)
-        explicit_ack_mode = None
-        engine = "async"
-        if hasattr(function.spec, "graph") and function.spec.graph.engine:
-            engine = function.spec.graph.engine
 
-        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
-            explicit_ack_mode = "explicitOnly"
-            extra_attributes["workerAllocationMode"] = extra_attributes.get(
-                "worker_allocation_mode", "static"
-            )
+        extra_attributes = self.set_explicit_ack_mode(function, **extra_attributes)
+        explicit_ack_mode = extra_attributes.get("explicit_ack_mode")
+        extra_attributes["workerAllocationMode"] = extra_attributes.get(
+            "worker_allocation_mode", "pool"
+        )
 
         trigger_kwargs = {}
+
         if "max_workers" in extra_attributes:
             trigger_kwargs = {"max_workers": extra_attributes.pop("max_workers")}
 
@@ -1168,7 +1171,7 @@
         self,
         num_partitions: int = 4,
         replication_factor: int = 1,
-        topics: list[str] = None,
+        topics: Optional[list[str]] = None,
     ):
         """
         Create Kafka topics with the specified number of partitions and replication factor.
@@ -1193,19 +1196,13 @@
         new_topics = [
             NewTopic(topic, num_partitions, replication_factor) for topic in topics
         ]
-        kafka_admin = KafkaAdminClient(
-            bootstrap_servers=brokers,
-            sasl_mechanism=self.attributes.get("sasl", {}).get("sasl_mechanism"),
-            sasl_plain_username=self.attributes.get("sasl", {}).get("username"),
-            sasl_plain_password=self.attributes.get("sasl", {}).get("password"),
-            sasl_kerberos_service_name=self.attributes.get("sasl", {}).get(
-                "sasl_kerberos_service_name", "kafka"
-            ),
-            sasl_kerberos_domain_name=self.attributes.get("sasl", {}).get(
-                "sasl_kerberos_domain_name"
-            ),
-            sasl_oauth_token_provider=self.attributes.get("sasl", {}).get("mechanism"),
-        )
+
+        kafka_admin_kwargs = {}
+        kafka_admin_kwargs = mlrun.datastore.utils.KafkaParameters(
+            self.attributes
+        ).admin()
+
+        kafka_admin = KafkaAdminClient(bootstrap_servers=brokers, **kafka_admin_kwargs)
         try:
             kafka_admin.create_topics(new_topics)
         finally:
@@ -1226,16 +1223,16 @@ class SQLSource(BaseSourceDriver):
     def __init__(
         self,
         name: str = "",
-        chunksize: int = None,
-        key_field: str = None,
-        time_field: str = None,
-        schedule: str = None,
+        chunksize: Optional[int] = None,
+        key_field: Optional[str] = None,
+        time_field: Optional[str] = None,
+        schedule: Optional[str] = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        db_url: str = None,
-        table_name: str = None,
-        spark_options: dict = None,
-        parse_dates: list[str] = None,
+        db_url: Optional[str] = None,
+        table_name: Optional[str] = None,
+        spark_options: Optional[dict] = None,
+        parse_dates: Optional[list[str]] = None,
        **kwargs,
     ):
         """
mlrun/datastore/store_resources.py CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
@@ -165,11 +163,16 @@
         return db.get_feature_vector(name, project, tag, uid)
 
     elif StorePrefix.is_artifact(kind):
-        project, key, iteration, tag, tree = parse_artifact_uri(
+        project, key, iteration, tag, tree, uid = parse_artifact_uri(
             uri, project or config.default_project
         )
         resource = db.read_artifact(
-            key, project=project, tag=tag, iter=iteration, tree=tree
+            key,
+            project=project,
+            tag=tag,
+            iter=iteration,
+            tree=tree,
+            uid=uid,
         )
         if resource.get("kind", "") == "link":
             # todo: support other link types (not just iter, move this to the db/api layer
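
parse_artifact_uri now returns a sixth element, the artifact uid, which is passed through to db.read_artifact so a store URI can resolve to one exact artifact version rather than whatever a tag currently points at. Callers that destructure the five-element tuple from 1.7.x must be updated; a minimal sketch of an updated caller (the import path is our assumption, based on this file's usage):

    def resolve_artifact(db, uri: str, default_project: str):
        from mlrun.utils.helpers import parse_artifact_uri  # assumed location

        # 1.8.0 returns six values; 1.7.x returned five (no uid)
        project, key, iteration, tag, tree, uid = parse_artifact_uri(uri, default_project)
        return db.read_artifact(
            key, project=project, tag=tag, iter=iteration, tree=tree, uid=uid
        )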
mlrun/datastore/storeytargets.py CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from urllib.parse import urlparse
+
 import storey
 from mergedeep import merge
 from storey import V3ioDriver
@@ -18,6 +20,12 @@ from storey import V3ioDriver
 import mlrun
 import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    DatastoreProfileTDEngine,
+    datastore_profile_read,
+)
 
 from ..platforms.iguazio import parse_path
 from .utils import (
@@ -42,9 +50,16 @@ def get_url_and_storage_options(path, external_storage_options=None):
 
 
 class TDEngineStoreyTarget(storey.TDEngineTarget):
-    def __init__(self, *args, **kwargs):
-        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
-        super().__init__(*args, **kwargs)
+    def __init__(self, *args, url: str, **kwargs):
+        if url.startswith("ds://"):
+            datastore_profile = datastore_profile_read(url)
+            if not isinstance(datastore_profile, DatastoreProfileTDEngine):
+                raise ValueError(
+                    f"Unexpected datastore profile type:{datastore_profile.type}."
+                    "Only DatastoreProfileTDEngine is supported"
+                )
+            url = datastore_profile.dsn()
+        super().__init__(*args, url=url, **kwargs)
 
 
 class StoreyTargetUtils:
@@ -69,7 +84,12 @@
 
 class ParquetStoreyTarget(storey.ParquetTarget):
     def __init__(self, *args, **kwargs):
+        alt_key_name = kwargs.pop("alternative_v3io_access_key", None)
         args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        storage_options = kwargs.get("storage_options", {})
+        if storage_options and storage_options.get("v3io_access_key") and alt_key_name:
+            if alt_key := mlrun.get_secret_or_env(alt_key_name):
+                storage_options["v3io_access_key"] = alt_key
         super().__init__(*args, **kwargs)
 
 
@@ -89,17 +109,20 @@ class StreamStoreyTarget(storey.StreamTarget):
            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
 
        _, storage_options = get_url_and_storage_options(uri)
-       endpoint, path = parse_path(uri)
+       _, path = parse_path(uri)
 
        access_key = storage_options.get("v3io_access_key")
-       storage = V3ioDriver(
-           webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
-       )
+
+       if alt_key_name := kwargs.pop("alternative_v3io_access_key", None):
+           if alt_key := mlrun.get_secret_or_env(alt_key_name):
+               access_key = alt_key
+
+       storage = V3ioDriver(access_key=access_key)
 
        if storage_options:
            kwargs["storage"] = storage
        if args:
-           args[0] = endpoint
+           args[0] = path
        if "stream_path" in kwargs:
            kwargs["stream_path"] = path
 
@@ -108,17 +131,26 @@
 
 class KafkaStoreyTarget(storey.KafkaTarget):
     def __init__(self, *args, **kwargs):
+        kwargs.pop("alternative_v3io_access_key", None)
         path = kwargs.pop("path")
-        attributes = kwargs.pop("attributes", None)
+        attributes = kwargs.pop("attributes", {})
         if path and path.startswith("ds://"):
-            datastore_profile = (
-                mlrun.datastore.datastore_profile.datastore_profile_read(path)
-            )
+            datastore_profile = datastore_profile_read(path)
+            if not isinstance(
+                datastore_profile,
+                (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Unsupported datastore profile type: {type(datastore_profile)}"
+                )
+
             attributes = merge(attributes, datastore_profile.attributes())
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed = urlparse(path)
+            topic = (
+                parsed.path.strip("/") if parsed.path else datastore_profile.get_topic()
             )
-            topic = datastore_profile.topic
         else:
            brokers = attributes.pop(
                "brokers", attributes.pop("bootstrap_servers", None)
            )
@@ -129,7 +161,10 @@
            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
        kwargs["brokers"] = brokers
        kwargs["topic"] = topic
-       super().__init__(*args, **kwargs, **attributes)
+
+       attributes = mlrun.datastore.utils.KafkaParameters(attributes).producer()
+
+       super().__init__(*args, **kwargs, producer_options=attributes)
 
 
 class NoSqlStoreyTarget(storey.NoSqlTarget):
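
Both this file and sources.py now delegate their Kafka configuration to mlrun.datastore.utils.KafkaParameters (part of the +87 lines in mlrun/datastore/utils.py), which exposes sasl(), admin(), and producer() views over a single attributes dict instead of each call site hand-rolling the mapping. The class body is not shown in this diff, so the following stand-in only illustrates the pattern, with internals reconstructed from the code the diffs above remove:

    class KafkaParametersSketch:
        # Illustrative stand-in for mlrun.datastore.utils.KafkaParameters;
        # the real class in 1.8.0 may differ.

        def __init__(self, attributes: dict):
            self._attrs = attributes

        def sasl(self, usr=None, pwd=None) -> dict:
            # Overlay credentials on any existing sasl block (mirrors the
            # removed KafkaSource.__init__ logic above).
            sasl = dict(self._attrs.get("sasl", {}))
            if usr and pwd:
                sasl.update({"enabled": True, "user": usr, "password": pwd})
            return sasl

        def admin(self) -> dict:
            # Map the sasl block onto KafkaAdminClient kwargs (the removed
            # create_topics code above also covered kerberos/oauth settings).
            sasl = self._attrs.get("sasl", {})
            return {
                "sasl_mechanism": sasl.get("sasl_mechanism"),
                "sasl_plain_username": sasl.get("username"),
                "sasl_plain_password": sasl.get("password"),
            }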