mlrun 1.3.2rc1-py3-none-any.whl → 1.3.2rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/api/api/deps.py +14 -1
- mlrun/api/api/endpoints/frontend_spec.py +0 -2
- mlrun/api/api/endpoints/functions.py +15 -27
- mlrun/api/api/endpoints/grafana_proxy.py +435 -74
- mlrun/api/api/endpoints/healthz.py +5 -18
- mlrun/api/api/endpoints/model_endpoints.py +33 -37
- mlrun/api/api/utils.py +6 -13
- mlrun/api/crud/__init__.py +14 -16
- mlrun/api/crud/logs.py +5 -7
- mlrun/api/crud/model_monitoring/__init__.py +2 -2
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
- mlrun/api/crud/pipelines.py +2 -3
- mlrun/api/db/sqldb/models/models_mysql.py +52 -19
- mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
- mlrun/api/db/sqldb/session.py +19 -26
- mlrun/api/schemas/__init__.py +2 -0
- mlrun/api/schemas/constants.py +0 -13
- mlrun/api/schemas/frontend_spec.py +0 -1
- mlrun/api/schemas/model_endpoints.py +38 -195
- mlrun/api/schemas/schedule.py +2 -2
- mlrun/api/utils/clients/log_collector.py +5 -0
- mlrun/builder.py +9 -41
- mlrun/config.py +1 -76
- mlrun/data_types/__init__.py +1 -6
- mlrun/data_types/data_types.py +1 -3
- mlrun/datastore/__init__.py +2 -9
- mlrun/datastore/sources.py +20 -25
- mlrun/datastore/store_resources.py +1 -1
- mlrun/datastore/targets.py +34 -67
- mlrun/datastore/utils.py +4 -26
- mlrun/db/base.py +2 -4
- mlrun/db/filedb.py +5 -13
- mlrun/db/httpdb.py +32 -64
- mlrun/db/sqldb.py +2 -4
- mlrun/errors.py +0 -5
- mlrun/execution.py +0 -2
- mlrun/feature_store/api.py +8 -24
- mlrun/feature_store/feature_set.py +6 -28
- mlrun/feature_store/feature_vector.py +0 -2
- mlrun/feature_store/ingestion.py +11 -8
- mlrun/feature_store/retrieval/base.py +43 -271
- mlrun/feature_store/retrieval/dask_merger.py +153 -55
- mlrun/feature_store/retrieval/job.py +3 -12
- mlrun/feature_store/retrieval/local_merger.py +130 -48
- mlrun/feature_store/retrieval/spark_merger.py +125 -126
- mlrun/features.py +2 -7
- mlrun/model_monitoring/constants.py +6 -48
- mlrun/model_monitoring/helpers.py +35 -118
- mlrun/model_monitoring/model_monitoring_batch.py +260 -293
- mlrun/model_monitoring/stream_processing_fs.py +253 -220
- mlrun/platforms/iguazio.py +0 -33
- mlrun/projects/project.py +72 -34
- mlrun/runtimes/base.py +0 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/function.py +3 -29
- mlrun/runtimes/kubejob.py +15 -39
- mlrun/runtimes/local.py +45 -7
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/pod.py +0 -2
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/serving.py +0 -6
- mlrun/runtimes/sparkjob/abstract.py +2 -39
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +1 -2
- mlrun/serving/routers.py +35 -35
- mlrun/serving/server.py +12 -22
- mlrun/serving/states.py +30 -162
- mlrun/serving/v2_serving.py +10 -13
- mlrun/utils/clones.py +1 -1
- mlrun/utils/model_monitoring.py +96 -122
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
- mlrun/api/crud/model_monitoring/grafana.py +0 -427
- mlrun/datastore/spark_udf.py +0 -40
- mlrun/model_monitoring/__init__.py +0 -44
- mlrun/model_monitoring/common.py +0 -112
- mlrun/model_monitoring/model_endpoint.py +0 -141
- mlrun/model_monitoring/stores/__init__.py +0 -106
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -23
- mlrun/model_monitoring/stores/models/base.py +0 -18
- mlrun/model_monitoring/stores/models/mysql.py +0 -100
- mlrun/model_monitoring/stores/models/sqlite.py +0 -98
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
- mlrun/utils/db.py +0 -52
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/retrieval/spark_merger.py
CHANGED
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
 import mlrun
 from mlrun.datastore.targets import get_offline_target
 
 from ...runtimes import RemoteSparkRuntime
 from ...runtimes.sparkjob.abstract import AbstractSparkRuntime
+from ..feature_vector import OfflineVectorResponse
 from .base import BaseMerger
 
 
@@ -33,6 +33,109 @@ class SparkFeatureMerger(BaseMerger):
     def to_spark_df(self, session, path):
         return session.read.load(path)
 
+    def _generate_vector(
+        self,
+        entity_rows,
+        entity_timestamp_column,
+        feature_set_objects,
+        feature_set_fields,
+        start_time=None,
+        end_time=None,
+        query=None,
+    ):
+        from pyspark.sql import SparkSession
+        from pyspark.sql.functions import col
+
+        if self.spark is None:
+            # create spark context
+            self.spark = SparkSession.builder.appName(
+                f"vector-merger-{self.vector.metadata.name}"
+            ).getOrCreate()
+
+        feature_sets = []
+        dfs = []
+
+        for name, columns in feature_set_fields.items():
+            feature_set = feature_set_objects[name]
+            feature_sets.append(feature_set)
+            column_names = [name for name, alias in columns]
+
+            if feature_set.spec.passthrough:
+                if not feature_set.spec.source:
+                    raise mlrun.errors.MLRunNotFoundError(
+                        f"passthrough feature set {name} with no source"
+                    )
+                source_kind = feature_set.spec.source.kind
+                source_path = feature_set.spec.source.path
+            else:
+                target = get_offline_target(feature_set)
+                if not target:
+                    raise mlrun.errors.MLRunInvalidArgumentError(
+                        f"feature set {name} does not have offline targets"
+                    )
+                source_kind = target.kind
+                source_path = target.get_target_path()
+
+            # handling case where there are multiple feature sets and user creates vector where
+            # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
+            source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
+            if (
+                entity_timestamp_column in column_names
+                or feature_set.spec.timestamp_key == entity_timestamp_column
+            ):
+                source = source_driver(
+                    name=self.vector.metadata.name,
+                    path=source_path,
+                    time_field=entity_timestamp_column,
+                    start_time=start_time,
+                    end_time=end_time,
+                )
+            else:
+                source = source_driver(
+                    name=self.vector.metadata.name,
+                    path=source_path,
+                    time_field=entity_timestamp_column,
+                )
+
+            # add the index/key to selected columns
+            timestamp_key = feature_set.spec.timestamp_key
+
+            df = source.to_spark_df(
+                self.spark, named_view=self.named_view, time_field=timestamp_key
+            )
+
+            if timestamp_key and timestamp_key not in column_names:
+                columns.append((timestamp_key, None))
+            for entity in feature_set.spec.entities.keys():
+                if entity not in column_names:
+                    columns.append((entity, None))
+
+            # select requested columns and rename with alias where needed
+            df = df.select([col(name).alias(alias or name) for name, alias in columns])
+            dfs.append(df)
+            del df
+
+        # convert pandas entity_rows to spark DF if needed
+        if entity_rows is not None and not hasattr(entity_rows, "rdd"):
+            entity_rows = self.spark.createDataFrame(entity_rows)
+
+        # join the feature data frames
+        self.merge(entity_rows, entity_timestamp_column, feature_sets, dfs)
+
+        # filter joined data frame by the query param
+        if query:
+            self._result_df = self._result_df.filter(query)
+
+        self._result_df = self._result_df.drop(*self._drop_columns)
+
+        if self.vector.status.label_column:
+            self._result_df = self._result_df.dropna(
+                subset=[self.vector.status.label_column]
+            )
+
+        self._write_to_target()
+        return OfflineVectorResponse(self)
+
     def _unpersist_df(self, df):
         df.unpersist()
 
@@ -44,6 +147,7 @@ class SparkFeatureMerger(BaseMerger):
         featureset_df,
         left_keys: list,
         right_keys: list,
+        columns: list,
     ):
 
         """Perform an as of join between entity and featureset.
@@ -66,13 +170,15 @@
         from pyspark.sql.functions import col, monotonically_increasing_id, row_number
 
         entity_with_id = entity_df.withColumn("_row_nr", monotonically_increasing_id())
-        rename_right_keys = {}
-        for key in right_keys + [entity_timestamp_column]:
-            if key in entity_df.columns:
-                rename_right_keys[key] = f"ft__{key}"
+        indexes = list(featureset.spec.entities.keys())
+
         # get columns for projection
         projection = [
-            col(col_name).alias(rename_right_keys.get(col_name, col_name))
+            col(col_name).alias(
+                f"ft__{col_name}"
+                if col_name in indexes + [entity_timestamp_column]
+                else col_name
+            )
             for col_name in featureset_df.columns
         ]
 
@@ -81,16 +187,13 @@
         # set join conditions
         join_cond = (
             entity_with_id[entity_timestamp_column]
-            >= aliased_featureset_df[
-                rename_right_keys.get(entity_timestamp_column, entity_timestamp_column)
-            ]
+            >= aliased_featureset_df[f"ft__{entity_timestamp_column}"]
         )
 
         # join based on entities
-        for key_l, key_r in zip(left_keys, right_keys):
+        for key in indexes:
             join_cond = join_cond & (
-                entity_with_id[key_l]
-                == aliased_featureset_df[rename_right_keys.get(key_r, key_r)]
+                entity_with_id[key] == aliased_featureset_df[f"ft__{key}"]
             )
 
         conditional_join = entity_with_id.join(
@@ -104,16 +207,14 @@
             "_rank", row_number().over(window)
         ).filter(col("_rank") == 1)
 
-        for key in right_keys + [entity_timestamp_column]:
-            if key in rename_right_keys:
-                filter_most_recent_feature_timestamp = (
-                    filter_most_recent_feature_timestamp.drop(
-                        aliased_featureset_df[f"ft__{key}"]
-                    )
+        for key in indexes + [entity_timestamp_column]:
+            filter_most_recent_feature_timestamp = (
+                filter_most_recent_feature_timestamp.drop(
+                    aliased_featureset_df[f"ft__{key}"]
                 )
-        return filter_most_recent_feature_timestamp.drop(
-            "_row_nr", "_rank"
-        )
+            )
+
+        return filter_most_recent_feature_timestamp.drop("_row_nr", "_rank")
 
     def _join(
         self,
@@ -123,6 +224,7 @@
         featureset_df,
         left_keys: list,
         right_keys: list,
+        columns: list,
     ):
 
         """
@@ -143,19 +245,8 @@
         be prefixed with featureset_df name.
 
         """
-        if left_keys != right_keys:
-            join_cond = [
-                entity_df[key_l] == featureset_df[key_r]
-                for key_l, key_r in zip(left_keys, right_keys)
-            ]
-        else:
-            join_cond = left_keys
-
-        merged_df = entity_df.join(
-            featureset_df,
-            join_cond,
-            how=self._join_type,
-        )
+        indexes = list(featureset.spec.entities.keys())
+        merged_df = entity_df.join(featureset_df, on=indexes)
         return merged_df
 
     def get_df(self, to_pandas=True):
@@ -177,95 +268,3 @@
             return RemoteSparkRuntime.default_image
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"Unsupported kind '{kind}'")
-
-    def _create_engine_env(self):
-        from pyspark.sql import SparkSession
-
-        if self.spark is None:
-            # create spark context
-            self.spark = SparkSession.builder.appName(
-                f"vector-merger-{self.vector.metadata.name}"
-            ).getOrCreate()
-
-    def _get_engine_df(
-        self,
-        feature_set,
-        feature_set_name,
-        column_names=None,
-        start_time=None,
-        end_time=None,
-        entity_timestamp_column=None,
-    ):
-        if feature_set.spec.passthrough:
-            if not feature_set.spec.source:
-                raise mlrun.errors.MLRunNotFoundError(
-                    f"passthrough feature set {feature_set_name} with no source"
-                )
-            source_kind = feature_set.spec.source.kind
-            source_path = feature_set.spec.source.path
-        else:
-            target = get_offline_target(feature_set)
-            if not target:
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    f"feature set {feature_set_name} does not have offline targets"
-                )
-            source_kind = target.kind
-            source_path = target.get_target_path()
-
-        # handling case where there are multiple feature sets and user creates vector where
-        # entity_timestamp_column is from a specific feature set (can't be entity timestamp)
-        source_driver = mlrun.datastore.sources.source_kind_to_driver[source_kind]
-        if (
-            entity_timestamp_column in column_names
-            or feature_set.spec.timestamp_key == entity_timestamp_column
-        ):
-            source = source_driver(
-                name=self.vector.metadata.name,
-                path=source_path,
-                time_field=entity_timestamp_column,
-                start_time=start_time,
-                end_time=end_time,
-            )
-        else:
-            source = source_driver(
-                name=self.vector.metadata.name,
-                path=source_path,
-                time_field=entity_timestamp_column,
-            )
-
-        if not entity_timestamp_column:
-            entity_timestamp_column = feature_set.spec.timestamp_key
-        # add the index/key to selected columns
-        timestamp_key = feature_set.spec.timestamp_key
-
-        return source.to_spark_df(
-            self.spark, named_view=self.named_view, time_field=timestamp_key
-        )
-
-    def _rename_columns_and_select(
-        self,
-        df,
-        rename_col_dict,
-        columns=None,
-    ):
-        from pyspark.sql.functions import col
-
-        return df.select(
-            [
-                col(name).alias(rename_col_dict.get(name, name))
-                for name in columns or rename_col_dict.keys()
-            ]
-        )
-
-    def _drop_columns_from_result(self):
-        self._result_df = self._result_df.drop(*self._drop_columns)
-
-    def _filter(self, query):
-        self._result_df = self._result_df.filter(query)
-
-    def _order_by(self, order_by_active):
-        from pyspark.sql.functions import col
-
-        self._result_df = self._result_df.orderBy(
-            *[col(col_name).asc_nulls_last() for col_name in order_by_active]
-        )
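Note on the spark_merger.py hunks above: rc2 removes the engine-generic hooks (_create_engine_env, _get_engine_df, _rename_columns_and_select, _drop_columns_from_result, _filter, _order_by) and restores a single monolithic _generate_vector on SparkFeatureMerger. A minimal sketch of how that path is reached from user code, assuming a live Spark session and already-ingested feature sets; the vector and feature names are hypothetical placeholders:

import mlrun.feature_store as fstore

# Hypothetical vector over an ingested "transactions" feature set
vector = fstore.FeatureVector(
    "transactions-vector",
    features=["transactions.amount", "transactions.count_1h"],
)

# engine="spark" routes the merge through SparkFeatureMerger
resp = fstore.get_offline_features(vector, engine="spark", with_indexes=True)
df = resp.to_dataframe()  # OfflineVectorResponse -> DataFrame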
mlrun/features.py
CHANGED
@@ -16,7 +16,7 @@ import math
 import re
 from typing import Dict, List, Optional, Union
 
-from .data_types import ValueType, python_type_to_value_type
+from .data_types import ValueType
 from .errors import MLRunRuntimeError, err_to_str
 from .model import ModelObj
 
@@ -105,12 +105,7 @@ class Feature(ModelObj):
         :param labels:      a set of key/value labels (tags)
         """
         self.name = name or ""
-        if isinstance(value_type, ValueType):
-            self.value_type = value_type
-        elif value_type is not None:
-            self.value_type = python_type_to_value_type(value_type)
-        else:
-            self.value_type = ValueType.STRING
+        self.value_type = ValueType(value_type) if value_type else ValueType.STRING
         self.dims = dims
         self.description = description
         self.default = default
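The features.py change above is behavioral, not just cosmetic: rc1 converted plain Python types through the now-removed python_type_to_value_type helper, while rc2 passes value_type straight to the ValueType constructor. A short sketch of the rc2 semantics, assuming ValueType accepts its own members or their string values:

from mlrun.data_types import ValueType
from mlrun.features import Feature

Feature(value_type=ValueType.FLOAT, name="amount")  # member: accepted
Feature(value_type="float", name="amount")          # matching string value: accepted
Feature(name="amount")                              # no value_type: defaults to ValueType.STRING
# Feature(value_type=int, name="amount") relied on the removed
# python_type_to_value_type conversion and is no longer converted in rc2.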
mlrun/model_monitoring/constants.py
CHANGED
@@ -14,17 +14,12 @@
 #
 class EventFieldType:
     FUNCTION_URI = "function_uri"
-    FUNCTION = "function"
-    MODEL_URI = "model_uri"
     MODEL = "model"
     VERSION = "version"
     VERSIONED_MODEL = "versioned_model"
     MODEL_CLASS = "model_class"
     TIMESTAMP = "timestamp"
-    # `endpoint_id` is deprecated as a field in the model endpoint schema since 1.3.1, replaced by `uid`.
     ENDPOINT_ID = "endpoint_id"
-    UID = "uid"
-    ENDPOINT_TYPE = "endpoint_type"
     REQUEST_ID = "request_id"
     RECORD_TYPE = "record_type"
     FEATURES = "features"
@@ -32,6 +27,8 @@ class EventFieldType:
     NAMED_FEATURES = "named_features"
     LABELS = "labels"
     LATENCY = "latency"
+    UNPACKED_LABELS = "unpacked_labels"
+    LABEL_COLUMNS = "label_columns"
     LABEL_NAMES = "label_names"
     PREDICTION = "prediction"
     PREDICTIONS = "predictions"
@@ -41,27 +38,15 @@
     FIRST_REQUEST = "first_request"
     LAST_REQUEST = "last_request"
     METRICS = "metrics"
+    BATCH_TIMESTAMP = "batch_timestamp"
     TIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
     BATCH_INTERVALS_DICT = "batch_intervals_dict"
     DEFAULT_BATCH_INTERVALS = "default_batch_intervals"
+    DEFAULT_BATCH_IMAGE = "default_batch_image"
+    STREAM_IMAGE = "stream_image"
     MINUTES = "minutes"
     HOURS = "hours"
     DAYS = "days"
-    MODEL_ENDPOINTS = "model_endpoints"
-    STATE = "state"
-    PROJECT = "project"
-    STREAM_PATH = "stream_path"
-    ACTIVE = "active"
-    MONITORING_MODE = "monitoring_mode"
-    FEATURE_STATS = "feature_stats"
-    CURRENT_STATS = "current_stats"
-    CHILDREN = "children"
-    CHILDREN_UIDS = "children_uids"
-    DRIFT_MEASURES = "drift_measures"
-    DRIFT_STATUS = "drift_status"
-    MONITOR_CONFIGURATION = "monitor_configuration"
-    FEATURE_SET_URI = "monitoring_feature_set_uri"
-    ALGORITHM = "algorithm"
 
 
 class EventLiveStats:
@@ -76,34 +61,7 @@ class EventKeyMetrics:
     BASE_METRICS = "base_metrics"
     CUSTOM_METRICS = "custom_metrics"
     ENDPOINT_FEATURES = "endpoint_features"
-    GENERIC = "generic"
-    REAL_TIME = "real_time"
 
 
-class
+class StoreTarget:
     TSDB = "tsdb"
-
-
-class ModelEndpointTarget:
-    V3IO_NOSQL = "v3io-nosql"
-    SQL = "sql"
-
-
-class ProjectSecretKeys:
-    ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
-    ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
-    KAFKA_BOOTSTRAP_SERVERS = "KAFKA_BOOTSTRAP_SERVERS"
-    STREAM_PATH = "STREAM_PATH"
-
-
-class ModelMonitoringStoreKinds:
-    ENDPOINTS = "endpoints"
-    EVENTS = "events"
-
-
-class FileTargetKind:
-    ENDPOINTS = "endpoints"
-    EVENTS = "events"
-    STREAM = "stream"
-    PARQUET = "parquet"
-    LOG_STREAM = "log_stream"
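One practical effect of dropping ProjectSecretKeys (and the other store-related classes) from constants.py is that rc2 call sites name the model-monitoring secret directly rather than going through the shared constant; the helpers.py hunks below show this on the server side. A tiny illustrative sketch, where the environment-variable read is a hypothetical stand-in for those call sites:

import os

# rc1 (removed): mlrun.model_monitoring.constants.ProjectSecretKeys.ACCESS_KEY
# rc2: the literal name is inlined wherever the secret is referenced
access_key = os.environ.get("MODEL_MONITORING_ACCESS_KEY")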
mlrun/model_monitoring/helpers.py
CHANGED
@@ -13,25 +13,18 @@
 # limitations under the License.
 #
 import pathlib
-import typing
 
 import sqlalchemy.orm
-from fastapi import Depends
 
 import mlrun
 import mlrun.api.api.utils
 import mlrun.api.crud.secrets
-import mlrun.api.schemas
 import mlrun.api.utils.singletons.db
-import mlrun.api.utils.singletons.k8s
 import mlrun.config
 import mlrun.feature_store as fstore
-import mlrun.model_monitoring.constants as model_monitoring_constants
 import mlrun.model_monitoring.stream_processing_fs
 import mlrun.runtimes
 import mlrun.utils.helpers
-import mlrun.utils.model_monitoring
-from mlrun.api.api import deps
 
 _CURRENT_FILE_PATH = pathlib.Path(__file__)
 _STREAM_PROCESSING_FUNCTION_PATH = _CURRENT_FILE_PATH.parent / "stream_processing_fs.py"
@@ -43,20 +36,16 @@ _MONIOTINRG_BATCH_FUNCTION_PATH = (
 def initial_model_monitoring_stream_processing_function(
     project: str,
     model_monitoring_access_key: str,
+    db_session: sqlalchemy.orm.Session,
     tracking_policy: mlrun.utils.model_monitoring.TrackingPolicy,
-    auth_info: mlrun.api.schemas.AuthInfo,
-    parquet_target: str,
 ):
     """
     Initialize model monitoring stream processing function.
 
-    :param project:
-    :param model_monitoring_access_key:
-
+    :param project: project name.
+    :param model_monitoring_access_key: access key to apply the model monitoring process.
+    :param db_session: A session that manages the current dialog with the database.
     :param tracking_policy: Model monitoring configurations.
-    :param auth_info: The auth info of the request.
-    :parquet_target: Path to model monitoring parquet file that will be generated by the monitoring
-                     stream nuclio function.
 
     :return: A function object from a mlrun runtime class
 
@@ -65,11 +54,12 @@
     # Initialize Stream Processor object
     stream_processor = mlrun.model_monitoring.stream_processing_fs.EventStreamProcessor(
         project=project,
-        parquet_batching_max_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
-        parquet_target=parquet_target,
         model_monitoring_access_key=model_monitoring_access_key,
+        parquet_batching_max_events=mlrun.mlconf.model_endpoint_monitoring.parquet_batching_max_events,
     )
 
+    http_source = mlrun.datastore.sources.HttpSource()
+
     # Create a new serving function for the streaming process
     function = mlrun.code_to_function(
         name="model-monitoring-stream",
@@ -85,19 +75,32 @@
     # Set the project to the serving function
     function.metadata.project = project
 
-    # Add stream
-    function = _apply_stream_trigger(
-        project=project,
-        function=function,
-        model_monitoring_access_key=model_monitoring_access_key,
-        auth_info=auth_info,
+    # Add v3io stream trigger
+    stream_path = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
+        project=project, kind="stream"
+    )
+    function.add_v3io_stream_trigger(
+        stream_path=stream_path, name="monitoring_stream_trigger"
+    )
+
+    # Set model monitoring access key for managing permissions
+    function.set_env_from_secret(
+        "MODEL_MONITORING_ACCESS_KEY",
+        mlrun.api.utils.singletons.k8s.get_k8s().get_project_secret_name(project),
+        mlrun.api.crud.secrets.Secrets().generate_client_project_secret_key(
+            mlrun.api.crud.secrets.SecretsClientType.model_monitoring,
+            "MODEL_MONITORING_ACCESS_KEY",
+        ),
     )
 
-    # Apply feature store run configurations on the serving function
     run_config = fstore.RunConfig(function=function, local=False)
     function.spec.parameters = run_config.parameters
 
-    return function
+    func = http_source.add_nuclio_trigger(function)
+    func.metadata.credentials.access_key = model_monitoring_access_key
+    func.apply(mlrun.v3io_cred())
+
+    return func
 
 
 def get_model_monitoring_batch_function(
@@ -111,8 +114,7 @@
     Initialize model monitoring batch function.
 
     :param project: project name.
-    :param model_monitoring_access_key: access key to apply the model monitoring process.
-                                        deployments this parameter will be None.
+    :param model_monitoring_access_key: access key to apply the model monitoring process.
     :param db_session: A session that manages the current dialog with the database.
     :param auth_info: The auth info of the request.
     :param tracking_policy: Model monitoring configurations.
@@ -135,106 +137,21 @@
     # Set the project to the job function
     function.metadata.project = project
 
-    if not mlrun.mlconf.is_ce_mode():
-        function = _apply_access_key_and_mount_function(
-            project=project,
-            function=function,
-            model_monitoring_access_key=model_monitoring_access_key,
-            auth_info=auth_info,
-        )
-
-    # Enrich runtime with the required configurations
-    mlrun.api.api.utils.apply_enrichment_and_validation_on_function(function, auth_info)
-
-    return function
-
-
-def _apply_stream_trigger(
-    project: str,
-    function: mlrun.runtimes.ServingRuntime,
-    model_monitoring_access_key: str = None,
-    auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
-) -> mlrun.runtimes.ServingRuntime:
-    """Adding stream source for the nuclio serving function. By default, the function has HTTP stream trigger along
-    with another supported stream source that can be either Kafka or V3IO, depends on the stream path schema that is
-    defined under mlrun.mlconf.model_endpoint_monitoring.store_prefixes. Note that if no valid stream path has been
-    provided then the function will have a single HTTP stream source.
-
-    :param project:                     Project name.
-    :param function:                    The serving function object that will be applied with the stream trigger.
-    :param model_monitoring_access_key: Access key to apply the model monitoring stream function when the stream is
-                                        schema is V3IO.
-    :param auth_info:                   The auth info of the request.
-
-    :return: ServingRuntime object with stream trigger.
-    """
-
-    # Get the stream path from the configuration
-    # stream_path = mlrun.mlconf.get_file_target_path(project=project, kind="stream", target="stream")
-    stream_path = mlrun.utils.model_monitoring.get_stream_path(project=project)
-
-    if stream_path.startswith("kafka://"):
-
-        topic, brokers = mlrun.datastore.utils.parse_kafka_url(url=stream_path)
-        # Generate Kafka stream source
-        stream_source = mlrun.datastore.sources.KafkaSource(
-            brokers=brokers,
-            topics=[topic],
-        )
-        function = stream_source.add_nuclio_trigger(function)
-
-    if not mlrun.mlconf.is_ce_mode():
-        function = _apply_access_key_and_mount_function(
-            project=project,
-            function=function,
-            model_monitoring_access_key=model_monitoring_access_key,
-            auth_info=auth_info,
-        )
-    if stream_path.startswith("v3io://"):
-        # Generate V3IO stream trigger
-        function.add_v3io_stream_trigger(
-            stream_path=stream_path, name="monitoring_stream_trigger"
-        )
-    # Add the default HTTP source
-    http_source = mlrun.datastore.sources.HttpSource()
-    function = http_source.add_nuclio_trigger(function)
-
-    return function
-
-
-def _apply_access_key_and_mount_function(
-    project: str,
-    function: typing.Union[
-        mlrun.runtimes.KubejobRuntime, mlrun.runtimes.ServingRuntime
-    ],
-    model_monitoring_access_key: str,
-    auth_info: mlrun.api.schemas.AuthInfo,
-) -> typing.Union[mlrun.runtimes.KubejobRuntime, mlrun.runtimes.ServingRuntime]:
-    """Applying model monitoring access key on the provided function when using V3IO path. In addition, this method
-    mount the V3IO path for the provided function to configure the access to the system files.
-
-    :param project:                     Project name.
-    :param function:                    Model monitoring function object that will be filled with the access key and
-                                        the access to the system files.
-    :param model_monitoring_access_key: Access key to apply the model monitoring stream function when the stream is
-                                        schema is V3IO.
-    :param auth_info:                   The auth info of the request.
-
-    :return: function runtime object with access key and access to system files.
-    """
-
     # Set model monitoring access key for managing permissions
     function.set_env_from_secret(
-        model_monitoring_constants.ProjectSecretKeys.ACCESS_KEY,
+        "MODEL_MONITORING_ACCESS_KEY",
         mlrun.api.utils.singletons.k8s.get_k8s().get_project_secret_name(project),
         mlrun.api.crud.secrets.Secrets().generate_client_project_secret_key(
            mlrun.api.crud.secrets.SecretsClientType.model_monitoring,
-            model_monitoring_constants.ProjectSecretKeys.ACCESS_KEY,
+            "MODEL_MONITORING_ACCESS_KEY",
        ),
     )
-
+
     function.apply(mlrun.mount_v3io())
 
+    # Needs to be a member of the project and have access to project data path
+    function.metadata.credentials.access_key = model_monitoring_access_key
+
     # Ensure that the auth env vars are set
     mlrun.api.api.utils.ensure_function_has_auth_set(function, auth_info)
 
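For context on the helpers.py hunks above: rc2 folds the rc1 helper functions (_apply_stream_trigger, _apply_access_key_and_mount_function) back into the deployment entry points, wiring the stream function with a v3io stream trigger, an HTTP trigger, and v3io credentials inline. A minimal client-side sketch of the same wiring, under the assumption of a v3io-backed deployment; the project name, file name, and stream path are hypothetical placeholders:

import mlrun

fn = mlrun.code_to_function(
    name="model-monitoring-stream",
    project="my-project",
    filename="stream_processing_fs.py",  # hypothetical local copy of the stream handler
    kind="serving",
    image="mlrun/mlrun",
)
fn.add_v3io_stream_trigger(
    stream_path="projects/my-project/model-endpoints/stream",
    name="monitoring_stream_trigger",
)
fn.apply(mlrun.v3io_cred())  # mount v3io access for the function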