mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (93)
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/stream_processing_fs.py
@@ -19,24 +19,23 @@ import os
 import typing
 
 import pandas as pd
+
+# Constants
 import storey
+import v3io
+import v3io.dataplane
 
-import mlrun
 import mlrun.config
 import mlrun.datastore.targets
 import mlrun.feature_store.steps
 import mlrun.utils
 import mlrun.utils.model_monitoring
 import mlrun.utils.v3io_clients
-from mlrun.model_monitoring import (
+from mlrun.model_monitoring.constants import (
     EventFieldType,
     EventKeyMetrics,
     EventLiveStats,
-    FileTargetKind,
-    ModelEndpointTarget,
-    ProjectSecretKeys,
 )
-from mlrun.model_monitoring.stores import get_model_endpoint_store
 from mlrun.utils import logger
 
 
@@ -46,90 +45,81 @@ class EventStreamProcessor:
         self,
         project: str,
         parquet_batching_max_events: int,
-        parquet_target: str,
         sample_window: int = 10,
+        tsdb_batching_max_events: int = 10,
+        tsdb_batching_timeout_secs: int = 60 * 5,  # Default 5 minutes
         parquet_batching_timeout_secs: int = 30 * 60,  # Default 30 minutes
         aggregate_count_windows: typing.Optional[typing.List[str]] = None,
         aggregate_count_period: str = "30s",
         aggregate_avg_windows: typing.Optional[typing.List[str]] = None,
         aggregate_avg_period: str = "30s",
+        v3io_access_key: typing.Optional[str] = None,
+        v3io_framesd: typing.Optional[str] = None,
+        v3io_api: typing.Optional[str] = None,
         model_monitoring_access_key: str = None,
     ):
-        # General configurations, mainly used for the storey steps in the future serving graph
         self.project = project
         self.sample_window = sample_window
+        self.tsdb_batching_max_events = tsdb_batching_max_events
+        self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
+        self.parquet_batching_max_events = parquet_batching_max_events
+        self.parquet_batching_timeout_secs = parquet_batching_timeout_secs
         self.aggregate_count_windows = aggregate_count_windows or ["5m", "1h"]
         self.aggregate_count_period = aggregate_count_period
         self.aggregate_avg_windows = aggregate_avg_windows or ["5m", "1h"]
         self.aggregate_avg_period = aggregate_avg_period
 
-        # Parquet path and configurations
-        self.parquet_path = parquet_target
-        self.parquet_batching_max_events = parquet_batching_max_events
-        self.parquet_batching_timeout_secs = parquet_batching_timeout_secs
-
-        self.model_endpoint_store_target = (
-            mlrun.mlconf.model_endpoint_monitoring.store_type
-        )
-
-        logger.info(
-            "Initializing model monitoring event stream processor",
-            parquet_path=self.parquet_path,
-            parquet_batching_max_events=self.parquet_batching_max_events,
-        )
-
-        self.storage_options = None
-        if not mlrun.mlconf.is_ce_mode():
-            self._initialize_v3io_configurations(
-                model_monitoring_access_key=model_monitoring_access_key
-            )
-
-    def _initialize_v3io_configurations(
-        self,
-        tsdb_batching_max_events: int = 10,
-        tsdb_batching_timeout_secs: int = 60 * 5,  # Default 5 minutes
-        v3io_access_key: typing.Optional[str] = None,
-        v3io_framesd: typing.Optional[str] = None,
-        v3io_api: typing.Optional[str] = None,
-        model_monitoring_access_key: str = None,
-    ):
-        # Get the V3IO configurations
         self.v3io_framesd = v3io_framesd or mlrun.mlconf.v3io_framesd
         self.v3io_api = v3io_api or mlrun.mlconf.v3io_api
 
         self.v3io_access_key = v3io_access_key or os.environ.get("V3IO_ACCESS_KEY")
         self.model_monitoring_access_key = (
             model_monitoring_access_key
-            or os.environ.get(ProjectSecretKeys.ACCESS_KEY)
+            or os.environ.get("MODEL_MONITORING_ACCESS_KEY")
             or self.v3io_access_key
         )
         self.storage_options = dict(
             v3io_access_key=self.model_monitoring_access_key, v3io_api=self.v3io_api
         )
 
-        # KV path
-        kv_path = mlrun.mlconf.get_model_monitoring_file_target_path(
-            project=self.project, kind=FileTargetKind.ENDPOINTS
-        )
+        template = mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default
+
+        kv_path = template.format(project=project, kind="endpoints")
         (
             _,
             self.kv_container,
             self.kv_path,
         ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(kv_path)
 
-        # TSDB path and configurations
-        tsdb_path = mlrun.mlconf.get_model_monitoring_file_target_path(
-            project=self.project, kind=FileTargetKind.EVENTS
-        )
+        tsdb_path = template.format(project=project, kind="events")
         (
             _,
             self.tsdb_container,
             self.tsdb_path,
         ) = mlrun.utils.model_monitoring.parse_model_endpoint_store_prefix(tsdb_path)
-
         self.tsdb_path = f"{self.tsdb_container}/{self.tsdb_path}"
-        self.tsdb_batching_max_events = tsdb_batching_max_events
-        self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
+
+        self.parquet_path = (
+            mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                project=project, kind="parquet"
+            )
+        )
+
+        logger.info(
+            "Initializing model monitoring event stream processor",
+            parquet_batching_max_events=self.parquet_batching_max_events,
+            v3io_access_key=self.v3io_access_key,
+            model_monitoring_access_key=self.model_monitoring_access_key,
+            default_store_prefix=mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default,
+            user_space_store_prefix=mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space,
+            v3io_api=self.v3io_api,
+            v3io_framesd=self.v3io_framesd,
+            kv_container=self.kv_container,
+            kv_path=self.kv_path,
+            tsdb_container=self.tsdb_container,
+            tsdb_path=self.tsdb_path,
+            parquet_path=self.parquet_path,
+        )
 
     def apply_monitoring_serving_graph(self, fn):
         """
@@ -137,23 +127,20 @@ class EventStreamProcessor:
         of different operations that are executed on the events from the model server. Each event has
         metadata (function_uri, timestamp, class, etc.) but also inputs and predictions from the model server.
         Throughout the serving graph, the results are written to 3 different databases:
-        1. KV/SQL (steps 7-9): Stores metadata and stats about the average latency and the amount of predictions over
-        time per endpoint. for example the amount of predictions of endpoint x in the last 5 min. This data is used
-        by the monitoring dashboards in grafana. The model endpoints table also contains data on the model endpoint
-        from other processes, such as current_stats that is being calculated by the monitoring batch job
-        process. If the target is from type KV, then the model endpoints table can be found under
-        v3io:///users/pipelines/project-name/model-endpoints/endpoints/. If the target is SQL, then the table
-        is stored within the database that was defined in the provided connection string and can be found
-        under mlrun.mlconf.model_endpoint_monitoring.endpoint_store_connection.
+        1. KV (steps 7-9): Stores metadata and stats about the average latency and the amount of predictions over time
+        per endpoint. for example the amount of predictions of endpoint x in the last 5 min. This data is used by
+        the monitoring dashboards in grafana. Please note that the KV table, which can be found under
+        v3io:///users/pipelines/project-name/model-endpoints/endpoints/ also contains data on the model endpoint
+        from other processes, such as current_stats that is being calculated by the monitoring batch job
+        process.
         2. TSDB (steps 12-18): Stores live data of different key metric dictionaries in tsdb target. Results can be
         found under v3io:///users/pipelines/project-name/model-endpoints/events/. At the moment, this part supports
         3 different key metric dictionaries: base_metrics (average latency and predictions over time),
         endpoint_features (Prediction and feature names and values), and custom_metrics (user-defined metrics).
         This data is also being used by the monitoring dashboards in grafana.
         3. Parquet (steps 19-20): This Parquet file includes the required data for the model monitoring batch job
-        that run every hour by default. If defined, the parquet target path can be found under
-        mlrun.mlconf.model_endpoint_monitoring.offline. Otherwise, the default parquet path is under
-        mlrun.mlconf.model_endpoint_monitoring.user_space.
+        that run every hour by default. The parquet target can be found under
+        v3io:///projects/{project}/model-endpoints/.
 
         :param fn: A serving function.
         """
@@ -164,6 +151,9 @@ class EventStreamProcessor:
         def apply_process_endpoint_event():
             graph.add_step(
                 "ProcessEndpointEvent",
+                kv_container=self.kv_container,
+                kv_path=self.kv_path,
+                v3io_access_key=self.v3io_access_key,
                 full_event=True,
                 project=self.project,
             )
@@ -192,8 +182,10 @@ class EventStreamProcessor:
             graph.add_step(
                 "MapFeatureNames",
                 name="MapFeatureNames",
+                kv_container=self.kv_container,
+                kv_path=self.kv_path,
+                access_key=self.v3io_access_key,
                 infer_columns_from_data=True,
-                project=self.project,
                 after="flatten_events",
             )
 
@@ -217,6 +209,7 @@ class EventStreamProcessor:
                 after="MapFeatureNames",
                 step_name="Aggregates",
                 table=".",
+                v3io_access_key=self.v3io_access_key,
             )
             # Step 5.2 - Calculate average latency time for each window (5 min and 1 hour by default)
             graph.add_step(
@@ -233,6 +226,7 @@ class EventStreamProcessor:
                 name=EventFieldType.LATENCY,
                 after=EventFieldType.PREDICTIONS,
                 table=".",
+                v3io_access_key=self.v3io_access_key,
             )
 
         apply_storey_aggregations()
@@ -245,121 +239,117 @@ class EventStreamProcessor:
                 after=EventFieldType.LATENCY,
                 window_size=self.sample_window,
                 key=EventFieldType.ENDPOINT_ID,
+                v3io_access_key=self.v3io_access_key,
             )
 
         apply_storey_sample_window()
 
-        # Steps 7-9 - KV/SQL branch
-        # Step 7 - Filter relevant keys from the event before writing the data into the database table
-        def apply_process_before_endpoint_update():
-            graph.add_step(
-                "ProcessBeforeEndpointUpdate",
-                name="ProcessBeforeEndpointUpdate",
-                after="sample",
-            )
+        # Steps 7-9 - KV branch
+        # Step 7 - Filter relevant keys from the event before writing the data into KV
+        def apply_process_before_kv():
+            graph.add_step("ProcessBeforeKV", name="ProcessBeforeKV", after="sample")
 
-        apply_process_before_endpoint_update()
+        apply_process_before_kv()
 
-        # Step 8 - Write the filtered event to KV/SQL table. At this point, the serving graph updates the stats
+        # Step 8 - Write the filtered event to KV table. At this point, the serving graph updates the stats
         # about average latency and the amount of predictions over time
-        def apply_update_endpoint():
+        def apply_write_to_kv():
             graph.add_step(
-                "UpdateEndpoint",
-                name="UpdateEndpoint",
-                after="ProcessBeforeEndpointUpdate",
-                project=self.project,
-                model_endpoint_store_target=self.model_endpoint_store_target,
+                "WriteToKV",
+                name="WriteToKV",
+                after="ProcessBeforeKV",
+                container=self.kv_container,
+                table=self.kv_path,
+                v3io_access_key=self.v3io_access_key,
             )
 
-        apply_update_endpoint()
+        apply_write_to_kv()
 
-        # Step 9 (only for KV target) - Apply infer_schema on the model endpoints table for generating schema file
+        # Step 9 - Apply infer_schema on the KB table for generating schema file
         # which will be used by Grafana monitoring dashboards
         def apply_infer_schema():
             graph.add_step(
                 "InferSchema",
                 name="InferSchema",
-                after="UpdateEndpoint",
+                after="WriteToKV",
+                v3io_access_key=self.v3io_access_key,
                 v3io_framesd=self.v3io_framesd,
                 container=self.kv_container,
                 table=self.kv_path,
             )
 
-        if self.model_endpoint_store_target == ModelEndpointTarget.V3IO_NOSQL:
-            apply_infer_schema()
+        apply_infer_schema()
 
-        # Steps 11-18 - TSDB branch (not supported in CE environment at the moment)
+        # Steps 11-18 - TSDB branch
+        # Step 11 - Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
+        # stats and details about the events
+        def apply_process_before_tsdb():
+            graph.add_step(
+                "ProcessBeforeTSDB", name="ProcessBeforeTSDB", after="sample"
+            )
 
-        if not mlrun.mlconf.is_ce_mode():
-            # Step 11 - Before writing data to TSDB, create dictionary of 2-3 dictionaries that contains
-            # stats and details about the events
-            def apply_process_before_tsdb():
-                graph.add_step(
-                    "ProcessBeforeTSDB", name="ProcessBeforeTSDB", after="sample"
-                )
+        apply_process_before_tsdb()
 
-            apply_process_before_tsdb()
+        # Steps 12-18: - Unpacked keys from each dictionary and write to TSDB target
+        def apply_filter_and_unpacked_keys(name, keys):
+            graph.add_step(
+                "FilterAndUnpackKeys",
+                name=name,
+                after="ProcessBeforeTSDB",
+                keys=[keys],
+            )
 
-            # Steps 12-18: - Unpacked keys from each dictionary and write to TSDB target
-            def apply_filter_and_unpacked_keys(name, keys):
-                graph.add_step(
-                    "FilterAndUnpackKeys",
-                    name=name,
-                    after="ProcessBeforeTSDB",
-                    keys=[keys],
-                )
+        def apply_tsdb_target(name, after):
+            graph.add_step(
+                "storey.TSDBTarget",
+                name=name,
+                after=after,
+                path=self.tsdb_path,
+                rate="10/m",
+                time_col=EventFieldType.TIMESTAMP,
+                container=self.tsdb_container,
+                access_key=self.v3io_access_key,
+                v3io_frames=self.v3io_framesd,
+                infer_columns_from_data=True,
+                index_cols=[
+                    EventFieldType.ENDPOINT_ID,
+                    EventFieldType.RECORD_TYPE,
+                ],
+                max_events=self.tsdb_batching_max_events,
+                flush_after_seconds=self.tsdb_batching_timeout_secs,
+                key=EventFieldType.ENDPOINT_ID,
+            )
 
-            def apply_tsdb_target(name, after):
-                graph.add_step(
-                    "storey.TSDBTarget",
-                    name=name,
-                    after=after,
-                    path=self.tsdb_path,
-                    rate="10/m",
-                    time_col=EventFieldType.TIMESTAMP,
-                    container=self.tsdb_container,
-                    access_key=self.v3io_access_key,
-                    v3io_frames=self.v3io_framesd,
-                    infer_columns_from_data=True,
-                    index_cols=[
-                        EventFieldType.ENDPOINT_ID,
-                        EventFieldType.RECORD_TYPE,
-                    ],
-                    max_events=self.tsdb_batching_max_events,
-                    flush_after_seconds=self.tsdb_batching_timeout_secs,
-                    key=EventFieldType.ENDPOINT_ID,
-                )
+        # Steps 12-13 - unpacked base_metrics dictionary
+        apply_filter_and_unpacked_keys(
+            name="FilterAndUnpackKeys1",
+            keys=EventKeyMetrics.BASE_METRICS,
+        )
+        apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
 
-            # Steps 12-13 - unpacked base_metrics dictionary
-            apply_filter_and_unpacked_keys(
-                name="FilterAndUnpackKeys1",
-                keys=EventKeyMetrics.BASE_METRICS,
-            )
-            apply_tsdb_target(name="tsdb1", after="FilterAndUnpackKeys1")
+        # Steps 14-15 - unpacked endpoint_features dictionary
+        apply_filter_and_unpacked_keys(
+            name="FilterAndUnpackKeys2",
+            keys=EventKeyMetrics.ENDPOINT_FEATURES,
+        )
+        apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
 
-            # Steps 14-15 - unpacked endpoint_features dictionary
-            apply_filter_and_unpacked_keys(
-                name="FilterAndUnpackKeys2",
-                keys=EventKeyMetrics.ENDPOINT_FEATURES,
-            )
-            apply_tsdb_target(name="tsdb2", after="FilterAndUnpackKeys2")
+        # Steps 16-18 - unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
+        apply_filter_and_unpacked_keys(
+            name="FilterAndUnpackKeys3",
+            keys=EventKeyMetrics.CUSTOM_METRICS,
+        )
 
-            # Steps 16-18 - unpacked custom_metrics dictionary. In addition, use storey.Filter remove none values
-            apply_filter_and_unpacked_keys(
-                name="FilterAndUnpackKeys3",
-                keys=EventKeyMetrics.CUSTOM_METRICS,
+        def apply_storey_filter():
+            graph.add_step(
+                "storey.Filter",
+                "FilterNotNone",
+                after="FilterAndUnpackKeys3",
+                _fn="(event is not None)",
             )
 
-            def apply_storey_filter():
-                graph.add_step(
-                    "storey.Filter",
-                    "FilterNotNone",
-                    after="FilterAndUnpackKeys3",
-                    _fn="(event is not None)",
-                )
-
-            apply_storey_filter()
-            apply_tsdb_target(name="tsdb3", after="FilterNotNone")
+        apply_storey_filter()
+        apply_tsdb_target(name="tsdb3", after="FilterNotNone")
 
         # Steps 19-20 - Parquet branch
         # Step 19 - Filter and validate different keys before writing the data to Parquet target
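
The fan-out used in steps 11-18 above can be read in isolation. A hedged sketch of the same add_step wiring, assuming a fresh serving function (the function name and the standalone topology setup are illustrative, not taken from the diff):

    import mlrun

    fn = mlrun.new_function("monitoring-stream-demo", kind="serving")
    graph = fn.set_topology("flow")

    # One upstream step fans out into three unpack branches, mirroring steps 12-18
    graph.add_step("ProcessBeforeTSDB", name="ProcessBeforeTSDB")
    for i, key in enumerate(
        ["base_metrics", "endpoint_features", "custom_metrics"], start=1
    ):
        graph.add_step(
            "FilterAndUnpackKeys",
            name=f"FilterAndUnpackKeys{i}",
            after="ProcessBeforeTSDB",
            keys=[key],
        )
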
@@ -394,14 +384,14 @@ class EventStreamProcessor:
         apply_parquet_target()
 
 
-class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
+class ProcessBeforeKV(mlrun.feature_store.steps.MapClass):
     def __init__(self, **kwargs):
         """
-        Filter relevant keys from the event before writing the data to database table (in EndpointUpdate step).
-        Note that in the endpoint table we only keep metadata (function_uri, model_class, etc.) and stats about the
-        average latency and the number of predictions (per 5min and 1hour).
+        Filter relevant keys from the event before writing the data to KV table (in WriteToKV step). Note that in KV
+        we only keep metadata (function_uri, model_class, etc.) and stats about the average latency and the number
+        of predictions (per 5min and 1hour).
 
-        :returns: A filtered event as a dictionary which will be written to the endpoint table in the next step.
+        :returns: A filtered event as a dictionary which will be written to KV table in the next step.
         """
         super().__init__(**kwargs)
 
@@ -418,31 +408,26 @@ class ProcessBeforeEndpointUpdate(mlrun.feature_store.steps.MapClass):
                 EventFieldType.FUNCTION_URI,
                 EventFieldType.MODEL,
                 EventFieldType.MODEL_CLASS,
+                EventFieldType.TIMESTAMP,
                 EventFieldType.ENDPOINT_ID,
                 EventFieldType.LABELS,
-                EventFieldType.FIRST_REQUEST,
-                EventFieldType.LAST_REQUEST,
-                EventFieldType.ERROR_COUNT,
-            ]
-        }
-
-        # Add generic metrics statistics
-        generic_metrics = {
-            k: event[k]
-            for k in [
+                EventFieldType.UNPACKED_LABELS,
                 EventLiveStats.LATENCY_AVG_5M,
                 EventLiveStats.LATENCY_AVG_1H,
                 EventLiveStats.PREDICTIONS_PER_SECOND,
                 EventLiveStats.PREDICTIONS_COUNT_5M,
                 EventLiveStats.PREDICTIONS_COUNT_1H,
+                EventFieldType.FIRST_REQUEST,
+                EventFieldType.LAST_REQUEST,
+                EventFieldType.ERROR_COUNT,
             ]
         }
-
-        e[EventFieldType.METRICS] = json.dumps(
-            {EventKeyMetrics.GENERIC: generic_metrics}
-        )
-
-        # Write labels as json string as required by the DB format
+        # Unpack labels dictionary
+        e = {
+            **e.pop(EventFieldType.UNPACKED_LABELS, {}),
+            **e,
+        }
+        # Write labels to kv as json string to be presentable later
         e[EventFieldType.LABELS] = json.dumps(e[EventFieldType.LABELS])
 
         return e
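
The unpack-then-merge added above relies on Python's left-to-right evaluation of dict literals; a tiny self-contained sketch with illustrative keys:

    # e.pop(...) runs first, so the unpacked label keys are spread before the
    # remaining event keys; on a key collision the original event value wins.
    e = {"endpoint_id": "ep-1", "unpacked_labels": {"_env": "prod"}}
    e = {
        **e.pop("unpacked_labels", {}),
        **e,
    }
    assert e == {"_env": "prod", "endpoint_id": "ep-1"}
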
@@ -538,6 +523,7 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
         logger.info("ProcessBeforeParquet1", event=event)
         # Remove the following keys from the event
         for key in [
+            EventFieldType.UNPACKED_LABELS,
             EventFieldType.FEATURES,
             EventFieldType.NAMED_FEATURES,
         ]:
@@ -563,23 +549,32 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
 class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
     def __init__(
         self,
-        project: str,
+        kv_container: str,
+        kv_path: str,
+        v3io_access_key: str,
         **kwargs,
     ):
         """
         Process event or batch of events as part of the first step of the monitoring serving graph. It includes
-        Adding important details to the event such as endpoint_id, handling errors coming from the stream, validation
+        Adding important details to the event such as endpoint_id, handling errors coming from the stream, Validation
         of event data such as inputs and outputs, and splitting model event into sub-events.
 
-        :param project: Project name.
+        :param kv_container: Name of the container that will be used to retrieve the endpoint id. For model
+                             endpoints it is usually 'users'.
+        :param kv_path: KV table path that will be used to retrieve the endpoint id. For model endpoints
+                        it is usually pipelines/project-name/model-endpoints/endpoints/
+        :param v3io_access_key: Access key with permission to read from a KV table.
+        :param project: Project name.
+
 
         :returns: A Storey event object which is the basic unit of data in Storey. Note that the next steps of
                   the monitoring serving graph are based on Storey operations.
 
         """
         super().__init__(**kwargs)
-
-        self.project: str = project
+        self.kv_container: str = kv_container
+        self.kv_path: str = kv_path
+        self.v3io_access_key: str = v3io_access_key
 
         # First and last requests timestamps (value) of each endpoint (key)
         self.first_request: typing.Dict[str, str] = dict()
@@ -607,7 +602,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         version = event.get(EventFieldType.VERSION)
         versioned_model = f"{model}:{version}" if version else f"{model}:latest"
 
-        endpoint_id = mlrun.model_monitoring.create_model_endpoint_uid(
+        endpoint_id = mlrun.utils.model_monitoring.create_model_endpoint_id(
             function_uri=function_uri,
             versioned_model=versioned_model,
         )
@@ -684,6 +679,11 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         ):
             return None
 
+        # Get labels from event (if exist)
+        unpacked_labels = {
+            f"_{k}": v for k, v in event.get(EventFieldType.LABELS, {}).items()
+        }
+
         # Adjust timestamp format
         timestamp = datetime.datetime.strptime(timestamp[:-6], "%Y-%m-%d %H:%M:%S.%f")
 
@@ -722,6 +722,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                     EventFieldType.ENTITIES: event.get("request", {}).get(
                         EventFieldType.ENTITIES, {}
                     ),
+                    EventFieldType.UNPACKED_LABELS: unpacked_labels,
                 }
             )
 
@@ -750,8 +751,8 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )
 
-    @staticmethod
     def is_list_of_numerics(
+        self,
         field: typing.List[typing.Union[int, float, dict, list]],
         dict_path: typing.List[str],
     ):
@@ -766,11 +767,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
         if endpoint_id not in self.endpoints:
-
             logger.info("Trying to resume state", endpoint_id=endpoint_id)
             endpoint_record = get_endpoint_record(
-                project=self.project,
+                kv_container=self.kv_container,
+                kv_path=self.kv_path,
                 endpoint_id=endpoint_id,
+                access_key=self.v3io_access_key,
             )
 
             # If model endpoint found, get first_request, last_request and error_count values
@@ -788,7 +790,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 error_count = endpoint_record.get(EventFieldType.ERROR_COUNT)
 
                 if error_count:
-                    self.error_count[endpoint_id] = int(error_count)
+                    self.error_count[endpoint_id] = error_count
 
             # add endpoint to endpoints set
             self.endpoints.add(endpoint_id)
@@ -855,7 +857,9 @@ class FilterAndUnpackKeys(mlrun.feature_store.steps.MapClass):
 class MapFeatureNames(mlrun.feature_store.steps.MapClass):
     def __init__(
         self,
-        project: str,
+        kv_container: str,
+        kv_path: str,
+        access_key: str,
         infer_columns_from_data: bool = False,
         **kwargs,
     ):
@@ -863,7 +867,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         Validating feature names and label columns and map each feature to its value. In the end of this step,
         the event should have key-value pairs of (feature name: feature value).
 
-        :param project: Project name.
+        :param kv_container: Name of the container that will be used to retrieve the endpoint id. For model
+                             endpoints it is usually 'users'.
+        :param kv_path: KV table path that will be used to retrieve the endpoint id. For model endpoints
+                        it is usually pipelines/project-name/model-endpoints/endpoints/
+        :param v3io_access_key: Access key with permission to read from a KV table.
         :param infer_columns_from_data: If true and features or labels names were not found, then try to
                                         retrieve them from data that was stored in the previous events of
                                         the current process. This data can be found under self.feature_names and
@@ -874,9 +882,10 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                                         feature names and values (as well as the prediction results).
         """
         super().__init__(**kwargs)
-
+        self.kv_container = kv_container
+        self.kv_path = kv_path
+        self.access_key = access_key
         self._infer_columns_from_data = infer_columns_from_data
-        self.project = project
 
         # Dictionaries that will be used in case features names
         # and labels columns were not found in the current event
@@ -905,8 +914,10 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
-                project=self.project,
+                kv_container=self.kv_container,
+                kv_path=self.kv_path,
                 endpoint_id=endpoint_id,
+                access_key=self.access_key,
             )
             feature_names = endpoint_record.get(EventFieldType.FEATURE_NAMES)
             feature_names = json.loads(feature_names) if feature_names else None
@@ -929,12 +940,15 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 ]
 
                 # Update the endpoint record with the generated features
-                update_endpoint_record(
-                    project=self.project,
-                    endpoint_id=endpoint_id,
+                mlrun.utils.v3io_clients.get_v3io_client().kv.update(
+                    container=self.kv_container,
+                    table_path=self.kv_path,
+                    access_key=self.access_key,
+                    key=event[EventFieldType.ENDPOINT_ID],
                     attributes={
                         EventFieldType.FEATURE_NAMES: json.dumps(feature_names)
                     },
+                    raise_for_status=v3io.dataplane.RaiseForStatus.always,
                 )
 
                 # Similar process with label columns
@@ -949,11 +963,15 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 label_columns = [
                     f"p{i}" for i, _ in enumerate(event[EventFieldType.PREDICTION])
                 ]
-
-                update_endpoint_record(
-                    project=self.project,
-                    endpoint_id=endpoint_id,
-                    attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
+                mlrun.utils.v3io_clients.get_v3io_client().kv.update(
+                    container=self.kv_container,
+                    table_path=self.kv_path,
+                    access_key=self.access_key,
+                    key=event[EventFieldType.ENDPOINT_ID],
+                    attributes={
+                        EventFieldType.LABEL_COLUMNS: json.dumps(label_columns)
+                    },
+                    raise_for_status=v3io.dataplane.RaiseForStatus.always,
                 )
 
                 self.label_columns[endpoint_id] = label_columns
@@ -1015,24 +1033,33 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             event[mapping_dictionary][name] = value
 
 
-class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
-    def __init__(self, project: str, model_endpoint_store_target: str, **kwargs):
+class WriteToKV(mlrun.feature_store.steps.MapClass):
+    def __init__(self, container: str, table: str, v3io_access_key: str, **kwargs):
         """
-        Update the model endpoint record in the DB. Note that the event at this point includes metadata and stats about
-        the average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
+        Writes the event to KV table. Note that the event at this point includes metadata and stats about the
+        average latency and the amount of predictions over time. This data will be used in the monitoring dashboards
         such as "Model Monitoring - Performance" which can be found in Grafana.
 
+        :param kv_container: Name of the container that will be used to retrieve the endpoint id. For model
+                             endpoints it is usually 'users'.
+        :param table: KV table path that will be used to retrieve the endpoint id. For model endpoints
+                      it is usually pipelines/project-name/model-endpoints/endpoints/.
+        :param v3io_access_key: Access key with permission to read from a KV table.
+
         :returns: Event as a dictionary (without any changes) for the next step (InferSchema).
         """
         super().__init__(**kwargs)
-        self.project = project
-        self.model_endpoint_store_target = model_endpoint_store_target
+        self.container = container
+        self.table = table
+        self.v3io_access_key = v3io_access_key
 
     def do(self, event: typing.Dict):
-        update_endpoint_record(
-            project=self.project,
-            endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
+        mlrun.utils.v3io_clients.get_v3io_client().kv.update(
+            container=self.container,
+            table_path=self.table,
+            key=event[EventFieldType.ENDPOINT_ID],
             attributes=event,
+            access_key=self.v3io_access_key,
         )
         return event
 
@@ -1040,6 +1067,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
 class InferSchema(mlrun.feature_store.steps.MapClass):
     def __init__(
         self,
+        v3io_access_key: str,
         v3io_framesd: str,
         container: str,
         table: str,
@@ -1059,40 +1087,45 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
         """
         super().__init__(**kwargs)
         self.container = container
+        self.v3io_access_key = v3io_access_key
         self.v3io_framesd = v3io_framesd
         self.table = table
         self.keys = set()
 
     def do(self, event: typing.Dict):
-
         key_set = set(event.keys())
         if not key_set.issubset(self.keys):
             self.keys.update(key_set)
             # Apply infer_schema on the kv table for generating the schema file
             mlrun.utils.v3io_clients.get_frames_client(
+                token=self.v3io_access_key,
                 container=self.container,
                 address=self.v3io_framesd,
             ).execute(backend="kv", table=self.table, command="infer_schema")
-
         return event
 
 
-def update_endpoint_record(
-    project: str,
-    endpoint_id: str,
-    attributes: dict,
-):
-    model_endpoint_store = get_model_endpoint_store(
-        project=project,
+def get_endpoint_record(
+    kv_container: str, kv_path: str, endpoint_id: str, access_key: str
+) -> typing.Optional[dict]:
+    logger.info(
+        "Grabbing endpoint data",
+        container=kv_container,
+        table_path=kv_path,
+        key=endpoint_id,
     )
-
-    model_endpoint_store.update_model_endpoint(
-        endpoint_id=endpoint_id, attributes=attributes
-    )
-
-
-def get_endpoint_record(project: str, endpoint_id: str):
-    model_endpoint_store = get_model_endpoint_store(
-        project=project,
-    )
-    return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+    try:
+        endpoint_record = (
+            mlrun.utils.v3io_clients.get_v3io_client()
+            .kv.get(
+                container=kv_container,
+                table_path=kv_path,
+                key=endpoint_id,
+                access_key=access_key,
+                raise_for_status=v3io.dataplane.RaiseForStatus.always,
+            )
+            .output.item
+        )
+        return endpoint_record
+    except Exception:
+        return None
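
A minimal sketch of how the reworked processor might be driven end to end; the project name and access key are placeholders, and the batching value is only illustrative (the constructor arguments come from the rc2 signature shown above):

    import mlrun
    from mlrun.model_monitoring.stream_processing_fs import EventStreamProcessor

    processor = EventStreamProcessor(
        project="my-project",                 # placeholder project name
        parquet_batching_max_events=10_000,   # illustrative batch size
        sample_window=10,
        v3io_access_key="<v3io-access-key>",  # placeholder credential
    )

    # Wire the KV, TSDB and Parquet branches into a serving function's graph
    fn = mlrun.new_function("model-monitoring-stream", kind="serving")
    processor.apply_monitoring_serving_graph(fn)
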