mlrun 1.10.0rc13__py3-none-any.whl → 1.10.0rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mlrun/__init__.py +22 -2
- mlrun/artifacts/base.py +0 -31
- mlrun/artifacts/document.py +6 -1
- mlrun/artifacts/llm_prompt.py +123 -25
- mlrun/artifacts/manager.py +0 -5
- mlrun/artifacts/model.py +3 -3
- mlrun/common/constants.py +10 -1
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/model_monitoring/helpers.py +86 -0
- mlrun/common/schemas/__init__.py +3 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/hub.py +30 -18
- mlrun/common/schemas/model_monitoring/__init__.py +3 -0
- mlrun/common/schemas/model_monitoring/constants.py +30 -6
- mlrun/common/schemas/model_monitoring/functions.py +14 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -0
- mlrun/common/schemas/pipeline.py +1 -1
- mlrun/common/schemas/serving.py +3 -0
- mlrun/common/schemas/workflow.py +3 -1
- mlrun/common/secrets.py +22 -1
- mlrun/config.py +33 -11
- mlrun/datastore/__init__.py +11 -3
- mlrun/datastore/azure_blob.py +162 -47
- mlrun/datastore/datastore.py +9 -4
- mlrun/datastore/datastore_profile.py +61 -5
- mlrun/datastore/model_provider/huggingface_provider.py +363 -0
- mlrun/datastore/model_provider/mock_model_provider.py +87 -0
- mlrun/datastore/model_provider/model_provider.py +230 -65
- mlrun/datastore/model_provider/openai_provider.py +295 -42
- mlrun/datastore/s3.py +24 -2
- mlrun/datastore/storeytargets.py +2 -3
- mlrun/datastore/utils.py +15 -3
- mlrun/db/base.py +47 -19
- mlrun/db/httpdb.py +120 -56
- mlrun/db/nopdb.py +38 -10
- mlrun/execution.py +70 -19
- mlrun/hub/__init__.py +15 -0
- mlrun/hub/module.py +181 -0
- mlrun/k8s_utils.py +105 -16
- mlrun/launcher/base.py +13 -6
- mlrun/launcher/local.py +15 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/__init__.py +1 -0
- mlrun/model_monitoring/api.py +66 -27
- mlrun/model_monitoring/applications/__init__.py +1 -1
- mlrun/model_monitoring/applications/base.py +509 -117
- mlrun/model_monitoring/applications/context.py +2 -4
- mlrun/model_monitoring/applications/results.py +4 -7
- mlrun/model_monitoring/controller.py +239 -101
- mlrun/model_monitoring/db/_schedules.py +116 -33
- mlrun/model_monitoring/db/_stats.py +4 -3
- mlrun/model_monitoring/db/tsdb/base.py +100 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +11 -6
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +191 -50
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +51 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +17 -4
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +259 -40
- mlrun/model_monitoring/helpers.py +54 -9
- mlrun/model_monitoring/stream_processing.py +45 -14
- mlrun/model_monitoring/writer.py +220 -1
- mlrun/platforms/__init__.py +3 -2
- mlrun/platforms/iguazio.py +7 -3
- mlrun/projects/operations.py +6 -1
- mlrun/projects/pipelines.py +46 -26
- mlrun/projects/project.py +166 -58
- mlrun/run.py +94 -17
- mlrun/runtimes/__init__.py +18 -0
- mlrun/runtimes/base.py +14 -6
- mlrun/runtimes/daskjob.py +7 -0
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mounts.py +20 -2
- mlrun/runtimes/mpijob/abstract.py +6 -0
- mlrun/runtimes/mpijob/v1.py +6 -0
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +149 -17
- mlrun/runtimes/nuclio/function.py +76 -27
- mlrun/runtimes/nuclio/serving.py +97 -15
- mlrun/runtimes/pod.py +234 -21
- mlrun/runtimes/remotesparkjob.py +6 -0
- mlrun/runtimes/sparkjob/spark3job.py +6 -0
- mlrun/runtimes/utils.py +49 -11
- mlrun/secrets.py +54 -13
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +79 -6
- mlrun/serving/routers.py +23 -41
- mlrun/serving/server.py +320 -80
- mlrun/serving/states.py +725 -157
- mlrun/serving/steps.py +62 -0
- mlrun/serving/system_steps.py +200 -119
- mlrun/serving/v2_serving.py +9 -10
- mlrun/utils/helpers.py +288 -88
- mlrun/utils/logger.py +3 -1
- mlrun/utils/notifications/notification/base.py +18 -0
- mlrun/utils/notifications/notification/git.py +2 -4
- mlrun/utils/notifications/notification/slack.py +2 -4
- mlrun/utils/notifications/notification/webhook.py +2 -5
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/retryer.py +15 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/METADATA +45 -51
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/RECORD +106 -101
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc13.dist-info → mlrun-1.10.0rc42.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py

@@ -14,7 +14,7 @@
 
 import threading
 from datetime import datetime, timedelta
-from typing import
+from typing import Final, Literal, Optional, Union
 
 import pandas as pd
 import taosws
@@ -22,7 +22,7 @@ import taosws
 import mlrun.common.schemas.model_monitoring as mm_schemas
 import mlrun.common.types
 import mlrun.model_monitoring.db.tsdb.tdengine.schemas as tdengine_schemas
-
+from mlrun.config import config
 from mlrun.datastore.datastore_profile import DatastoreProfile
 from mlrun.model_monitoring.db import TSDBConnector
 from mlrun.model_monitoring.db.tsdb.tdengine.tdengine_connection import (
@@ -55,14 +55,12 @@ class TDEngineConnector(TSDBConnector):
     """
 
     type: str = mm_schemas.TSDBTarget.TDEngine
-    database = f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
 
     def __init__(
         self,
         project: str,
         profile: DatastoreProfile,
         timestamp_precision: TDEngineTimestampPrecision = TDEngineTimestampPrecision.MICROSECOND,
-        **kwargs,
     ):
         super().__init__(project=project)
 
@@ -72,6 +70,15 @@ class TDEngineConnector(TSDBConnector):
             timestamp_precision
         )
 
+        if not mlrun.mlconf.system_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "system_id is not set in mlrun.mlconf. "
+                "TDEngineConnector requires system_id to be configured for database name construction. "
+                "Please ensure MLRun configuration is properly loaded before creating TDEngineConnector."
+            )
+        self.database = (
+            f"{tdengine_schemas._MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"
+        )
         self._init_super_tables()
 
     @property
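The class-level `database` attribute is now computed per instance from `mlrun.mlconf.system_id`, with a fast failure when the system ID is missing. A minimal sketch of the resulting naming behavior; the concrete value of `tdengine_schemas._MODEL_MONITORING_DATABASE` is not shown in this diff, so the prefix below is only an assumed example:

import mlrun
import mlrun.errors

MODEL_MONITORING_DATABASE = "mlrun_model_monitoring"  # assumed stand-in for tdengine_schemas._MODEL_MONITORING_DATABASE

def build_database_name() -> str:
    # Mirrors the new __init__ guard: refuse to build a database name without a system ID.
    if not mlrun.mlconf.system_id:
        raise mlrun.errors.MLRunInvalidArgumentError("system_id is not set in mlrun.mlconf.")
    return f"{MODEL_MONITORING_DATABASE}_{mlrun.mlconf.system_id}"

mlrun.mlconf.system_id = "prod-01"  # illustrative system ID
print(build_database_name())        # -> "mlrun_model_monitoring_prod-01"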
@@ -205,7 +212,7 @@ class TDEngineConnector(TSDBConnector):
     @staticmethod
     def _generate_filter_query(
         filter_column: str, filter_values: Union[str, list[Union[str, int]]]
-    ) ->
+    ) -> str:
         """
         Generate a filter query for TDEngine based on the provided column and values.
 
@@ -213,15 +220,14 @@ class TDEngineConnector(TSDBConnector):
         :param filter_values: A single value or a list of values to filter by.
 
         :return: A string representing the filter query.
-        :raise:
+        :raise: ``MLRunValueError`` if the filter values are not of type string or list.
         """
-
         if isinstance(filter_values, str):
             return f"{filter_column}='{filter_values}'"
         elif isinstance(filter_values, list):
             return f"{filter_column} IN ({', '.join(repr(v) for v in filter_values)}) "
         else:
-            raise mlrun.errors.
+            raise mlrun.errors.MLRunValueError(
                 f"Invalid filter values {filter_values}: must be a string or a list, "
                 f"got {type(filter_values).__name__}; filter values: {filter_values}"
             )
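For reference, the two branches above produce plain WHERE-clause fragments. A standalone sketch that mirrors the same logic and shows the resulting strings:

from typing import Union

def generate_filter_query(filter_column: str, filter_values: Union[str, list]) -> str:
    # Single string -> equality filter; list -> IN clause (same branching as the method above).
    if isinstance(filter_values, str):
        return f"{filter_column}='{filter_values}'"
    return f"{filter_column} IN ({', '.join(repr(v) for v in filter_values)}) "

print(generate_filter_query("endpoint_id", "ep-1"))    # endpoint_id='ep-1'
print(generate_filter_query("result_status", [2, 3]))  # result_status IN (2, 3)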
@@ -279,6 +285,65 @@ class TDEngineConnector(TSDBConnector):
             after="ProcessBeforeTDEngine",
         )
 
+    def add_pre_writer_steps(self, graph, after):
+        return graph.add_step(
+            "mlrun.model_monitoring.db.tsdb.tdengine.writer_graph_steps.ProcessBeforeTDEngine",
+            name="ProcessBeforeTDEngine",
+            after=after,
+        )
+
+    def apply_writer_steps(self, graph, after, **kwargs) -> None:
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
+            name="tsdb_metrics",
+            after=after,
+            url=f"ds://{self._tdengine_connection_profile.name}",
+            supertable=self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            database=self.database,
+            graph_shape="cylinder",
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.MetricData.METRIC_VALUE,
+            ],
+            tag_cols=[
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.MetricData.METRIC_NAME,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
+
+        graph.add_step(
+            "mlrun.datastore.storeytargets.TDEngineStoreyTarget",
+            name="tsdb_app_results",
+            after=after,
+            url=f"ds://{self._tdengine_connection_profile.name}",
+            supertable=self.tables[
+                mm_schemas.TDEngineSuperTables.APP_RESULTS
+            ].super_table,
+            table_col=mm_schemas.EventFieldType.TABLE_COLUMN,
+            time_col=mm_schemas.WriterEvent.END_INFER_TIME,
+            database=self.database,
+            graph_shape="cylinder",
+            columns=[
+                mm_schemas.WriterEvent.START_INFER_TIME,
+                mm_schemas.ResultData.RESULT_VALUE,
+                mm_schemas.ResultData.RESULT_STATUS,
+                mm_schemas.ResultData.RESULT_EXTRA_DATA,
+            ],
+            tag_cols=[
+                mm_schemas.WriterEvent.ENDPOINT_ID,
+                mm_schemas.WriterEvent.APPLICATION_NAME,
+                mm_schemas.ResultData.RESULT_NAME,
+                mm_schemas.ResultData.RESULT_KIND,
+            ],
+            max_events=config.model_endpoint_monitoring.writer_graph.max_events,
+            flush_after_seconds=config.model_endpoint_monitoring.writer_graph.flush_after_seconds,
+        )
+
     def handle_model_error(
         self,
         graph,
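The writer graph is now assembled in two stages: `add_pre_writer_steps` attaches the table-name preparation step and `apply_writer_steps` attaches the two storey targets. A hedged wiring sketch; the `graph` object and the upstream step name are illustrative, not taken from this diff:

# Assumed setup: `connector` is a TDEngineConnector and `graph` is the monitoring writer's flow graph.
connector.add_pre_writer_steps(graph, after="MapFieldNames")  # "MapFieldNames" is a made-up upstream step name
connector.apply_writer_steps(graph, after="ProcessBeforeTDEngine")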
@@ -311,10 +376,7 @@ class TDEngineConnector(TSDBConnector):
             flush_after_seconds=tsdb_batching_timeout_secs,
         )
 
-    def delete_tsdb_records(
-        self,
-        endpoint_ids: list[str],
-    ):
+    def delete_tsdb_records(self, endpoint_ids: list[str]) -> None:
         """
         To delete subtables within TDEngine, we first query the subtables names with the provided endpoint_ids.
         Then, we drop each subtable.
@@ -332,9 +394,7 @@ class TDEngineConnector(TSDBConnector):
                 get_subtable_query = self.tables[table]._get_subtables_query_by_tag(
                     filter_tag="endpoint_id", filter_values=endpoint_ids
                 )
-                subtables_result = self.connection.run(
-                    query=get_subtable_query,
-                )
+                subtables_result = self.connection.run(query=get_subtable_query)
                 subtables.extend([subtable[0] for subtable in subtables_result.data])
             except Exception as e:
                 logger.warning(
@@ -346,15 +406,13 @@ class TDEngineConnector(TSDBConnector):
         )
 
         # Prepare the drop statements
-        drop_statements = [
-
-
-
-        )
+        drop_statements = [
+            self.tables[table].drop_subtable_query(subtable=subtable)
+            for subtable in subtables
+        ]
         try:
-
-
-            )
+            logger.debug("Dropping subtables", drop_statements=drop_statements)
+            self.connection.run(statements=drop_statements)
         except Exception as e:
             logger.warning(
                 "Failed to delete model endpoint resources. You may need to delete them manually. "
@@ -369,6 +427,48 @@ class TDEngineConnector(TSDBConnector):
                 number_of_endpoints_to_delete=len(endpoint_ids),
             )
 
+    def delete_application_records(
+        self, application_name: str, endpoint_ids: Optional[list[str]] = None
+    ) -> None:
+        """
+        Delete application records from the TSDB for the given model endpoints or all if ``endpoint_ids`` is ``None``.
+        """
+        logger.debug(
+            "Deleting application records",
+            project=self.project,
+            application_name=application_name,
+            endpoint_ids=endpoint_ids,
+        )
+        tables = [
+            self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS],
+            self.tables[mm_schemas.TDEngineSuperTables.METRICS],
+        ]
+
+        filter_query = self._generate_filter_query(
+            filter_column=mm_schemas.ApplicationEvent.APPLICATION_NAME,
+            filter_values=application_name,
+        )
+        if endpoint_ids:
+            endpoint_ids_filter = self._generate_filter_query(
+                filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
+                filter_values=endpoint_ids,
+            )
+            filter_query += f" AND {endpoint_ids_filter}"
+
+        drop_statements: list[str] = []
+        for table in tables:
+            get_subtable_query = table._get_tables_query_by_condition(filter_query)
+            subtables_result = self.connection.run(query=get_subtable_query)
+            drop_statements.extend(
+                [
+                    table.drop_subtable_query(subtable=subtable[0])
+                    for subtable in subtables_result.data
+                ]
+            )
+
+        logger.debug("Dropping application records", drop_statements=drop_statements)
+        self.connection.run(statements=drop_statements)
+
     def delete_tsdb_resources(self):
         """
         Delete all project resources in the TSDB connector, such as model endpoints data and drift results.
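A short usage sketch for the two deletion helpers; the connector setup and argument values are illustrative:

# Assumed: `connector` is an initialized TDEngineConnector for the project.
# Drop the subtables that belong to specific model endpoints.
connector.delete_tsdb_records(endpoint_ids=["ep-1", "ep-2"])
# Drop only the records written by one monitoring application, optionally scoped to endpoints.
connector.delete_application_records("my-drift-app", endpoint_ids=["ep-1"])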
@@ -469,6 +569,7 @@ class TDEngineConnector(TSDBConnector):
         preform_agg_columns: Optional[list] = None,
         order_by: Optional[str] = None,
         desc: Optional[bool] = None,
+        partition_by: Optional[str] = None,
     ) -> pd.DataFrame:
         """
         Getting records from TSDB data collection.
@@ -496,6 +597,8 @@ class TDEngineConnector(TSDBConnector):
                              if an empty list was provided The aggregation won't be performed.
         :param order_by: The column or alias to preform ordering on the query.
         :param desc: Whether or not to sort the results in descending order.
+        :param partition_by: The column to partition the results by. Note that if interval is provided,
+                             `agg_funcs` must bg provided as well.
 
         :return: DataFrame with the provided attributes from the data collection.
         :raise: MLRunInvalidArgumentError if query the provided table failed.
@@ -517,6 +620,7 @@ class TDEngineConnector(TSDBConnector):
             preform_agg_funcs_columns=preform_agg_columns,
             order_by=order_by,
             desc=desc,
+            partition_by=partition_by,
         )
         logger.debug("Querying TDEngine", query=full_query)
         try:
@@ -684,7 +788,9 @@ class TDEngineConnector(TSDBConnector):
         endpoint_ids: Union[str, list[str]],
         start: Optional[datetime] = None,
         end: Optional[datetime] = None,
-    ) -> pd.DataFrame:
+    ) -> Union[pd.DataFrame, dict[str, float]]:
+        if not endpoint_ids:
+            return {}
         filter_query = self._generate_filter_query(
             filter_column=mm_schemas.EventFieldType.ENDPOINT_ID,
             filter_values=endpoint_ids,
@@ -819,7 +925,7 @@ class TDEngineConnector(TSDBConnector):
         # Convert DataFrame to a dictionary
         return {
             (
-                row[mm_schemas.WriterEvent.APPLICATION_NAME],
+                row[mm_schemas.WriterEvent.APPLICATION_NAME].lower(),
                 row[mm_schemas.ResultData.RESULT_STATUS],
             ): row["count(result_value)"]
             for _, row in df.iterrows()
@@ -904,26 +1010,34 @@ class TDEngineConnector(TSDBConnector):
                 mm_schemas.WriterEvent.END_INFER_TIME,
                 mm_schemas.WriterEvent.APPLICATION_NAME,
             ]
+            agg_columns = [mm_schemas.WriterEvent.END_INFER_TIME]
+            group_by_columns = [mm_schemas.WriterEvent.APPLICATION_NAME]
             if record_type == "results":
                 table = self.tables[
                     mm_schemas.TDEngineSuperTables.APP_RESULTS
                 ].super_table
                 columns += [
                     mm_schemas.ResultData.RESULT_NAME,
+                    mm_schemas.ResultData.RESULT_KIND,
+                    mm_schemas.ResultData.RESULT_STATUS,
+                    mm_schemas.ResultData.RESULT_VALUE,
+                ]
+                agg_columns += [
                     mm_schemas.ResultData.RESULT_VALUE,
                     mm_schemas.ResultData.RESULT_STATUS,
                     mm_schemas.ResultData.RESULT_KIND,
                 ]
-
+                group_by_columns += [mm_schemas.ResultData.RESULT_NAME]
             else:
                 table = self.tables[mm_schemas.TDEngineSuperTables.METRICS].super_table
                 columns += [
                     mm_schemas.MetricData.METRIC_NAME,
                     mm_schemas.MetricData.METRIC_VALUE,
                 ]
-
+                agg_columns += [mm_schemas.MetricData.METRIC_VALUE]
+                group_by_columns += [mm_schemas.MetricData.METRIC_NAME]
 
-
+            df = self._get_records(
                 table=table,
                 start=start,
                 end=end,
@@ -931,10 +1045,17 @@ class TDEngineConnector(TSDBConnector):
                 filter_query=filter_query,
                 timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
                 # Aggregate per application/metric pair regardless of timestamp
-                group_by=
-                preform_agg_columns=
+                group_by=group_by_columns,
+                preform_agg_columns=agg_columns,
                 agg_funcs=["last"],
             )
+            if not df.empty:
+                for column in agg_columns:
+                    df.rename(
+                        columns={f"last({column})": column},
+                        inplace=True,
+                    )
+            return df
 
         df_results = get_latest_metrics_records(record_type="results")
         df_metrics = get_latest_metrics_records(record_type="metrics")
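Because the query aggregates with `last`, TDEngine returns columns named `last(<column>)`; the new loop renames them back to plain column names. A small standalone pandas illustration of that rename (the column names are illustrative):

import pandas as pd

df = pd.DataFrame({"application_name": ["my-app"], "last(result_value)": [0.42]})
for column in ["result_value"]:
    # Same pattern as above: strip the aggregation wrapper from the column name.
    df.rename(columns={f"last({column})": column}, inplace=True)
print(list(df.columns))  # ['application_name', 'result_value']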
@@ -951,19 +1072,14 @@ class TDEngineConnector(TSDBConnector):
             ]
         ):
             metric_objects = []
-
             if not df_results.empty:
-                df_results.rename(
-                    columns={
-                        f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
-                    },
-                    inplace=True,
-                )
                 for _, row in df_results.iterrows():
                     metric_objects.append(
                         mm_schemas.ApplicationResultRecord(
                             time=datetime.fromisoformat(
-                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                                row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                    " +", "+"
+                                )
                             ),
                             result_name=row[mm_schemas.ResultData.RESULT_NAME],
                             kind=row[mm_schemas.ResultData.RESULT_KIND],
@@ -973,17 +1089,13 @@ class TDEngineConnector(TSDBConnector):
                     )
 
             if not df_metrics.empty:
-                df_metrics.rename(
-                    columns={
-                        f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
-                    },
-                    inplace=True,
-                )
                 for _, row in df_metrics.iterrows():
                     metric_objects.append(
                         mm_schemas.ApplicationMetricRecord(
                             time=datetime.fromisoformat(
-                                row[mm_schemas.WriterEvent.END_INFER_TIME]
+                                row[mm_schemas.WriterEvent.END_INFER_TIME].replace(
+                                    " +", "+"
+                                )
                             ),
                             metric_name=row[mm_schemas.MetricData.METRIC_NAME],
                             value=row[mm_schemas.MetricData.METRIC_VALUE],
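The added `.replace(" +", "+")` exists because the stored timestamps carry a space before the UTC offset, which `datetime.fromisoformat` rejects. A quick illustration, assuming a timestamp string of that shape:

from datetime import datetime

raw = "2025-01-01 12:00:00.000 +00:00"  # assumed example of a timestamp with a space before the offset
# datetime.fromisoformat(raw) raises ValueError because of the " +00:00" suffix.
parsed = datetime.fromisoformat(raw.replace(" +", "+"))
print(parsed.isoformat())  # 2025-01-01T12:00:00+00:00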
@@ -1142,11 +1254,9 @@ class TDEngineConnector(TSDBConnector):
         df.dropna(inplace=True)
         return df
 
-
+    def add_basic_metrics(
         self,
         model_endpoint_objects: list[mlrun.common.schemas.ModelEndpoint],
-        project: str,
-        run_in_threadpool: Callable,
         metric_list: Optional[list[str]] = None,
     ) -> list[mlrun.common.schemas.ModelEndpoint]:
         """
@@ -1154,8 +1264,6 @@ class TDEngineConnector(TSDBConnector):
 
         :param model_endpoint_objects: A list of `ModelEndpoint` objects that will
                                        be filled with the relevant basic metrics.
-        :param project: The name of the project.
-        :param run_in_threadpool: A function that runs another function in a thread pool.
         :param metric_list: List of metrics to include from the time series DB. Defaults to all metrics.
 
         :return: A list of `ModelEndpointMonitoringMetric` objects.
@@ -1205,6 +1313,39 @@ class TDEngineConnector(TSDBConnector):
                 )
             )
 
+    def get_drift_data(
+        self,
+        start: datetime,
+        end: datetime,
+    ) -> mm_schemas.ModelEndpointDriftValues:
+        filter_query = self._generate_filter_query(
+            filter_column=mm_schemas.ResultData.RESULT_STATUS,
+            filter_values=[
+                mm_schemas.ResultStatusApp.potential_detection.value,
+                mm_schemas.ResultStatusApp.detected.value,
+            ],
+        )
+        table = self.tables[mm_schemas.TDEngineSuperTables.APP_RESULTS].super_table
+        start, end, interval = self._prepare_aligned_start_end(start, end)
+
+        # get per time-interval x endpoint_id combination the max result status
+        df = self._get_records(
+            table=table,
+            start=start,
+            end=end,
+            interval=interval,
+            columns=[mm_schemas.ResultData.RESULT_STATUS],
+            filter_query=filter_query,
+            timestamp_column=mm_schemas.WriterEvent.END_INFER_TIME,
+            agg_funcs=["max"],
+            partition_by=mm_schemas.WriterEvent.ENDPOINT_ID,
+        )
+        if df.empty:
+            return mm_schemas.ModelEndpointDriftValues(values=[])
+
+        df["_wstart"] = pd.to_datetime(df["_wstart"])
+        return self._df_to_drift_data(df)
+
     # Note: this function serves as a reference for checking the TSDB for the existence of a metric.
     #
     # def read_prediction_metric_for_endpoint_if_exists(
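A hedged usage sketch of the new drift query; the time window is illustrative:

from datetime import datetime, timedelta, timezone

# Assumed: `connector` is an initialized TDEngineConnector.
end = datetime.now(tz=timezone.utc)
drift = connector.get_drift_data(start=end - timedelta(days=1), end=end)
# `drift.values` is empty when no potential/detected drift results fall inside the window.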
mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py (new file)

@@ -0,0 +1,51 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from datetime import datetime
+
+import mlrun.common.schemas.model_monitoring as mm_schemas
+import mlrun.feature_store.steps
+from mlrun.utils import logger
+
+
+class ProcessBeforeTDEngine(mlrun.feature_store.steps.MapClass):
+    def __init__(self, **kwargs):
+        """
+        Process the data before writing to TDEngine. This step create the table name.
+
+        :returns: Event as a dictionary which will be written into the TDEngine Metrics/Results tables.
+        """
+        super().__init__(**kwargs)
+
+    def do(self, event):
+        logger.info("Process event before writing to TDEngine", event=event)
+        kind = event.get("kind")
+        table_name = (
+            f"{event[mm_schemas.WriterEvent.ENDPOINT_ID]}_"
+            f"{event[mm_schemas.WriterEvent.APPLICATION_NAME]}"
+        )
+        if kind == mm_schemas.WriterEventKind.RESULT:
+            # Write a new result
+            event[mm_schemas.EventFieldType.TABLE_COLUMN] = (
+                f"{table_name}_{event[mm_schemas.ResultData.RESULT_NAME]}"
+            ).replace("-", "_")
+        elif kind == mm_schemas.WriterEventKind.METRIC:
+            # Write a new metric
+            event[mm_schemas.EventFieldType.TABLE_COLUMN] = (
+                f"{table_name}_{event[mm_schemas.MetricData.METRIC_NAME]}"
+            ).replace("-", "_")
+        event[mm_schemas.WriterEvent.START_INFER_TIME] = datetime.fromisoformat(
+            event[mm_schemas.WriterEvent.START_INFER_TIME]
+        )
+        return event
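A minimal sketch of what the step does to a single result event when invoked directly, outside a real writer graph; the event values are illustrative and the keys are the schema constants used above:

import mlrun.common.schemas.model_monitoring as mm_schemas
from mlrun.model_monitoring.db.tsdb.tdengine.writer_graph_steps import ProcessBeforeTDEngine

event = {
    "kind": mm_schemas.WriterEventKind.RESULT,
    mm_schemas.WriterEvent.ENDPOINT_ID: "ep-1234",
    mm_schemas.WriterEvent.APPLICATION_NAME: "my-app",
    mm_schemas.ResultData.RESULT_NAME: "data-drift",
    mm_schemas.WriterEvent.START_INFER_TIME: "2025-01-01T00:00:00+00:00",
}
out = ProcessBeforeTDEngine().do(event)
# The table column becomes "ep_1234_my_app_data_drift" (dashes replaced with underscores),
# and START_INFER_TIME is parsed into a datetime object.
print(out[mm_schemas.EventFieldType.TABLE_COLUMN])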
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py

@@ -25,10 +25,12 @@ from mlrun.utils import logger
 
 def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
     """
-    Normalize user
-    to a form V3IO frames tolerates.
+    Normalize user-defined keys (e.g., model input data and predictions) to a format V3IO Frames tolerates.
 
-
+    - Keys must match regex: '^[a-zA-Z_:]([a-zA-Z0-9_:])*$'
+    - Replace invalid characters (e.g., '-') with '_'.
+    - Prefix keys starting with digits with '_'.
+    - Flatten nested dictionaries using dot notation, while normalizing keys recursively.
     """
     prefix = "_"
 
@@ -38,7 +40,18 @@ def _normalize_dict_for_v3io_frames(event: dict[str, Any]) -> dict[str, Any]:
             return prefix + key
         return key
 
-
+    def flatten_dict(d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
+        items = {}
+        for k, v in d.items():
+            new_key = norm_key(k)
+            full_key = f"{parent_key}.{new_key}" if parent_key else new_key
+            if isinstance(v, dict):
+                items.update(flatten_dict(v, full_key))
+            else:
+                items[full_key] = v
+        return items
+
+    return flatten_dict(event)
 
 
 class ProcessBeforeTSDB(mlrun.feature_store.steps.MapClass):