mlrun 1.5.0rc11__py3-none-any.whl → 1.5.0rc13__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release. This version of mlrun might be problematic.
- mlrun/__main__.py +31 -2
- mlrun/api/api/endpoints/functions.py +110 -52
- mlrun/api/api/endpoints/model_endpoints.py +0 -56
- mlrun/api/crud/model_monitoring/deployment.py +208 -38
- mlrun/api/crud/model_monitoring/helpers.py +19 -6
- mlrun/api/crud/model_monitoring/model_endpoints.py +14 -31
- mlrun/api/db/sqldb/db.py +3 -1
- mlrun/api/utils/builder.py +2 -4
- mlrun/common/model_monitoring/helpers.py +19 -5
- mlrun/common/schemas/model_monitoring/constants.py +69 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -1
- mlrun/config.py +30 -12
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/datastore_profile.py +2 -2
- mlrun/datastore/sources.py +4 -30
- mlrun/datastore/targets.py +106 -55
- mlrun/db/httpdb.py +20 -6
- mlrun/feature_store/__init__.py +2 -0
- mlrun/feature_store/api.py +3 -31
- mlrun/feature_store/feature_vector.py +1 -1
- mlrun/feature_store/retrieval/base.py +8 -3
- mlrun/launcher/remote.py +3 -3
- mlrun/lists.py +11 -0
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +1 -1
- mlrun/model_monitoring/application.py +313 -0
- mlrun/model_monitoring/batch_application.py +526 -0
- mlrun/model_monitoring/batch_application_handler.py +32 -0
- mlrun/model_monitoring/evidently_application.py +89 -0
- mlrun/model_monitoring/helpers.py +39 -3
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +38 -7
- mlrun/model_monitoring/tracking_policy.py +4 -4
- mlrun/model_monitoring/writer.py +37 -0
- mlrun/projects/pipelines.py +38 -4
- mlrun/projects/project.py +257 -43
- mlrun/run.py +5 -2
- mlrun/runtimes/__init__.py +2 -0
- mlrun/runtimes/function.py +2 -1
- mlrun/utils/helpers.py +12 -0
- mlrun/utils/http.py +3 -0
- mlrun/utils/notifications/notification_pusher.py +22 -8
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/METADATA +5 -5
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/RECORD +49 -44
- /mlrun/model_monitoring/{model_monitoring_batch.py → batch.py} +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/LICENSE +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/WHEEL +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/entry_points.txt +0 -0
- {mlrun-1.5.0rc11.dist-info → mlrun-1.5.0rc13.dist-info}/top_level.txt +0 -0

mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -77,6 +77,30 @@ class EventFieldType:
     DRIFT_DETECTED_THRESHOLD = "drift_detected_threshold"
     POSSIBLE_DRIFT_THRESHOLD = "possible_drift_threshold"
 
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+
+
+class ApplicationEvent:
+    APPLICATION_NAME = "application_name"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    SCHEDULE_TIME = "schedule_time"
+    LAST_REQUEST = "last_request"
+    ENDPOINT_ID = "endpoint_id"
+    OUTPUT_STREAM_URI = "output_stream_uri"
+
+
+class WriterEvent:
+    APPLICATION_NAME = "application_name"
+    ENDPOINT_ID = "endpoint_id"
+    SCHEDULE_TIME = "schedule_time"
+    RESULT_NAME = "result_name"
+    RESULT_VALUE = "result_value"
+    RESULT_KIND = "result_kind"
+    RESULT_STATUS = "result_status"
+    RESULT_EXTRA_DATA = "result_extra_data"
+
 
 class EventLiveStats:
     LATENCY_AVG_5M = "latency_avg_5m"
@@ -106,6 +130,7 @@ class ModelEndpointTarget:
 class ProjectSecretKeys:
     ENDPOINT_STORE_CONNECTION = "MODEL_MONITORING_ENDPOINT_STORE_CONNECTION"
     ACCESS_KEY = "MODEL_MONITORING_ACCESS_KEY"
+    PIPELINES_ACCESS_KEY = "MODEL_MONITORING_PIPELINES_ACCESS_KEY"
     KAFKA_BOOTSTRAP_SERVERS = "KAFKA_BOOTSTRAP_SERVERS"
     STREAM_PATH = "STREAM_PATH"
 
@@ -120,6 +145,7 @@ class FileTargetKind:
     EVENTS = "events"
     STREAM = "stream"
     PARQUET = "parquet"
+    BATCH_CONTROLLER_PARQUET = "batch_controller_parquet"
     LOG_STREAM = "log_stream"
 
 
@@ -143,6 +169,22 @@ class PrometheusMetric:
     DRIFT_STATUS = "drift_status"
 
 
+class MonitoringFunctionNames:
+    WRITER = "model-monitoring-writer"
+    BATCH = "model-monitoring-batch"
+    BATCH_APPLICATION = "model-monitoring-batch-application"
+    STREAM = None
+
+    @staticmethod
+    def all():
+        return [
+            MonitoringFunctionNames.WRITER,
+            MonitoringFunctionNames.STREAM,
+            MonitoringFunctionNames.BATCH,
+            MonitoringFunctionNames.BATCH_APPLICATION,
+        ]
+
+
 @dataclass
 class FunctionURI:
     project: str
@@ -208,3 +250,30 @@ class DriftStatus(Enum):
     NO_DRIFT = "NO_DRIFT"
     DRIFT_DETECTED = "DRIFT_DETECTED"
     POSSIBLE_DRIFT = "POSSIBLE_DRIFT"
+
+
+class ResultKindApp(enum.Enum):
+    """
+    Enum for the result kind values
+    """
+
+    data_drift = 0
+    concept_drift = 1
+    model_performance = 2
+    system_performance = 3
+
+
+class ResultStatusApp(enum.Enum):
+    """
+    Enum for the result status values, detected means that the app detected some problem.
+    """
+
+    irrelevant = -1
+    no_detection = 0
+    potential_detection = 1
+    detected = 2
+
+
+class ModelMonitoringAppTag:
+    KEY = "type"
+    VAL = "model-monitoring-application"
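
To make the new constants concrete, here is a hedged sketch of how a monitoring application might assemble a result event for the writer using the classes added above. The field and enum names come from the diff; the payload shape, application name, and endpoint id are illustrative assumptions.

from mlrun.common.schemas.model_monitoring.constants import (
    MonitoringFunctionNames,
    ResultKindApp,
    ResultStatusApp,
    WriterEvent,
)

# Hypothetical payload keyed by the new WriterEvent field names.
event = {
    WriterEvent.APPLICATION_NAME: "my-drift-app",      # assumed app name
    WriterEvent.ENDPOINT_ID: "endpoint-1234",          # assumed endpoint id
    WriterEvent.RESULT_NAME: "data_drift_score",
    WriterEvent.RESULT_VALUE: 0.81,
    WriterEvent.RESULT_KIND: ResultKindApp.data_drift.value,
    WriterEvent.RESULT_STATUS: ResultStatusApp.detected.value,
}

# The new helper lists every monitoring function name (STREAM is None by design).
print(MonitoringFunctionNames.all())
print(event)
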
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -18,7 +18,7 @@ import json
 import typing
 from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
@@ -100,6 +100,27 @@ class ModelEndpointSpec(ObjectSpec):
             json_parse_values=json_parse_values,
         )
 
+    @validator("monitor_configuration")
+    def set_name(cls, monitor_configuration):
+        return monitor_configuration or {
+            EventFieldType.DRIFT_DETECTED_THRESHOLD: (
+                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.drift_detected
+            ),
+            EventFieldType.POSSIBLE_DRIFT_THRESHOLD: (
+                mlrun.mlconf.model_endpoint_monitoring.drift_thresholds.default.possible_drift
+            ),
+        }
+
+    @validator("model_uri")
+    def validate_model_uri(cls, model_uri):
+        """Validate that the model uri includes the required prefix"""
+        prefix, uri = mlrun.datastore.parse_store_uri(model_uri)
+        if prefix and prefix != mlrun.utils.helpers.StorePrefix.Model:
+            return mlrun.datastore.get_store_uri(
+                mlrun.utils.helpers.StorePrefix.Model, uri
+            )
+        return model_uri
+
 
 class Histogram(BaseModel):
     buckets: List[float]
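
The two validators above follow a standard pydantic v1 pattern: fall back to defaults for an empty value and normalize a field in place. A minimal, self-contained sketch (not mlrun's actual class; the thresholds and prefix used here are assumptions) behaves the same way:

from typing import Optional

from pydantic import BaseModel, validator

ASSUMED_DEFAULTS = {"drift_detected_threshold": 0.7, "possible_drift_threshold": 0.5}


class SpecSketch(BaseModel):
    monitor_configuration: Optional[dict] = None
    model_uri: str = ""

    @validator("monitor_configuration")
    def default_thresholds(cls, value):
        # Empty dict / None -> fall back to the assumed default thresholds.
        return value or dict(ASSUMED_DEFAULTS)

    @validator("model_uri")
    def ensure_store_prefix(cls, value):
        # Prepend a model store prefix when it is missing (simplified stand-in
        # for parse_store_uri / get_store_uri).
        return value if value.startswith("store://") else f"store://models/{value}"


spec = SpecSketch(monitor_configuration={}, model_uri="proj/my-model")
print(spec.monitor_configuration)  # the assumed default thresholds
print(spec.model_uri)              # store://models/proj/my-model
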
mlrun/config.py CHANGED
@@ -403,6 +403,7 @@ default_config = {
     },
     "model_endpoint_monitoring": {
         "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
+        "application_stream_args": {"shard_count": 3, "retention_period_hours": 24},
         "drift_thresholds": {"default": {"possible_drift": 0.5, "drift_detected": 0.7}},
         # Store prefixes are used to handle model monitoring storing policies based on project and kind, such as events,
         # stream, and endpoints.
@@ -417,6 +418,7 @@ default_config = {
         # Default http path that points to the monitoring stream nuclio function. Will be used as a stream path
         # when the user is working in CE environment and has not provided any stream path.
         "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.mlrun.svc.cluster.local:8080",
+        "default_http_sink_app": "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080",
         "batch_processing_function_branch": "master",
         "parquet_batching_max_events": 10000,
         "parquet_batching_timeout_secs": timedelta(minutes=30).total_seconds(),
@@ -981,20 +983,22 @@ class Config:
         kind: str = "",
         target: str = "online",
         artifact_path: str = None,
+        application_name: str = None,
     ) -> str:
         """Get the full path from the configuration based on the provided project and kind.
 
-        :param project:
-        :param kind:
-        :param target:
-
-
-
-
-
-
-        :param artifact_path:
-
+        :param project:          Project name.
+        :param kind:             Kind of target path (e.g. events, log_stream, endpoints, etc.)
+        :param target:           Can be either online or offline. If the target is online, then we try to get a specific
+                                 path for the provided kind. If it doesn't exist, use the default path.
+                                 If the target path is offline and the offline path is already a full path in the
+                                 configuration, then the result will be that path as-is. If the offline path is a
+                                 relative path, then the result will be based on the project artifact path and the
+                                 offline relative path. If project artifact path wasn't provided, then we use MLRun
+                                 artifact path instead.
+        :param artifact_path:    Optional artifact path that will be used as a relative path. If not provided, the
+                                 relative artifact path will be taken from the global MLRun artifact path.
+        :param application_name: Application name, None for model_monitoring_stream.
 
         :return: Full configured path for the provided kind.
         """
@@ -1006,8 +1010,22 @@ class Config:
         if store_prefix_dict.get(kind):
             # Target exist in store prefix and has a valid string value
             return store_prefix_dict[kind].format(project=project)
+
+        if (
+            application_name
+            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
+        ):
+            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
+                project=project,
+                kind=kind
+                if application_name is None
+                else f"{kind}-{application_name.lower()}",
+            )
         return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.default.format(
-            project=project,
+            project=project,
+            kind=kind
+            if application_name is None
+            else f"{kind}-{application_name.lower()}",
        )
 
        # Get the current offline path from the configuration
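
The effect of the new application_name parameter is easiest to see on the path template itself: when an application name is given, the kind is suffixed with the lower-cased name before the store prefix is formatted. A standalone sketch of that rule follows; the template string here is an assumption for illustration, not mlrun's actual default.

# Assumed store-prefix template, for illustration only.
USER_SPACE_TEMPLATE = "v3io:///projects/{project}/model-endpoints/{kind}"


def resolve_kind(kind: str, application_name: str = None) -> str:
    # Mirrors the kind-suffix rule introduced above.
    return kind if application_name is None else f"{kind}-{application_name.lower()}"


print(USER_SPACE_TEMPLATE.format(project="demo", kind=resolve_kind("parquet", None)))
# v3io:///projects/demo/model-endpoints/parquet
print(USER_SPACE_TEMPLATE.format(project="demo", kind=resolve_kind("parquet", "MyApp")))
# v3io:///projects/demo/model-endpoints/parquet-myapp
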
mlrun/datastore/__init__.py CHANGED

mlrun/datastore/datastore_profile.py CHANGED
@@ -37,9 +37,9 @@ class DatastoreProfile(pydantic.BaseModel):
 
     @staticmethod
     def generate_secret_key(profile_name: str, project: str):
-        secret_name_separator = "
+        secret_name_separator = "."
         full_key = (
-            "
+            "datastore-profiles"
             + secret_name_separator
             + project
             + secret_name_separator
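
The key layout implied by the change above, sketched in isolation; the trailing component (presumably the profile name) is not visible in this hunk, so it is an assumption here.

def generate_secret_key_sketch(profile_name: str, project: str) -> str:
    # Parts are now joined with "." and prefixed with "datastore-profiles".
    separator = "."
    return separator.join(["datastore-profiles", project, profile_name])


print(generate_secret_key_sketch("my-profile", "my-project"))
# datastore-profiles.my-project.my-profile
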
mlrun/datastore/sources.py CHANGED
@@ -138,7 +138,6 @@ class CSVSource(BaseSourceDriver):
     :parameter path: path to CSV file
     :parameter key_field: the CSV field to be used as the key for events. May be an int (field index) or string
         (field name) if with_header is True. Defaults to None (no key). Can be a list of keys.
-    :parameter time_field: DEPRECATED. Use parse_dates to parse timestamps.
     :parameter schedule: string to configure scheduling of the ingestion job.
     :parameter attributes: additional parameters to pass to storey. For example:
         attributes={"timestamp_format": '%Y%m%d%H'}
@@ -156,29 +155,13 @@ class CSVSource(BaseSourceDriver):
         path: str = None,
         attributes: Dict[str, str] = None,
         key_field: str = None,
-        time_field: str = None,
         schedule: str = None,
         parse_dates: Union[None, int, str, List[int], List[str]] = None,
         **kwargs,
     ):
-        super().__init__(
-
-
-        if time_field is not None:
-            warnings.warn(
-                "CSVSource's time_field parameter is deprecated in 1.3.0 and will be removed in 1.5.0. "
-                "Use parse_dates instead.",
-                # TODO: remove in 1.5.0
-                FutureWarning,
-            )
-        if isinstance(parse_dates, (int, str)):
-            parse_dates = [parse_dates]
-
-        if parse_dates is None:
-            parse_dates = [time_field]
-        elif time_field not in parse_dates:
-            parse_dates = copy(parse_dates)
-            parse_dates.append(time_field)
+        super().__init__(name, path, attributes, key_field, schedule=schedule, **kwargs)
+        if parse_dates and not isinstance(parse_dates, list):
+            parse_dates = [parse_dates]
         self._parse_dates = parse_dates
 
     def to_step(self, key_field=None, time_field=None, context=None):
@@ -724,16 +707,7 @@ class DataFrameSource:
 
     support_storey = True
 
-    def __init__(
-        self, df, key_field=None, time_field=None, context=None, iterator=False
-    ):
-        if time_field:
-            warnings.warn(
-                "DataFrameSource's time_field parameter has no effect. "
-                "It is deprecated in 1.3.0 and will be removed in 1.5.0",
-                FutureWarning,
-            )
-
+    def __init__(self, df, key_field=None, context=None, iterator=False):
         self._df = df
         if isinstance(key_field, str):
             self.key_field = [key_field]
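
With time_field removed from CSVSource and DataFrameSource, timestamp parsing is requested through parse_dates, which the new constructor wraps into a list when a single column is given. A hedged usage sketch (file and column names are illustrative):

from mlrun.datastore.sources import CSVSource

# parse_dates replaces the removed time_field parameter; a single column name
# is normalized internally to ["timestamp"].
source = CSVSource(
    name="events",
    path="./events.csv",
    key_field="user_id",
    parse_dates="timestamp",
)
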
mlrun/datastore/targets.py CHANGED
@@ -484,6 +484,7 @@ class BaseStoreTarget(DataTargetBase):
         if hasattr(df, "rdd"):
             options = self.get_spark_options(key_column, timestamp_key)
             options.update(kwargs)
+            df = self.prepare_spark_df(df, key_column, timestamp_key, options)
             df.write.mode("overwrite").save(**options)
         elif hasattr(df, "dask"):
             dask_options = self.get_dask_options()
@@ -513,36 +514,41 @@
         dir = os.path.dirname(target_path)
         if dir:
             os.makedirs(dir, exist_ok=True)
-        partition_cols = []
-        if target_path.endswith(".parquet") or target_path.endswith(".pq"):
-            partition_cols = None
         target_df = df
-
-
-
-
-
-        if
-
-
-        )
-
-
-
-
-
-
-
-
-
-
-
-
-
+        partition_cols = None  # single parquet file
+        if not target_path.endswith(".parquet") and not target_path.endswith(
+            ".pq"
+        ):  # directory
+            partition_cols = []
+            if timestamp_key and (
+                self.partitioned or self.time_partitioning_granularity
+            ):
+                target_df = df.copy(deep=False)
+                time_partitioning_granularity = self.time_partitioning_granularity
+                if not time_partitioning_granularity and self.partitioned:
+                    time_partitioning_granularity = (
+                        mlrun.utils.helpers.DEFAULT_TIME_PARTITIONING_GRANULARITY
+                    )
+                for unit, fmt in [
+                    ("year", "%Y"),
+                    ("month", "%m"),
+                    ("day", "%d"),
+                    ("hour", "%H"),
+                    ("minute", "%M"),
+                ]:
+                    partition_cols.append(unit)
+                    target_df[unit] = pd.DatetimeIndex(
+                        target_df[timestamp_key]
+                    ).format(date_format=fmt)
+                    if unit == time_partitioning_granularity:
+                        break
+            # Partitioning will be performed on timestamp_key and then on self.partition_cols
+            # (We might want to give the user control on this order as additional functionality)
+            partition_cols += self.partition_cols or []
         storage_options = self._get_store().get_storage_options()
         self._write_dataframe(
             target_df,
-            storage_options,
+            self.storage_options or storage_options,
             target_path,
             partition_cols=partition_cols,
             **kwargs,
@@ -690,7 +696,7 @@ class BaseStoreTarget(DataTargetBase):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
         return df
 
     def get_dask_options(self):
@@ -924,6 +930,37 @@ class ParquetTarget(BaseStoreTarget):
             return self.path.endswith(".parquet") or self.path.endswith(".pq")
         return False
 
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
+        # If partitioning by time, add the necessary columns
+        if (
+            timestamp_key
+            and isinstance(spark_options, dict)
+            and "partitionBy" in spark_options
+        ):
+            from pyspark.sql.functions import (
+                dayofmonth,
+                hour,
+                minute,
+                month,
+                second,
+                year,
+            )
+
+            time_unit_to_op = {
+                "year": year,
+                "month": month,
+                "day": dayofmonth,
+                "hour": hour,
+                "minute": minute,
+                "second": second,
+            }
+            timestamp_col = df[timestamp_key]
+            for partition in spark_options["partitionBy"]:
+                if partition not in df.columns and partition in time_unit_to_op:
+                    op = time_unit_to_op[partition]
+                    df = df.withColumn(partition, op(timestamp_col))
+        return df
+
 
 class CSVTarget(BaseStoreTarget):
     kind = TargetTypes.csv
@@ -973,7 +1010,7 @@ class CSVTarget(BaseStoreTarget):
             "header": "true",
         }
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         import pyspark.sql.functions as funcs
 
         for col_name, col_type in df.dtypes:
@@ -1067,7 +1104,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         raise NotImplementedError()
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
@@ -1139,7 +1176,7 @@ class NoSqlTarget(NoSqlBaseTarget):
         spark_options["columnUpdate"] = True
         return spark_options
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         from pyspark.sql.functions import col
 
         spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
@@ -1232,7 +1269,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         endpoint, uri = self._get_server_endpoint()
         return endpoint
 
-    def prepare_spark_df(self, df, key_columns):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         from pyspark.sql.functions import col
 
         spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
@@ -1580,16 +1617,6 @@ class SQLTarget(BaseStoreTarget):
         :param parse_dates : all the field to be parsed as timestamp.
         """
 
-        # Validate sqlalchemy (not installed by default):
-        try:
-            import sqlalchemy
-
-            self.sqlalchemy = sqlalchemy
-        except (ModuleNotFoundError, ImportError) as exc:
-            raise mlrun.errors.MLRunMissingDependencyError(
-                "Using 'SQLTarget' requires sqlalchemy package. Use pip install mlrun[sqlalchemy] to install it."
-            ) from exc
-
         create_according_to_data = False  # TODO: open for user
         if time_fields:
             warnings.warn(
@@ -1696,8 +1723,14 @@
         time_column=None,
         **kwargs,
     ):
+        try:
+            import sqlalchemy
+
+        except (ModuleNotFoundError, ImportError) as exc:
+            self._raise_sqlalchemy_import_error(exc)
+
         db_path, table_name, _, _, _, _ = self._parse_url()
-        engine =
+        engine = sqlalchemy.create_engine(db_path)
         parse_dates: Optional[List[str]] = self.attributes.get("parse_dates")
         with engine.connect() as conn:
             query, parse_dates = _generate_sql_query_with_time_filter(
@@ -1721,6 +1754,12 @@
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
+        try:
+            import sqlalchemy
+
+        except (ModuleNotFoundError, ImportError) as exc:
+            self._raise_sqlalchemy_import_error(exc)
+
         self._create_sql_table()
 
         if hasattr(df, "rdd"):
@@ -1735,7 +1774,7 @@
             _,
         ) = self._parse_url()
         create_according_to_data = bool(create_according_to_data)
-        engine =
+        engine = sqlalchemy.create_engine(
            db_path,
        )
        connection = engine.connect()
@@ -1760,28 +1799,34 @@
            primary_key,
            create_table,
        ) = self._parse_url()
+        try:
+            import sqlalchemy
+
+        except (ModuleNotFoundError, ImportError) as exc:
+            self._raise_sqlalchemy_import_error(exc)
+
        try:
            primary_key = ast.literal_eval(primary_key)
            primary_key_for_check = primary_key
        except Exception:
            primary_key_for_check = [primary_key]
-        engine =
+        engine = sqlalchemy.create_engine(db_path)
        with engine.connect() as conn:
-            metadata =
+            metadata = sqlalchemy.MetaData()
            table_exists = engine.dialect.has_table(conn, table_name)
            if not table_exists and not create_table:
                raise ValueError(f"Table named {table_name} is not exist")
 
            elif not table_exists and create_table:
                TYPE_TO_SQL_TYPE = {
-                    int:
-                    str:
-                    datetime.datetime:
-                    pd.Timestamp:
-                    bool:
-                    float:
-                    datetime.timedelta:
-                    pd.Timedelta:
+                    int: sqlalchemy.Integer,
+                    str: sqlalchemy.String(self.attributes.get("varchar_len")),
+                    datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
+                    pd.Timestamp: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
+                    bool: sqlalchemy.Boolean,
+                    float: sqlalchemy.Float,
+                    datetime.timedelta: sqlalchemy.Interval,
+                    pd.Timedelta: sqlalchemy.Interval,
                }
                # creat new table with the given name
                columns = []
@@ -1790,12 +1835,12 @@
                    if col_type is None:
                        raise TypeError(f"{col_type} unsupported type")
                    columns.append(
-
+                        sqlalchemy.Column(
                            col, col_type, primary_key=(col in primary_key_for_check)
                        )
                    )
 
-
+                sqlalchemy.Table(table_name, metadata, *columns)
                metadata.create_all(engine)
                if_exists = "append"
                self.path = (
@@ -1804,6 +1849,12 @@
        )
        conn.close()
 
+    @staticmethod
+    def _raise_sqlalchemy_import_error(exc):
+        raise mlrun.errors.MLRunMissingDependencyError(
+            "Using 'SQLTarget' requires sqlalchemy package. Use pip install mlrun[sqlalchemy] to install it."
+        ) from exc
+
 
 kind_to_driver = {
     TargetTypes.parquet: ParquetTarget,
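
The ParquetTarget.prepare_spark_df addition derives missing time-partition columns from the timestamp column. The same technique in a standalone sketch (assumes an existing Spark dataframe with a "timestamp" column; not mlrun code):

from pyspark.sql import functions as F


def add_time_partitions(df, timestamp_key, partition_by):
    # Map requested time units to the matching pyspark column functions.
    ops = {
        "year": F.year,
        "month": F.month,
        "day": F.dayofmonth,
        "hour": F.hour,
        "minute": F.minute,
        "second": F.second,
    }
    for unit in partition_by:
        if unit in ops and unit not in df.columns:
            df = df.withColumn(unit, ops[unit](F.col(timestamp_key)))
    return df


# df = add_time_partitions(df, "timestamp", ["year", "month", "day"])
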
mlrun/db/httpdb.py CHANGED
@@ -18,6 +18,7 @@ import tempfile
 import time
 import traceback
 import typing
+import warnings
 from datetime import datetime, timedelta
 from os import path, remove
 from typing import Dict, List, Optional, Union
@@ -1411,6 +1412,8 @@ class HTTPRunDB(RunDBInterface):
         namespace=None,
         artifact_path=None,
         ops=None,
+        # TODO: deprecated, remove in 1.6.0
+        ttl=None,
         cleanup_ttl=None,
     ):
         """Submit a KFP pipeline for execution.
@@ -1423,9 +1426,18 @@ class HTTPRunDB(RunDBInterface):
         :param namespace: Kubernetes namespace to execute the pipeline in.
         :param artifact_path: A path to artifacts used by this pipeline.
         :param ops: Transformers to apply on all ops in the pipeline.
+        :param ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the workflow
+                    and all its resources are deleted) (deprecated, use cleanup_ttl instead)
         :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
                             workflow and all its resources are deleted)
         """
+        if ttl:
+            warnings.warn(
+                "'ttl' is deprecated, use 'cleanup_ttl' instead. "
+                "This will be removed in 1.6.0",
+                # TODO: Remove this in 1.6.0
+                FutureWarning,
+            )
 
         if isinstance(pipeline, str):
             pipe_file = pipeline
@@ -1433,7 +1445,7 @@ class HTTPRunDB(RunDBInterface):
             pipe_file = tempfile.NamedTemporaryFile(suffix=".yaml", delete=False).name
         conf = new_pipe_metadata(
             artifact_path=artifact_path,
-            cleanup_ttl=cleanup_ttl,
+            cleanup_ttl=cleanup_ttl or ttl,
             op_transformers=ops,
         )
         kfp.compiler.Compiler().compile(
@@ -1471,15 +1483,17 @@ class HTTPRunDB(RunDBInterface):
                 headers=headers,
             )
         except OSError as err:
-            logger.error(
-            raise OSError(f"
+            logger.error("Error: Cannot submit pipeline", err=err_to_str(err))
+            raise OSError(f"Error: Cannot submit pipeline, {err_to_str(err)}")
 
         if not resp.ok:
-            logger.error(
-            raise ValueError(f"
+            logger.error("Failed to submit pipeline", respones_text=resp.text)
+            raise ValueError(f"Failed to submit pipeline, {resp.text}")
 
         resp = resp.json()
-        logger.info(
+        logger.info(
+            "Pipeline submitted successfully", pipeline_name=resp["name"], id=resp["id"]
+        )
         return resp["id"]
 
     def list_pipelines(
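
The ttl handling above is a common deprecation shim: the legacy keyword still works but emits a FutureWarning and is folded into the new one. The pattern in isolation (a sketch, not the mlrun method):

import warnings


def submit_sketch(cleanup_ttl=None, ttl=None):
    if ttl:
        warnings.warn(
            "'ttl' is deprecated, use 'cleanup_ttl' instead. "
            "This will be removed in 1.6.0",
            FutureWarning,
        )
    # The legacy value is only used when the new keyword was not provided.
    return cleanup_ttl or ttl


print(submit_sketch(ttl=3600))          # warns, returns 3600
print(submit_sketch(cleanup_ttl=1800))  # no warning, returns 1800
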
mlrun/feature_store/__init__.py CHANGED
@@ -20,6 +20,7 @@ __all__ = [
     "ingest",
     "preview",
     "deploy_ingestion_service",
+    "deploy_ingestion_service_v2",
     "delete_feature_set",
     "delete_feature_vector",
     "get_feature_set",
@@ -41,6 +42,7 @@ from .api import (
     delete_feature_set,
     delete_feature_vector,
     deploy_ingestion_service,
+    deploy_ingestion_service_v2,
     get_feature_set,
     get_feature_vector,
     get_offline_features,