mlrun-1.7.0rc18-py3-none-any.whl → mlrun-1.7.0rc19-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (63)
  1. mlrun/__main__.py +5 -2
  2. mlrun/common/constants.py +64 -3
  3. mlrun/common/formatters/__init__.py +16 -0
  4. mlrun/common/formatters/base.py +59 -0
  5. mlrun/common/formatters/function.py +41 -0
  6. mlrun/common/runtimes/constants.py +29 -4
  7. mlrun/common/schemas/__init__.py +0 -1
  8. mlrun/common/schemas/api_gateway.py +52 -0
  9. mlrun/common/schemas/frontend_spec.py +1 -0
  10. mlrun/common/schemas/model_monitoring/__init__.py +6 -3
  11. mlrun/common/schemas/model_monitoring/constants.py +2 -7
  12. mlrun/config.py +7 -2
  13. mlrun/datastore/sources.py +16 -22
  14. mlrun/datastore/store_resources.py +5 -1
  15. mlrun/datastore/targets.py +3 -2
  16. mlrun/datastore/utils.py +42 -0
  17. mlrun/execution.py +16 -6
  18. mlrun/feature_store/ingestion.py +7 -6
  19. mlrun/feature_store/retrieval/job.py +4 -1
  20. mlrun/frameworks/parallel_coordinates.py +2 -1
  21. mlrun/frameworks/tf_keras/__init__.py +4 -1
  22. mlrun/launcher/client.py +4 -2
  23. mlrun/launcher/local.py +8 -2
  24. mlrun/launcher/remote.py +8 -2
  25. mlrun/model.py +5 -1
  26. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  27. mlrun/model_monitoring/db/stores/base/store.py +1 -2
  28. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  29. mlrun/model_monitoring/db/stores/sqldb/models/base.py +32 -2
  30. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +25 -5
  31. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +207 -139
  33. mlrun/model_monitoring/db/tsdb/__init__.py +1 -1
  34. mlrun/model_monitoring/db/tsdb/base.py +225 -38
  35. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  36. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +48 -15
  37. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +182 -16
  38. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +229 -42
  39. mlrun/model_monitoring/helpers.py +13 -0
  40. mlrun/model_monitoring/writer.py +36 -11
  41. mlrun/projects/operations.py +8 -5
  42. mlrun/projects/pipelines.py +42 -15
  43. mlrun/projects/project.py +22 -6
  44. mlrun/runtimes/base.py +2 -1
  45. mlrun/runtimes/local.py +4 -1
  46. mlrun/runtimes/nuclio/api_gateway.py +32 -8
  47. mlrun/runtimes/nuclio/application/application.py +3 -3
  48. mlrun/runtimes/nuclio/function.py +1 -4
  49. mlrun/runtimes/utils.py +5 -6
  50. mlrun/serving/server.py +2 -1
  51. mlrun/utils/helpers.py +8 -6
  52. mlrun/utils/logger.py +28 -1
  53. mlrun/utils/notifications/notification/__init__.py +14 -9
  54. mlrun/utils/notifications/notification_pusher.py +10 -3
  55. mlrun/utils/v3io_clients.py +0 -1
  56. mlrun/utils/version/version.json +2 -2
  57. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/METADATA +3 -3
  58. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/RECORD +62 -59
  59. mlrun/model_monitoring/db/v3io_tsdb_reader.py +0 -335
  60. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/LICENSE +0 -0
  61. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/WHEEL +0 -0
  62. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/entry_points.txt +0 -0
  63. {mlrun-1.7.0rc18.dist-info → mlrun-1.7.0rc19.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -31,6 +31,7 @@ from mlrun_pipelines.mounts import auto_mount as auto_mount_modifier
 from tabulate import tabulate
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.schemas
 from mlrun.common.helpers import parse_versioned_object_uri
 
@@ -256,8 +257,10 @@ def run(
         runobj.metadata.labels[k] = v
 
     if workflow:
-        runobj.metadata.labels["workflow"] = workflow
-        runobj.metadata.labels["mlrun/runner-pod"] = socket.gethostname()
+        runobj.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = workflow
+        runobj.metadata.labels[mlrun_constants.MLRunInternalLabels.runner_pod] = (
+            socket.gethostname()
+        )
 
     if db:
         mlconf.dbpath = db
mlrun/common/constants.py CHANGED
@@ -12,12 +12,73 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+
 IMAGE_NAME_ENRICH_REGISTRY_PREFIX = "."  # prefix for image name to enrich with registry
-MLRUN_CREATED_LABEL = "mlrun-created"
-MLRUN_MODEL_CONF = "model-conf"
-MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_MODEL_CONF}"
+MLRUN_SERVING_CONF = "serving-conf"
+MLRUN_SERVING_SPEC_MOUNT_PATH = f"/tmp/mlrun/{MLRUN_SERVING_CONF}"
 MLRUN_SERVING_SPEC_FILENAME = "serving_spec.json"
 MLRUN_SERVING_SPEC_PATH = (
     f"{MLRUN_SERVING_SPEC_MOUNT_PATH}/{MLRUN_SERVING_SPEC_FILENAME}"
 )
+MLRUN_FUNCTIONS_ANNOTATION = "mlrun/mlrun-functions"
 MYSQL_MEDIUMBLOB_SIZE_BYTES = 16 * 1024 * 1024
+MLRUN_LABEL_PREFIX = "mlrun/"
+DASK_LABEL_PREFIX = "dask.org/"
+NUCLIO_LABEL_PREFIX = "nuclio.io/"
+
+
+class MLRunInternalLabels:
+    ### dask
+    dask_cluster_name = f"{DASK_LABEL_PREFIX}cluster-name"
+    dask_component = f"{DASK_LABEL_PREFIX}component"
+
+    ### spark
+    spark_role = "spark-role"
+
+    ### mpi
+    mpi_job_name = "mpi-job-name"
+    mpi_job_role = "mpi-job-role"
+    mpi_role_type = "mpi_role_type"
+
+    ### nuclio
+    nuclio_project_name = f"{NUCLIO_LABEL_PREFIX}project-name"
+    nuclio_class = f"{NUCLIO_LABEL_PREFIX}class"
+
+    ### mlrun
+    mlrun_auth_key = "mlrun-auth-key"
+    mlrun_class = f"{MLRUN_LABEL_PREFIX}class"
+    client_python_version = f"{MLRUN_LABEL_PREFIX}client_python_version"
+    client_version = f"{MLRUN_LABEL_PREFIX}client_version"
+    function = f"{MLRUN_LABEL_PREFIX}function"
+    job = f"{MLRUN_LABEL_PREFIX}job"
+    name = f"{MLRUN_LABEL_PREFIX}name"
+    mlrun_owner = f"{MLRUN_LABEL_PREFIX}owner"
+    owner_domain = f"{MLRUN_LABEL_PREFIX}owner_domain"
+    project = f"{MLRUN_LABEL_PREFIX}project"
+    runner_pod = f"{MLRUN_LABEL_PREFIX}runner-pod"
+    schedule_name = f"{MLRUN_LABEL_PREFIX}schedule-name"
+    scrape_metrics = f"{MLRUN_LABEL_PREFIX}scrape-metrics"
+    tag = f"{MLRUN_LABEL_PREFIX}tag"
+    uid = f"{MLRUN_LABEL_PREFIX}uid"
+    username = f"{MLRUN_LABEL_PREFIX}username"
+    username_domain = f"{MLRUN_LABEL_PREFIX}username_domain"
+    task_name = f"{MLRUN_LABEL_PREFIX}task-name"
+    host = "host"
+    job_type = "job-type"
+    kind = "kind"
+    component = "component"
+    resource_name = "resource_name"
+    created = "mlrun-created"
+
+    owner = "owner"
+    v3io_user = "v3io_user"
+    workflow = "workflow"
+    feature_vector = "feature-vector"
+
+    @classmethod
+    def all(cls):
+        return [
+            value
+            for key, value in cls.__dict__.items()
+            if not key.startswith("__") and isinstance(value, str)
+        ]
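The new MLRunInternalLabels class centralizes label keys that were previously scattered string literals (see the __main__.py hunk above). A minimal sketch of how the constants are consumed; the label values here are illustrative:

import mlrun.common.constants as mlrun_constants

# Build a labels dict from the shared constants instead of raw strings.
labels = {
    mlrun_constants.MLRunInternalLabels.project: "my-project",  # "mlrun/project"
    mlrun_constants.MLRunInternalLabels.owner: "admin",         # "owner"
}

# all() gathers every string attribute of the class; this is what the new
# Config.internal_labels() helper exposes (see the mlrun/config.py hunk below).
print(mlrun_constants.MLRunInternalLabels.all())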
mlrun/common/formatters/__init__.py ADDED
@@ -0,0 +1,16 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from .function import FunctionFormat  # noqa
mlrun/common/formatters/base.py ADDED
@@ -0,0 +1,59 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import typing
+
+
+class ObjectFormat:
+    full = "full"
+
+    @staticmethod
+    def format_method(_format: str) -> typing.Optional[typing.Callable]:
+        return {
+            ObjectFormat.full: None,
+        }[_format]
+
+    @classmethod
+    def format_obj(cls, obj: typing.Any, _format: str) -> typing.Any:
+        _format = _format or cls.full
+        format_method = cls.format_method(_format)
+        if not format_method:
+            return obj
+
+        return format_method(obj)
+
+    @staticmethod
+    def filter_obj_method(_filter: list[list[str]]) -> typing.Callable:
+        def _filter_method(obj: dict) -> dict:
+            formatted_obj = {}
+            for key_list in _filter:
+                obj_recursive_iterator = obj
+                formatted_obj_recursive_iterator = formatted_obj
+                for idx, key in enumerate(key_list):
+                    if key not in obj_recursive_iterator:
+                        break
+                    value = (
+                        {} if idx < len(key_list) - 1 else obj_recursive_iterator[key]
+                    )
+                    formatted_obj_recursive_iterator.setdefault(key, value)
+
+                    obj_recursive_iterator = obj_recursive_iterator[key]
+                    formatted_obj_recursive_iterator = formatted_obj_recursive_iterator[
+                        key
+                    ]
+
+            return formatted_obj
+
+        return _filter_method
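filter_obj_method builds a projection function over nested dicts from a list of key paths. A self-contained sketch of the behavior; the sample object is invented for illustration:

from mlrun.common.formatters.base import ObjectFormat

obj = {
    "kind": "job",
    "spec": {"image": "mlrun/mlrun", "args": ["--verbose"]},
    "status": {"state": "ready"},
}

# Keep only "kind" and the nested "spec" -> "image" path; "spec.args" is dropped.
project = ObjectFormat.filter_obj_method([["kind"], ["spec", "image"]])
print(project(obj))  # {'kind': 'job', 'spec': {'image': 'mlrun/mlrun'}}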
mlrun/common/formatters/function.py ADDED
@@ -0,0 +1,41 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import typing
+
+import mlrun.common.types
+
+from .base import ObjectFormat
+
+
+class FunctionFormat(ObjectFormat, mlrun.common.types.StrEnum):
+    minimal = "minimal"
+
+    @staticmethod
+    def format_method(_format: str) -> typing.Optional[typing.Callable]:
+        return {
+            FunctionFormat.full: None,
+            FunctionFormat.minimal: FunctionFormat.filter_obj_method(
+                [
+                    ["kind"],
+                    ["metadata"],
+                    ["status"],
+                    ["spec", "description"],
+                    ["spec", "image"],
+                    ["spec", "default_handler"],
+                    ["spec", "entry_points"],
+                ]
+            ),
+        }[_format]
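FunctionFormat.minimal applies that filtering to function objects, trimming heavyweight spec sections while keeping kind, metadata, and status whole. A hedged usage sketch; the function dict is fabricated:

from mlrun.common.formatters import FunctionFormat

func = {
    "kind": "job",
    "metadata": {"name": "trainer"},
    "spec": {"image": "mlrun/mlrun", "build": {"commands": ["pip install xgboost"]}},
    "status": {"state": "ready"},
}

# Only the spec keys listed in the filter survive; the build section is stripped.
minimal = FunctionFormat.format_obj(func, FunctionFormat.minimal)
print(minimal["spec"])  # {'image': 'mlrun/mlrun'}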
mlrun/common/runtimes/constants.py CHANGED
@@ -15,6 +15,10 @@
 import enum
 import typing
 
+import mlrun_pipelines.common.models
+
+import mlrun.common.constants as mlrun_constants
+
 
 class PodPhases:
     """
@@ -122,8 +126,8 @@ class MPIJobCRDVersions:
     @staticmethod
     def role_label_by_version(version):
         return {
-            MPIJobCRDVersions.v1alpha1: "mpi_role_type",
-            MPIJobCRDVersions.v1: "mpi-job-role",
+            MPIJobCRDVersions.v1alpha1: mlrun_constants.MLRunInternalLabels.mpi_role_type,
+            MPIJobCRDVersions.v1: mlrun_constants.MLRunInternalLabels.mpi_job_role,
         }[version]
 
 
@@ -191,10 +195,31 @@ class RunStates:
         # TODO: add aborting state once we have it
     ]
 
+    @staticmethod
+    def run_state_to_pipeline_run_status(run_state: str):
+        if not run_state:
+            return mlrun_pipelines.common.models.RunStatuses.runtime_state_unspecified
 
+        if run_state not in RunStates.all():
+            raise ValueError(f"Invalid run state: {run_state}")
+
+        return {
+            RunStates.completed: mlrun_pipelines.common.models.RunStatuses.succeeded,
+            RunStates.error: mlrun_pipelines.common.models.RunStatuses.failed,
+            RunStates.running: mlrun_pipelines.common.models.RunStatuses.running,
+            RunStates.created: mlrun_pipelines.common.models.RunStatuses.pending,
+            RunStates.pending: mlrun_pipelines.common.models.RunStatuses.pending,
+            RunStates.unknown: mlrun_pipelines.common.models.RunStatuses.runtime_state_unspecified,
+            RunStates.aborted: mlrun_pipelines.common.models.RunStatuses.canceled,
+            RunStates.aborting: mlrun_pipelines.common.models.RunStatuses.canceling,
+            RunStates.skipped: mlrun_pipelines.common.models.RunStatuses.skipped,
+        }[run_state]
+
+
+# TODO: remove this class in 1.9.0 - use only MlrunInternalLabels
 class RunLabels(enum.Enum):
-    owner = "owner"
-    v3io_user = "v3io_user"
+    owner = mlrun_constants.MLRunInternalLabels.owner
+    v3io_user = mlrun_constants.MLRunInternalLabels.v3io_user
 
     @staticmethod
     def all():
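The new run_state_to_pipeline_run_status helper gives callers a single place to translate MLRun run states into pipeline run statuses. A hedged sketch of the mapping in use:

import mlrun.common.runtimes.constants as runtimes_constants

# "completed" maps to the pipeline-level succeeded status; empty or unknown
# states fall back to runtime_state_unspecified, and unrecognized states raise.
status = runtimes_constants.RunStates.run_state_to_pipeline_run_status("completed")
print(status)

try:
    runtimes_constants.RunStates.run_state_to_pipeline_run_status("not-a-state")
except ValueError as err:
    print(err)  # Invalid run state: not-a-state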
mlrun/common/schemas/__init__.py CHANGED
@@ -149,7 +149,6 @@ from .model_monitoring import (
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
     PrometheusEndpoints,
-    TimeSeriesConnector,
     TSDBTarget,
     V3IOTSDBTables,
 )
mlrun/common/schemas/api_gateway.py CHANGED
@@ -18,6 +18,7 @@ from typing import Optional
 import pydantic
 
 import mlrun.common.types
+from mlrun.common.constants import MLRUN_FUNCTIONS_ANNOTATION
 
 
 class APIGatewayAuthenticationMode(mlrun.common.types.StrEnum):
@@ -55,6 +56,7 @@ class APIGatewayMetadata(_APIGatewayBaseModel):
     name: str
     namespace: Optional[str]
     labels: Optional[dict] = {}
+    annotations: Optional[dict] = {}
 
 
 class APIGatewayBasicAuth(_APIGatewayBaseModel):
@@ -91,6 +93,56 @@ class APIGateway(_APIGatewayBaseModel):
     spec: APIGatewaySpec
     status: Optional[APIGatewayStatus]
 
+    def get_function_names(self):
+        return [
+            upstream.nucliofunction.get("name")
+            for upstream in self.spec.upstreams
+            if upstream.nucliofunction.get("name")
+        ]
+
+    def enrich_mlrun_function_names(self):
+        upstream_with_nuclio_names = []
+        mlrun_function_uris = []
+        for upstream in self.spec.upstreams:
+            uri = upstream.nucliofunction.get("name")
+            project, function_name, tag, _ = (
+                mlrun.common.helpers.parse_versioned_object_uri(uri)
+            )
+            upstream.nucliofunction["name"] = (
+                mlrun.runtimes.nuclio.function.get_fullname(function_name, project, tag)
+            )
+
+            upstream_with_nuclio_names.append(upstream)
+            mlrun_function_uris.append(uri)
+
+        self.spec.upstreams = upstream_with_nuclio_names
+        if len(mlrun_function_uris) == 1:
+            self.metadata.annotations[MLRUN_FUNCTIONS_ANNOTATION] = mlrun_function_uris[
+                0
+            ]
+        elif len(mlrun_function_uris) == 2:
+            self.metadata.annotations[MLRUN_FUNCTIONS_ANNOTATION] = "&".join(
+                mlrun_function_uris
+            )
+        return self
+
+    def replace_nuclio_names_with_mlrun_uri(self):
+        mlrun_functions = self.metadata.annotations.get(MLRUN_FUNCTIONS_ANNOTATION)
+        if mlrun_functions:
+            mlrun_function_uris = (
+                mlrun_functions.split("&")
+                if "&" in mlrun_functions
+                else [mlrun_functions]
+            )
+            if len(mlrun_function_uris) != len(self.spec.upstreams):
+                raise mlrun.errors.MLRunValueError(
+                    "Error when translating nuclio names to mlrun names in api gateway:"
+                    " number of functions doesn't match the mlrun functions in annotation"
+                )
+            for i in range(len(mlrun_function_uris)):
+                self.spec.upstreams[i].nucliofunction["name"] = mlrun_function_uris[i]
+        return self
+
 
 class APIGatewaysOutput(_APIGatewayBaseModel):
     api_gateways: typing.Optional[dict[str, APIGateway]] = {}
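The round-trip above records the originating MLRun function URIs in the "mlrun/mlrun-functions" annotation, "&"-joined when a gateway fronts two (canary) functions, so nuclio names can later be translated back. A sketch of just that convention in plain Python; the URIs are illustrative:

MLRUN_FUNCTIONS_ANNOTATION = "mlrun/mlrun-functions"

# enrich_mlrun_function_names: record the MLRun URIs before the upstream
# names are rewritten to full nuclio function names.
uris = ["my-project/model-a:latest", "my-project/model-b:latest"]
annotations = {MLRUN_FUNCTIONS_ANNOTATION: "&".join(uris)}

# replace_nuclio_names_with_mlrun_uri: recover the original URIs.
stored = annotations[MLRUN_FUNCTIONS_ANNOTATION]
decoded = stored.split("&") if "&" in stored else [stored]
assert decoded == uris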
mlrun/common/schemas/frontend_spec.py CHANGED
@@ -70,3 +70,4 @@ class FrontendSpec(pydantic.BaseModel):
     feature_store_data_prefixes: typing.Optional[dict[str, str]]
     allowed_artifact_path_prefixes_list: list[str]
     ce: typing.Optional[dict]
+    internal_labels: list[str] = []
mlrun/common/schemas/model_monitoring/__init__.py CHANGED
@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
 from .constants import (
     ControllerPolicy,
@@ -30,13 +28,15 @@ from .constants import (
     ModelMonitoringMode,
     ModelMonitoringStoreKinds,
     MonitoringFunctionNames,
+    PredictionsQueryConstants,
     ProjectSecretKeys,
     PrometheusEndpoints,
     PrometheusMetric,
     ResultData,
+    ResultKindApp,
     SchedulingKeys,
+    SpecialApps,
     TDEngineSuperTables,
-    TimeSeriesConnector,
     TSDBTarget,
     V3IOTSDBTables,
     VersionedModel,
@@ -59,7 +59,10 @@ from .model_endpoints import (
     ModelEndpointList,
     ModelEndpointMetadata,
     ModelEndpointMonitoringMetric,
+    ModelEndpointMonitoringMetricNoData,
     ModelEndpointMonitoringMetricType,
+    ModelEndpointMonitoringMetricValues,
+    ModelEndpointMonitoringResultValues,
     ModelEndpointSpec,
     ModelEndpointStatus,
 )
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -158,10 +158,6 @@ class EventKeyMetrics:
     REAL_TIME = "real_time"
 
 
-class TimeSeriesConnector:
-    TSDB = "tsdb"
-
-
 class ModelEndpointTarget:
     V3IO_NOSQL = "v3io-nosql"
     SQL = "sql"
@@ -197,6 +193,7 @@ class FileTargetKind:
     APPS_PARQUET = "apps_parquet"
     LOG_STREAM = "log_stream"
     APP_RESULTS = "app_results"
+    APP_METRICS = "app_metrics"
     MONITORING_SCHEDULES = "monitoring_schedules"
     MONITORING_APPLICATION = "monitoring_application"
 
@@ -321,6 +318,7 @@ class ResultKindApp(Enum):
     concept_drift = 1
     model_performance = 2
     system_performance = 3
+    custom = 4
 
 
 class ResultStatusApp(IntEnum):
@@ -350,9 +348,6 @@ class TSDBTarget:
     V3IO_TSDB = "v3io-tsdb"
     TDEngine = "tdengine"
     PROMETHEUS = "prometheus"
-    APP_RESULTS_TABLE = "app-results"
-    V3IO_BE = "tsdb"
-    V3IO_RATE = "1/s"
 
 
 class HistogramDataDriftApplicationConstants:
mlrun/config.py CHANGED
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
     "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
     "ipython_widget": True,
    "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
@@ -370,7 +371,7 @@ default_config = {
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
         # size of serving spec to move to config maps
-        "serving_spec_env_cutoff": 4096,
+        "serving_spec_env_cutoff": 0,
     },
     "logs": {
         "decode": {
@@ -967,6 +968,10 @@ class Config:
             self.httpdb.clusterization.chief.url = chief_api_url
         return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
     @staticmethod
     def get_storage_auto_mount_params():
         auto_mount_params = {}
mlrun/datastore/sources.py CHANGED
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
-import math
 import operator
 import os
 import warnings
@@ -31,6 +30,7 @@ from nuclio.config import split_path
 import mlrun
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
 
 from ..model import DataSource
@@ -313,12 +313,13 @@ class ParquetSource(BaseSourceDriver):
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
-        additional_filters: Optional[list[tuple]] = None,
+        additional_filters: Optional[list[Union[tuple, list]]] = None,
     ):
         if additional_filters:
             attributes = copy(attributes) or {}
+            additional_filters = transform_list_filters_to_tuple(additional_filters)
             attributes["additional_filters"] = additional_filters
-            self.validate_additional_filters(additional_filters)
+
         super().__init__(
             name,
             path,
@@ -359,25 +360,6 @@ class ParquetSource(BaseSourceDriver):
         else:
             return time
 
-    @staticmethod
-    def validate_additional_filters(additional_filters):
-        if not additional_filters:
-            return
-        for filter_tuple in additional_filters:
-            if not filter_tuple:
-                continue
-            col_name, op, value = filter_tuple
-            if isinstance(value, float) and math.isnan(value):
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    "using NaN in additional_filters is not supported"
-                )
-            elif isinstance(value, (list, tuple, set)):
-                for sub_value in value:
-                    if isinstance(sub_value, float) and math.isnan(sub_value):
-                        raise mlrun.errors.MLRunInvalidArgumentError(
-                            "using NaN in additional_filters is not supported"
-                        )
-
     def to_step(
         self,
         key_field=None,
@@ -393,6 +375,7 @@ class ParquetSource(BaseSourceDriver):
         attributes.pop("additional_filters", None)
         if context:
             attributes["context"] = context
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
         data_item = mlrun.store_manager.object(self.path)
         store, path, url = mlrun.store_manager.get_or_create_store(self.path)
         return storey.ParquetSource(
@@ -406,6 +389,16 @@ class ParquetSource(BaseSourceDriver):
             **attributes,
         )
 
+    @classmethod
+    def from_dict(cls, struct=None, fields=None, deprecated_fields: dict = None):
+        new_obj = super().from_dict(
+            struct=struct, fields=fields, deprecated_fields=deprecated_fields
+        )
+        new_obj.attributes["additional_filters"] = transform_list_filters_to_tuple(
+            new_obj.additional_filters
+        )
+        return new_obj
+
     def get_spark_options(self):
         store, path, _ = mlrun.store_manager.get_or_create_store(self.path)
         spark_options = store.get_spark_options()
@@ -428,6 +421,7 @@ class ParquetSource(BaseSourceDriver):
         additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
+        additional_filters = transform_list_filters_to_tuple(additional_filters)
        return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
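With validation moved into mlrun.datastore.utils, ParquetSource now normalizes list-style filters to tuples at each entry point (__init__, to_step, from_dict, to_dataframe). A hedged usage sketch; the path and column names are invented, and the keyword arguments assume the documented ParquetSource signature:

from mlrun.datastore.sources import ParquetSource

# JSON round-trips turn filter tuples into lists; the source now accepts
# either spelling and normalizes to tuples internally.
source = ParquetSource(
    name="events",
    path="v3io:///projects/demo/events.parquet",
    additional_filters=[["label", "in", [0, 1]]],
)
print(source.attributes["additional_filters"])  # [('label', 'in', [0, 1])]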
mlrun/datastore/store_resources.py CHANGED
@@ -146,7 +146,11 @@ def get_store_resource(
 
     db = db or mlrun.get_run_db(secrets=secrets)
     kind, uri = parse_store_uri(uri)
-    if kind == StorePrefix.FeatureSet:
+    if not kind:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"Cannot get store resource from invalid URI: {uri}"
+        )
+    elif kind == StorePrefix.FeatureSet:
         project, name, tag, uid = parse_versioned_object_uri(
             uri, project or config.default_project
         )
mlrun/datastore/targets.py CHANGED
@@ -30,6 +30,7 @@ import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
 from mlrun.utils import logger, now_date
 from mlrun.utils.helpers import to_parquet
@@ -757,7 +758,7 @@ class BaseStoreTarget(DataTargetBase):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         return df
 
     def get_dask_options(self):
@@ -999,7 +1000,7 @@ class ParquetTarget(BaseStoreTarget):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
-            additional_filters=additional_filters,
+            additional_filters=transform_list_filters_to_tuple(additional_filters),
             **kwargs,
         )
         if not columns:
mlrun/datastore/utils.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import math
 import tarfile
 import tempfile
 import typing
@@ -180,3 +181,44 @@ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str
             FutureWarning,
         )
     return kafka_bootstrap_servers
+
+
+def transform_list_filters_to_tuple(additional_filters):
+    tuple_filters = []
+    if not additional_filters:
+        return tuple_filters
+    validate_additional_filters(additional_filters)
+    for additional_filter in additional_filters:
+        tuple_filters.append(tuple(additional_filter))
+    return tuple_filters
+
+
+def validate_additional_filters(additional_filters):
+    nan_error_message = "using NaN in additional_filters is not supported"
+    if additional_filters in [None, [], ()]:
+        return
+    for filter_tuple in additional_filters:
+        if filter_tuple == () or filter_tuple == []:
+            continue
+        if not isinstance(filter_tuple, (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"mlrun supports additional_filters only as a list of tuples."
+                f" Current additional_filters: {additional_filters}"
+            )
+        if isinstance(filter_tuple[0], (list, tuple)):
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"additional_filters does not support nested list inside filter tuples except in -in- logic."
+                f" Current filter_tuple: {filter_tuple}."
+            )
+        if len(filter_tuple) != 3:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"illegal filter tuple length, {filter_tuple} in additional filters:"
+                f" {additional_filters}"
+            )
+        col_name, op, value = filter_tuple
+        if isinstance(value, float) and math.isnan(value):
+            raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
+        elif isinstance(value, (list, tuple)):
+            for sub_value in value:
+                if isinstance(sub_value, float) and math.isnan(sub_value):
+                    raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
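Taken together, the new helpers normalize list-form filters to tuples and reject malformed or NaN-valued filters early, before they reach the underlying parquet reader. A short sketch of the expected behavior:

import math

import mlrun.errors
from mlrun.datastore.utils import transform_list_filters_to_tuple

# List-form filters are normalized to tuples.
print(transform_list_filters_to_tuple([["age", ">", 30]]))  # [('age', '>', 30)]

# NaN values are rejected up front with a descriptive error.
try:
    transform_list_filters_to_tuple([("score", "=", math.nan)])
except mlrun.errors.MLRunInvalidArgumentError as err:
    print(err)  # using NaN in additional_filters is not supported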