mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +0 -105
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +8 -250
- mlrun/artifacts/dataset.py +1 -190
- mlrun/artifacts/manager.py +2 -41
- mlrun/artifacts/model.py +1 -140
- mlrun/artifacts/plots.py +1 -375
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +24 -3
- mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
- mlrun/common/schemas/project.py +1 -0
- mlrun/config.py +14 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +50 -3
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +125 -6
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +1 -1
- mlrun/db/httpdb.py +69 -33
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/kfpops.py +5 -10
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +2 -2
- mlrun/model.py +36 -9
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +158 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/model_monitoring/writer.py +69 -39
- mlrun/platforms/iguazio.py +2 -2
- mlrun/projects/pipelines.py +24 -7
- mlrun/projects/project.py +130 -65
- mlrun/render.py +2 -10
- mlrun/run.py +1 -4
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +275 -153
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/runtimes/pod.py +5 -5
- mlrun/runtimes/utils.py +1 -1
- mlrun/serving/states.py +53 -2
- mlrun/utils/helpers.py +27 -40
- mlrun/utils/notifications/notification/slack.py +31 -8
- mlrun/utils/notifications/notification_pusher.py +133 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +84 -79
- mlrun/runtimes/mpijob/v1alpha1.py +0 -29
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
@@ -99,14 +99,17 @@ class FeatureSetFeatures(MonitoringStrEnum):
 
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
-    CURRENT_STATS = "current_stats"
-    FEATURE_STATS = "feature_stats"
-    SAMPLE_PARQUET_PATH = "sample_parquet_path"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
     LAST_REQUEST = "last_request"
     ENDPOINT_ID = "endpoint_id"
     OUTPUT_STREAM_URI = "output_stream_uri"
+    MLRUN_CONTEXT = "mlrun_context"
+
+    # Deprecated fields - TODO : delete in 1.9.0 (V1 app deprecation)
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
 
 
 class WriterEvent(MonitoringStrEnum):
@@ -114,6 +117,21 @@ class WriterEvent(MonitoringStrEnum):
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
+    EVENT_KIND = "event_kind"  # metric or result
+    DATA = "data"
+
+
+class WriterEventKind(MonitoringStrEnum):
+    METRIC = "metric"
+    RESULT = "result"
+
+
+class MetricData(MonitoringStrEnum):
+    METRIC_NAME = "metric_name"
+    METRIC_VALUE = "metric_value"
+
+
+class ResultData(MonitoringStrEnum):
     RESULT_NAME = "result_name"
     RESULT_VALUE = "result_value"
     RESULT_KIND = "result_kind"
@@ -303,6 +321,9 @@ class ModelMonitoringAppLabel:
     KEY = "mlrun__type"
     VAL = "mlrun__model-monitoring-application"
 
+    def __str__(self) -> str:
+        return f"{self.KEY}={self.VAL}"
+
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"

mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 import json
@@ -21,6 +20,7 @@ from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
+import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
 from .constants import (
@@ -292,6 +292,18 @@ class ModelEndpointList(BaseModel):
     endpoints: list[ModelEndpoint] = []
 
 
+class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
+    RESULT = "result"
+
+
+class ModelEndpointMonitoringMetric(BaseModel):
+    project: str
+    app: str
+    type: ModelEndpointMonitoringMetricType
+    name: str
+    full_name: str
+
+
 def _mapping_attributes(
     base_model: BaseModel,
     flattened_dictionary: dict,
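As a quick orientation to the model-monitoring additions above, the sketch below shows how the new enums and the ModelEndpointMonitoringMetric schema might be used; all concrete values (names, IDs, the full_name convention) are placeholders, not taken from the diff, and the import paths are inferred from the file list.

    from mlrun.common.schemas.model_monitoring.constants import (
        ModelMonitoringAppLabel,
        ResultData,
        WriterEvent,
        WriterEventKind,
    )
    from mlrun.common.schemas.model_monitoring.model_endpoints import (
        ModelEndpointMonitoringMetric,
        ModelEndpointMonitoringMetricType,
    )

    # The label selector string now comes straight from the class
    print(str(ModelMonitoringAppLabel()))  # mlrun__type=mlrun__model-monitoring-application

    # Writer events are now tagged with EVENT_KIND and carry their payload under DATA;
    # a hypothetical "result" payload could look like:
    event = {
        WriterEvent.ENDPOINT_ID: "1234",  # placeholder endpoint ID
        WriterEvent.EVENT_KIND: WriterEventKind.RESULT,
        WriterEvent.DATA: {
            ResultData.RESULT_NAME: "data_drift",  # placeholder result name
            ResultData.RESULT_VALUE: 0.1,
        },
    }

    # The new pydantic model describing a per-endpoint monitoring metric
    metric = ModelEndpointMonitoringMetric(
        project="my-project",
        app="my-app",
        type=ModelEndpointMonitoringMetricType.RESULT,
        name="data_drift",
        full_name="my-project.my-app.result.data_drift",  # assumed naming convention
    )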
mlrun/common/schemas/project.py CHANGED

mlrun/config.py CHANGED

@@ -188,6 +188,7 @@ default_config = {
     "background_tasks": {
         # enabled / disabled
         "timeout_mode": "enabled",
+        "function_deletion_batch_size": 10,
         # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
         "default_timeouts": {
             "operations": {
@@ -196,6 +197,7 @@ default_config = {
                 "run_abortion": "600",
                 "abort_grace_period": "10",
                 "delete_project": "900",
+                "delete_function": "900",
             },
             "runtimes": {"dask": "600"},
         },
@@ -359,7 +361,7 @@ default_config = {
         # is set to ClusterIP
         # ---------------------------------------------------------------------
         # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
@@ -552,7 +554,7 @@ default_config = {
             "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
             # "authority" is optional and generalizes [userinfo "@"] host [":" port]
             "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
-            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/
+            "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
         },
         "default_targets": "parquet,nosql",
         "default_job_image": "mlrun/mlrun",
@@ -690,7 +692,11 @@ default_config = {
         "grafana_url": "",
         "alerts": {
             # supported modes: "enabled", "disabled".
-            "mode": "
+            "mode": "enabled"
+        },
+        "auth_with_client_id": {
+            "enabled": False,
+            "request_timeout": 5,
         },
     }
 
@@ -1396,7 +1402,11 @@ def read_env(env=None, prefix=env_prefix):
         log_formatter = mlrun.utils.create_formatter_instance(
             mlrun.utils.FormatterKinds(log_formatter_name)
         )
-        mlrun.utils.logger.get_handler("default")
+        current_handler = mlrun.utils.logger.get_handler("default")
+        current_formatter_name = current_handler.formatter.__class__.__name__
+        desired_formatter_name = log_formatter.__class__.__name__
+        if current_formatter_name != desired_formatter_name:
+            current_handler.setFormatter(log_formatter)
 
     # The default function pod resource values are of type str; however, when reading from environment variable numbers,
     # it converts them to type int if contains only number, so we want to convert them to str.
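The new background-task defaults above are reachable through the mlrun.mlconf singleton once the package is imported; a minimal sketch, assuming the background_tasks block sits where the hunk context suggests (directly under default_config):

    import mlrun

    # Defaults added in this release (see the config.py hunks above)
    print(mlrun.mlconf.background_tasks.function_deletion_batch_size)  # 10
    print(mlrun.mlconf.background_tasks.default_timeouts.operations.delete_function)  # "900"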
mlrun/data_types/to_pandas.py CHANGED

@@ -65,10 +65,10 @@ def toPandas(spark_df):
                 msg = (
                     "toPandas attempted Arrow optimization because "
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                    "failed by the reason below:\n
+                    f"failed by the reason below:\n {e}\n"
                     "Attempting non-optimization as "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                    "true."
+                    "true."
                 )
                 warnings.warn(msg)
                 use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                 "reached the error below and will not continue because automatic fallback "
                 "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n
+                f"false.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
                 "reached the error below and can not continue. Note that "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                 "effect on failures in the middle of "
-                "computation.\n
+                f"computation.\n {e}"
             )
             warnings.warn(msg)
             raise
mlrun/datastore/base.py CHANGED

@@ -179,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes,
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -193,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-
-
-
-
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
                 try:
@@ -217,6 +232,7 @@ class DataStore:
                 ):
                     raise ex
 
+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -228,6 +244,7 @@ class DataStore:
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
             return df_module.read_parquet(*args, **kwargs)
@@ -246,6 +263,7 @@ class DataStore:
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -310,7 +328,13 @@ class DataStore:
                 kwargs["columns"] = columns
 
             reader = self._parquet_reader(
-                df_module,
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )
 
         elif file_url.endswith(".json") or format == "json":
@@ -539,6 +563,7 @@ class DataItem:
         time_column=None,
         start_time=None,
         end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -550,6 +575,12 @@ class DataItem:
         :param end_time:    filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                                   Each tuple should be in the format (column_name, operator, value).
+                                   Supported operators: "=", ">=", "<=", ">", "<".
+                                   Example: [("Product", "=", "Computer")]
+                                   For all supported filters, please see:
+                                   https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -560,6 +591,7 @@ class DataItem:
             time_column=time_column,
             start_time=start_time,
            end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df
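The additional_filters argument added to DataStore.as_df and DataItem.as_df accepts pyarrow-style filter tuples, as documented in the new docstring above; a minimal usage sketch with placeholder URL and column names:

    from datetime import datetime

    import mlrun

    item = mlrun.get_dataitem("s3://my-bucket/sales.parquet")  # placeholder URL
    df = item.as_df(
        time_column="timestamp",                            # placeholder column name
        start_time=datetime(2024, 1, 1),
        end_time=datetime(2024, 2, 1),
        additional_filters=[("Product", "=", "Computer")],  # tuple format from the docstring
    )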
mlrun/datastore/datastore_profile.py CHANGED

@@ -185,6 +185,17 @@ class DatastoreProfileS3(DatastoreProfile):
     assume_role_arn: typing.Optional[str] = None
     access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def secrets(self) -> dict:
         res = {}
@@ -203,7 +214,13 @@ class DatastoreProfileS3(DatastoreProfile):
         return res
 
     def url(self, subpath):
-
+        # TODO: There is an inconsistency with DatastoreProfileGCS. In DatastoreProfileGCS,
+        # we assume that the subpath can begin without a '/' character,
+        # while here we assume it always starts with one.
+        if self.bucket:
+            return f"s3://{self.bucket}{subpath}"
+        else:
+            return f"s3:/{subpath}"
 
 
 class DatastoreProfileRedis(DatastoreProfile):
@@ -272,6 +289,17 @@ class DatastoreProfileGCS(DatastoreProfile):
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     @pydantic.validator("gcp_credentials", pre=True, always=True)
     def convert_dict_to_json(cls, v):
@@ -280,10 +308,15 @@ class DatastoreProfileGCS(DatastoreProfile):
         return v
 
     def url(self, subpath) -> str:
+        # TODO: but there's something wrong with the subpath being assumed to not start with a slash here,
+        # but the opposite assumption is made in S3.
         if subpath.startswith("/"):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-
+        if self.bucket:
+            return f"gcs://{self.bucket}/{subpath}"
+        else:
+            return f"gcs://{subpath}"
 
 
     def secrets(self) -> dict:
@@ -311,12 +344,26 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     client_secret: typing.Optional[str] = None
     sas_token: typing.Optional[str] = None
     credential: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def url(self, subpath) -> str:
         if subpath.startswith("/"):
             # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-
+        if self.bucket:
+            return f"az://{self.bucket}/{subpath}"
+        else:
+            return f"az://{subpath}"
 
     def secrets(self) -> dict:
         res = {}
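The optional bucket field added to the S3, GCS, and Azure profiles lets the profile itself carry the bucket, so ds:// sub-paths no longer have to; a sketch for S3 with placeholder names and credentials (register_temporary_client_datastore_profile is the existing client-side registration helper):

    from mlrun.datastore.datastore_profile import (
        DatastoreProfileS3,
        register_temporary_client_datastore_profile,
    )

    profile = DatastoreProfileS3(
        name="my-s3",         # placeholder profile name
        access_key_id="...",  # placeholder credentials
        secret_key="...",
        bucket="my-bucket",   # leaving this unset now emits a FutureWarning
    )
    register_temporary_client_datastore_profile(profile)

    # With a bucket on the profile, the sub-path alone is enough:
    print(profile.url("/path/to/data.parquet"))  # s3://my-bucket/path/to/data.parquet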
mlrun/datastore/hdfs.py CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from urllib.parse import urlparse
 
 import fsspec
 
@@ -49,3 +50,7 @@ class HdfsStore(DataStore):
     @property
     def spark_url(self):
         return f"hdfs://{self.host}:{self.port}"
+
+    def rm(self, url, recursive=False, maxdepth=None):
+        path = urlparse(url).path
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
mlrun/datastore/inmem.py CHANGED

@@ -80,8 +80,8 @@ class InMemoryStore(DataStore):
             reader = df_module.read_json
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
-        # InMemoryStore store
-        for field in ["time_column", "start_time", "end_time"]:
+        # InMemoryStore store – don't pass filters
+        for field in ["time_column", "start_time", "end_time", "additional_filters"]:
             kwargs.pop(field, None)
 
         return reader(item, **kwargs)
mlrun/datastore/sources.py CHANGED

@@ -102,8 +102,12 @@ class BaseSourceDriver(DataSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         """return the source data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
@@ -245,7 +249,11 @@ class CSVSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
@@ -281,6 +289,12 @@ class ParquetSource(BaseSourceDriver):
     :parameter start_time: filters out data before this time
     :parameter end_time: filters out data after this time
     :parameter attributes: additional parameters to pass to storey.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
     """
 
     kind = "parquet"
@@ -297,6 +311,7 @@ class ParquetSource(BaseSourceDriver):
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
+        additional_filters: Optional[list[tuple]] = None,
     ):
         super().__init__(
             name,
@@ -308,6 +323,7 @@ class ParquetSource(BaseSourceDriver):
             start_time,
             end_time,
         )
+        self.additional_filters = additional_filters
 
     @property
     def start_time(self):
@@ -341,6 +357,7 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         context=None,
+        additional_filters=None,
     ):
         import storey
 
@@ -358,6 +375,7 @@ class ParquetSource(BaseSourceDriver):
             end_filter=self.end_time,
             start_filter=self.start_time,
             filter_column=self.time_field or time_field,
+            additional_filters=self.additional_filters or additional_filters,
             **attributes,
         )
 
@@ -380,6 +398,7 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
@@ -389,6 +408,7 @@ class ParquetSource(BaseSourceDriver):
             end_time=end_time or self.end_time,
             time_column=time_field or self.time_field,
             format="parquet",
+            additional_filters=additional_filters or self.additional_filters,
             **reader_args,
         )
 
@@ -519,10 +539,15 @@ class BigQuerySource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         from google.cloud import bigquery
         from google.cloud.bigquery_storage_v1 import BigQueryReadClient
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         def schema_to_dtypes(schema):
             from mlrun.data_types.data_types import gbq_to_pandas_dtype
 
@@ -562,7 +587,6 @@ class BigQuerySource(BaseSourceDriver):
         else:
             df = rows_iterator.to_dataframe(dtypes=dtypes)
 
-        # TODO : filter as part of the query
         return select_columns_from_df(
             filter_df_start_end_time(
                 df,
@@ -740,7 +764,19 @@ class DataFrameSource:
             context=self.context or context,
         )
 
-    def to_dataframe(
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return self._df
 
     def is_iterator(self):
@@ -935,6 +971,7 @@ class KafkaSource(OnlineSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         raise mlrun.MLRunInvalidArgumentError(
             "KafkaSource does not support batch processing"
@@ -1075,9 +1112,13 @@ class SQLSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         import sqlalchemy as sqlalchemy
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         db_path = self.attributes.get("db_path")
         table_name = self.attributes.get("table_name")
         parse_dates = self.attributes.get("parse_dates")
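Among the batch sources above, only ParquetSource actually forwards additional_filters to the parquet reader; the other sources route the argument through additional_filters_warning. A feature-store style sketch with placeholder names and path:

    from datetime import datetime

    from mlrun.datastore.sources import ParquetSource

    source = ParquetSource(
        name="sales",                         # placeholder name
        path="s3://my-bucket/sales.parquet",  # placeholder path
        time_field="timestamp",
        start_time=datetime(2024, 1, 1),
        end_time=datetime(2024, 2, 1),
        additional_filters=[("Product", "=", "Computer")],
    )
    df = source.to_dataframe()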
mlrun/datastore/store_resources.py CHANGED

@@ -17,7 +17,7 @@
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
-from mlrun.utils.helpers import
+from mlrun.utils.helpers import parse_artifact_uri
 
 from ..common.helpers import parse_versioned_object_uri
 from ..platforms.iguazio import parse_path
@@ -167,11 +167,7 @@ def get_store_resource(
     )
     if resource.get("kind", "") == "link":
         # todo: support other link types (not just iter, move this to the db/api layer
-        link_iteration = (
-            resource.get("link_iteration", 0)
-            if is_legacy_artifact(resource)
-            else resource["spec"].get("link_iteration", 0)
-        )
+        link_iteration = resource["spec"].get("link_iteration", 0)
 
         resource = db.read_artifact(
             key,
|