mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (76)
  1. mlrun/__main__.py +0 -105
  2. mlrun/artifacts/__init__.py +1 -2
  3. mlrun/artifacts/base.py +8 -250
  4. mlrun/artifacts/dataset.py +1 -190
  5. mlrun/artifacts/manager.py +2 -41
  6. mlrun/artifacts/model.py +1 -140
  7. mlrun/artifacts/plots.py +1 -375
  8. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +24 -3
  10. mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
  11. mlrun/config.py +3 -3
  12. mlrun/data_types/to_pandas.py +4 -4
  13. mlrun/datastore/base.py +41 -9
  14. mlrun/datastore/datastore_profile.py +50 -3
  15. mlrun/datastore/inmem.py +2 -2
  16. mlrun/datastore/sources.py +43 -2
  17. mlrun/datastore/store_resources.py +2 -6
  18. mlrun/datastore/targets.py +106 -39
  19. mlrun/db/httpdb.py +4 -4
  20. mlrun/feature_store/__init__.py +0 -2
  21. mlrun/feature_store/api.py +12 -47
  22. mlrun/feature_store/feature_set.py +9 -0
  23. mlrun/feature_store/retrieval/base.py +9 -4
  24. mlrun/feature_store/retrieval/conversion.py +4 -4
  25. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  26. mlrun/feature_store/retrieval/job.py +2 -0
  27. mlrun/feature_store/retrieval/local_merger.py +2 -0
  28. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  29. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  30. mlrun/kfpops.py +5 -10
  31. mlrun/launcher/base.py +1 -1
  32. mlrun/launcher/client.py +1 -1
  33. mlrun/lists.py +2 -2
  34. mlrun/model.py +18 -9
  35. mlrun/model_monitoring/api.py +41 -18
  36. mlrun/model_monitoring/application.py +5 -305
  37. mlrun/model_monitoring/applications/__init__.py +11 -0
  38. mlrun/model_monitoring/applications/_application_steps.py +158 -0
  39. mlrun/model_monitoring/applications/base.py +282 -0
  40. mlrun/model_monitoring/applications/context.py +214 -0
  41. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  42. mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
  43. mlrun/model_monitoring/applications/results.py +99 -0
  44. mlrun/model_monitoring/controller.py +3 -1
  45. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  46. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
  47. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
  48. mlrun/model_monitoring/evidently_application.py +6 -118
  49. mlrun/model_monitoring/helpers.py +1 -1
  50. mlrun/model_monitoring/model_endpoint.py +3 -2
  51. mlrun/model_monitoring/stream_processing.py +2 -3
  52. mlrun/model_monitoring/writer.py +69 -39
  53. mlrun/platforms/iguazio.py +2 -2
  54. mlrun/projects/project.py +18 -31
  55. mlrun/render.py +2 -10
  56. mlrun/run.py +1 -3
  57. mlrun/runtimes/__init__.py +3 -3
  58. mlrun/runtimes/base.py +3 -3
  59. mlrun/runtimes/funcdoc.py +0 -28
  60. mlrun/runtimes/local.py +1 -1
  61. mlrun/runtimes/mpijob/__init__.py +0 -20
  62. mlrun/runtimes/mpijob/v1.py +1 -1
  63. mlrun/runtimes/nuclio/function.py +1 -1
  64. mlrun/runtimes/utils.py +1 -1
  65. mlrun/utils/helpers.py +27 -40
  66. mlrun/utils/notifications/notification/slack.py +4 -2
  67. mlrun/utils/notifications/notification_pusher.py +133 -14
  68. mlrun/utils/version/version.json +2 -2
  69. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
  70. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
  71. mlrun/runtimes/mpijob/v1alpha1.py +0 -29
  72. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  73. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
  74. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
  75. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
  76. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -99,14 +99,17 @@ class FeatureSetFeatures(MonitoringStrEnum):
 
 class ApplicationEvent:
     APPLICATION_NAME = "application_name"
-    CURRENT_STATS = "current_stats"
-    FEATURE_STATS = "feature_stats"
-    SAMPLE_PARQUET_PATH = "sample_parquet_path"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
     LAST_REQUEST = "last_request"
     ENDPOINT_ID = "endpoint_id"
     OUTPUT_STREAM_URI = "output_stream_uri"
+    MLRUN_CONTEXT = "mlrun_context"
+
+    # Deprecated fields - TODO : delete in 1.9.0 (V1 app deprecation)
+    SAMPLE_PARQUET_PATH = "sample_parquet_path"
+    CURRENT_STATS = "current_stats"
+    FEATURE_STATS = "feature_stats"
 
 
 class WriterEvent(MonitoringStrEnum):
@@ -114,6 +117,21 @@ class WriterEvent(MonitoringStrEnum):
     ENDPOINT_ID = "endpoint_id"
     START_INFER_TIME = "start_infer_time"
     END_INFER_TIME = "end_infer_time"
+    EVENT_KIND = "event_kind"  # metric or result
+    DATA = "data"
+
+
+class WriterEventKind(MonitoringStrEnum):
+    METRIC = "metric"
+    RESULT = "result"
+
+
+class MetricData(MonitoringStrEnum):
+    METRIC_NAME = "metric_name"
+    METRIC_VALUE = "metric_value"
+
+
+class ResultData(MonitoringStrEnum):
     RESULT_NAME = "result_name"
     RESULT_VALUE = "result_value"
     RESULT_KIND = "result_kind"
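
The new EVENT_KIND and DATA fields split writer events into typed payloads: WriterEventKind marks whether the body is a metric or a result, and MetricData (alongside the existing ResultData fields) names the body keys. A minimal sketch of how a metric event could be keyed with these constants; the endpoint id, timestamps, and metric name are hypothetical, and the actual payload produced by the monitoring writer is not shown in this diff:

from mlrun.common.schemas.model_monitoring.constants import (
    MetricData,
    WriterEvent,
    WriterEventKind,
)

# Hypothetical event payload keyed by the new constants (sketch only).
event = {
    WriterEvent.ENDPOINT_ID: "my-endpoint-id",
    WriterEvent.START_INFER_TIME: "2024-01-01 00:00:00",
    WriterEvent.END_INFER_TIME: "2024-01-01 01:00:00",
    WriterEvent.EVENT_KIND: WriterEventKind.METRIC,  # "metric" or "result"
    WriterEvent.DATA: {
        MetricData.METRIC_NAME: "latency_avg",  # hypothetical metric
        MetricData.METRIC_VALUE: 12.3,
    },
}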
@@ -303,6 +321,9 @@ class ModelMonitoringAppLabel:
     KEY = "mlrun__type"
     VAL = "mlrun__model-monitoring-application"
 
+    def __str__(self) -> str:
+        return f"{self.KEY}={self.VAL}"
+
 
 class ControllerPolicy:
     BASE_PERIOD = "base_period"
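
The added __str__ renders the label pair as a standard key=value selector string. A quick check of the behavior introduced above (assuming the class is imported from the constants module shown in this diff):

from mlrun.common.schemas.model_monitoring.constants import ModelMonitoringAppLabel

label = ModelMonitoringAppLabel()
# Renders the KEY/VAL pair defined above as a single selector-style string.
assert str(label) == "mlrun__type=mlrun__model-monitoring-application"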
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 import json
@@ -21,6 +20,7 @@ from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
 import mlrun.common.model_monitoring
+import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
 from .constants import (
@@ -292,6 +292,18 @@ class ModelEndpointList(BaseModel):
     endpoints: list[ModelEndpoint] = []
 
 
+class ModelEndpointMonitoringMetricType(mlrun.common.types.StrEnum):
+    RESULT = "result"
+
+
+class ModelEndpointMonitoringMetric(BaseModel):
+    project: str
+    app: str
+    type: ModelEndpointMonitoringMetricType
+    name: str
+    full_name: str
+
+
 def _mapping_attributes(
     base_model: BaseModel,
     flattened_dictionary: dict,
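
A hedged sketch of constructing the new schema objects added above; the application name, metric name, and full_name format below are illustrative only and are not defined by this diff:

from mlrun.common.schemas.model_monitoring.model_endpoints import (
    ModelEndpointMonitoringMetric,
    ModelEndpointMonitoringMetricType,
)

metric = ModelEndpointMonitoringMetric(
    project="my-project",  # hypothetical values
    app="my-monitoring-app",
    type=ModelEndpointMonitoringMetricType.RESULT,
    name="drift-result",
    full_name="my-project.my-monitoring-app.result.drift-result",
)
print(metric.json())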
mlrun/config.py CHANGED
@@ -361,7 +361,7 @@ default_config = {
         # is set to ClusterIP
         # ---------------------------------------------------------------------
         # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
         # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
         "add_templated_ingress_host_mode": "never",
         "explicit_ack": "enabled",
@@ -554,7 +554,7 @@ default_config = {
         "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
         # "authority" is optional and generalizes [userinfo "@"] host [":" port]
         "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
-        "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/nosql",
+        "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
     },
     "default_targets": "parquet,nosql",
     "default_job_image": "mlrun/mlrun",
@@ -692,7 +692,7 @@ default_config = {
     "grafana_url": "",
     "alerts": {
         # supported modes: "enabled", "disabled".
-        "mode": "disabled"
+        "mode": "enabled"
     },
     "auth_with_client_id": {
         "enabled": False,
mlrun/data_types/to_pandas.py CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
             msg = (
                 "toPandas attempted Arrow optimization because "
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                "failed by the reason below:\n %s\n"
+                f"failed by the reason below:\n {e}\n"
                 "Attempting non-optimization as "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                "true." % str(e)
+                "true."
             )
             warnings.warn(msg)
             use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                 "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                 "reached the error below and will not continue because automatic fallback "
                 "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n %s" % str(e)
+                f"false.\n {e}"
             )
             warnings.warn(msg)
             raise
@@ -144,7 +144,7 @@ def toPandas(spark_df):
                 "reached the error below and can not continue. Note that "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                 "effect on failures in the middle of "
-                "computation.\n %s" % str(e)
+                f"computation.\n {e}"
             )
             warnings.warn(msg)
             raise
mlrun/datastore/base.py CHANGED
@@ -179,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes, start_time_inner, end_time_inner, kwargs
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -193,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-                if time_column is None:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        "When providing start_time or end_time, must provide time_column"
-                    )
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
             try:
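
The extra conditions are appended to the first filter group (filters[0] += filters_inner), i.e. they are AND-ed with the generated time-range filters, since pandas/pyarrow read DNF-style filters as a list of OR-ed groups whose inner tuples are AND-ed. A standalone sketch of the resulting call shape, with a hypothetical file and column:

from datetime import datetime

import pandas as pd

time_filters = [
    ("timestamp", ">=", datetime(2024, 1, 1)),
    ("timestamp", "<", datetime(2024, 2, 1)),
]
additional_filters = [("Product", "=", "Computer")]  # hypothetical column/value

# One inner list => a single AND group, mirroring `filters[0] += filters_inner`.
filters = [time_filters + additional_filters]
df = pd.read_parquet("data.parquet", filters=filters)  # hypothetical file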
@@ -217,6 +232,7 @@ class DataStore:
                 ):
                     raise ex
 
+            # TODO: fix timezone issue (ML-6308)
             if start_time.tzinfo:
                 start_time_inner = start_time.replace(tzinfo=None)
                 end_time_inner = end_time.replace(tzinfo=None)
@@ -228,6 +244,7 @@ class DataStore:
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
             return df_module.read_parquet(*args, **kwargs)
@@ -246,6 +263,7 @@ class DataStore:
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -310,7 +328,13 @@ class DataStore:
             kwargs["columns"] = columns
 
         reader = self._parquet_reader(
-            df_module, url, file_system, time_column, start_time, end_time
+            df_module,
+            url,
+            file_system,
+            time_column,
+            start_time,
+            end_time,
+            additional_filters,
         )
 
     elif file_url.endswith(".json") or format == "json":
@@ -539,6 +563,7 @@ class DataItem:
         time_column=None,
         start_time=None,
         end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -550,6 +575,12 @@ class DataItem:
        :param end_time: filters out data after this time
        :param time_column: Store timestamp_key will be used if None.
            The results will be filtered by this column and start_time & end_time.
+       :param additional_filters: List of additional_filter conditions as tuples.
+           Each tuple should be in the format (column_name, operator, value).
+           Supported operators: "=", ">=", "<=", ">", "<".
+           Example: [("Product", "=", "Computer")]
+           For all supported filters, please see:
+           https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
        """
        df = self._store.as_df(
            self._url,
@@ -560,6 +591,7 @@ class DataItem:
            time_column=time_column,
            start_time=start_time,
            end_time=end_time,
+            additional_filters=additional_filters,
            **kwargs,
        )
        return df
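
Usage sketch for the new additional_filters argument on DataItem.as_df, following the docstring added above; the object URL and column are hypothetical:

import mlrun

item = mlrun.get_dataitem("s3://my-bucket/sales.parquet")  # hypothetical parquet object
df = item.as_df(additional_filters=[("Product", "=", "Computer")])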
mlrun/datastore/datastore_profile.py CHANGED
@@ -185,6 +185,17 @@ class DatastoreProfileS3(DatastoreProfile):
     assume_role_arn: typing.Optional[str] = None
     access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def secrets(self) -> dict:
         res = {}
@@ -203,7 +214,13 @@ class DatastoreProfileS3(DatastoreProfile):
         return res
 
     def url(self, subpath):
-        return f"s3:/{subpath}"
+        # TODO: There is an inconsistency with DatastoreProfileGCS. In DatastoreProfileGCS,
+        # we assume that the subpath can begin without a '/' character,
+        # while here we assume it always starts with one.
+        if self.bucket:
+            return f"s3://{self.bucket}{subpath}"
+        else:
+            return f"s3:/{subpath}"
 
 
 class DatastoreProfileRedis(DatastoreProfile):
@@ -272,6 +289,17 @@ class DatastoreProfileGCS(DatastoreProfile):
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     @pydantic.validator("gcp_credentials", pre=True, always=True)
     def convert_dict_to_json(cls, v):
@@ -280,10 +308,15 @@ class DatastoreProfileGCS(DatastoreProfile):
         return v
 
     def url(self, subpath) -> str:
+        # TODO: but there's something wrong with the subpath being assumed to not start with a slash here,
+        # but the opposite assumption is made in S3.
         if subpath.startswith("/"):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"gcs://{subpath}"
+        if self.bucket:
+            return f"gcs://{self.bucket}/{subpath}"
+        else:
+            return f"gcs://{subpath}"
 
     def secrets(self) -> dict:
         res = {}
@@ -311,12 +344,26 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     client_secret: typing.Optional[str] = None
     sas_token: typing.Optional[str] = None
     credential: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def url(self, subpath) -> str:
         if subpath.startswith("/"):
             # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"az://{subpath}"
+        if self.bucket:
+            return f"az://{self.bucket}/{subpath}"
+        else:
+            return f"az://{subpath}"
 
     def secrets(self) -> dict:
         res = {}
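
With the new optional bucket field, the profile itself can carry the bucket and url() prepends it; omitting it keeps the previous behavior but raises a FutureWarning, since the field is slated to become mandatory in 1.9. A sketch with hypothetical profile and bucket names:

from mlrun.datastore.datastore_profile import DatastoreProfileS3

profile = DatastoreProfileS3(name="my-s3", bucket="my-bucket")
print(profile.url("/datasets/sales.parquet"))
# s3://my-bucket/datasets/sales.parquet

# Omitting the bucket keeps the old behavior (the subpath must carry the bucket)
# and emits a FutureWarning.
legacy = DatastoreProfileS3(name="legacy-s3")
print(legacy.url("/my-bucket/datasets/sales.parquet"))
# s3://my-bucket/datasets/sales.parquet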
mlrun/datastore/inmem.py CHANGED
@@ -80,8 +80,8 @@ class InMemoryStore(DataStore):
             reader = df_module.read_json
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
-        # InMemoryStore store do not filter on time
-        for field in ["time_column", "start_time", "end_time"]:
+        # InMemoryStore store don't pass filters
+        for field in ["time_column", "start_time", "end_time", "additional_filters"]:
             kwargs.pop(field, None)
 
         return reader(item, **kwargs)
mlrun/datastore/sources.py CHANGED
@@ -102,8 +102,12 @@ class BaseSourceDriver(DataSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         """return the source data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
@@ -245,7 +249,11 @@ class CSVSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
@@ -281,6 +289,12 @@ class ParquetSource(BaseSourceDriver):
     :parameter start_time: filters out data before this time
     :parameter end_time: filters out data after this time
     :parameter attributes: additional parameters to pass to storey.
+    :param additional_filters: List of additional_filter conditions as tuples.
+        Each tuple should be in the format (column_name, operator, value).
+        Supported operators: "=", ">=", "<=", ">", "<".
+        Example: [("Product", "=", "Computer")]
+        For all supported filters, please see:
+        https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
     """
 
     kind = "parquet"
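
A usage sketch for the new additional_filters argument on ParquetSource, mirroring the docstring example above; the source path and filter values are hypothetical:

from mlrun.datastore.sources import ParquetSource

source = ParquetSource(
    name="sales",
    path="s3://my-bucket/sales.parquet",  # hypothetical path
    additional_filters=[("Product", "=", "Computer")],
)

# The stored filters are applied when the source is read back as a dataframe.
df = source.to_dataframe()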
@@ -297,6 +311,7 @@ class ParquetSource(BaseSourceDriver):
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
+        additional_filters: Optional[list[tuple]] = None,
     ):
         super().__init__(
             name,
@@ -308,6 +323,7 @@ class ParquetSource(BaseSourceDriver):
             start_time,
             end_time,
         )
+        self.additional_filters = additional_filters
 
     @property
     def start_time(self):
@@ -341,6 +357,7 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         context=None,
+        additional_filters=None,
     ):
         import storey
 
@@ -358,6 +375,7 @@ class ParquetSource(BaseSourceDriver):
             end_filter=self.end_time,
             start_filter=self.start_time,
             filter_column=self.time_field or time_field,
+            additional_filters=self.additional_filters or additional_filters,
             **attributes,
         )
 
@@ -380,6 +398,7 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
@@ -389,6 +408,7 @@ class ParquetSource(BaseSourceDriver):
             end_time=end_time or self.end_time,
             time_column=time_field or self.time_field,
             format="parquet",
+            additional_filters=additional_filters or self.additional_filters,
             **reader_args,
         )
 
@@ -519,10 +539,15 @@ class BigQuerySource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         from google.cloud import bigquery
         from google.cloud.bigquery_storage_v1 import BigQueryReadClient
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         def schema_to_dtypes(schema):
             from mlrun.data_types.data_types import gbq_to_pandas_dtype
 
@@ -562,7 +587,6 @@ class BigQuerySource(BaseSourceDriver):
         else:
             df = rows_iterator.to_dataframe(dtypes=dtypes)
 
-        # TODO : filter as part of the query
         return select_columns_from_df(
             filter_df_start_end_time(
                 df,
@@ -740,7 +764,19 @@ class DataFrameSource:
             context=self.context or context,
         )
 
-    def to_dataframe(self, **kwargs):
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return self._df
 
     def is_iterator(self):
@@ -935,6 +971,7 @@ class KafkaSource(OnlineSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         raise mlrun.MLRunInvalidArgumentError(
             "KafkaSource does not support batch processing"
@@ -1075,9 +1112,13 @@ class SQLSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         import sqlalchemy as sqlalchemy
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         db_path = self.attributes.get("db_path")
         table_name = self.attributes.get("table_name")
         parse_dates = self.attributes.get("parse_dates")
mlrun/datastore/store_resources.py CHANGED
@@ -17,7 +17,7 @@
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
-from mlrun.utils.helpers import is_legacy_artifact, parse_artifact_uri
+from mlrun.utils.helpers import parse_artifact_uri
 
 from ..common.helpers import parse_versioned_object_uri
 from ..platforms.iguazio import parse_path
@@ -167,11 +167,7 @@ def get_store_resource(
     )
     if resource.get("kind", "") == "link":
         # todo: support other link types (not just iter, move this to the db/api layer
-        link_iteration = (
-            resource.get("link_iteration", 0)
-            if is_legacy_artifact(resource)
-            else resource["spec"].get("link_iteration", 0)
-        )
+        link_iteration = resource["spec"].get("link_iteration", 0)
 
         resource = db.read_artifact(
             key,