mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (76)
  1. mlrun/__main__.py +0 -105
  2. mlrun/artifacts/__init__.py +1 -2
  3. mlrun/artifacts/base.py +8 -250
  4. mlrun/artifacts/dataset.py +1 -190
  5. mlrun/artifacts/manager.py +2 -41
  6. mlrun/artifacts/model.py +1 -140
  7. mlrun/artifacts/plots.py +1 -375
  8. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  9. mlrun/common/schemas/model_monitoring/constants.py +24 -3
  10. mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
  11. mlrun/config.py +3 -3
  12. mlrun/data_types/to_pandas.py +4 -4
  13. mlrun/datastore/base.py +41 -9
  14. mlrun/datastore/datastore_profile.py +50 -3
  15. mlrun/datastore/inmem.py +2 -2
  16. mlrun/datastore/sources.py +43 -2
  17. mlrun/datastore/store_resources.py +2 -6
  18. mlrun/datastore/targets.py +106 -39
  19. mlrun/db/httpdb.py +4 -4
  20. mlrun/feature_store/__init__.py +0 -2
  21. mlrun/feature_store/api.py +12 -47
  22. mlrun/feature_store/feature_set.py +9 -0
  23. mlrun/feature_store/retrieval/base.py +9 -4
  24. mlrun/feature_store/retrieval/conversion.py +4 -4
  25. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  26. mlrun/feature_store/retrieval/job.py +2 -0
  27. mlrun/feature_store/retrieval/local_merger.py +2 -0
  28. mlrun/feature_store/retrieval/spark_merger.py +5 -0
  29. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
  30. mlrun/kfpops.py +5 -10
  31. mlrun/launcher/base.py +1 -1
  32. mlrun/launcher/client.py +1 -1
  33. mlrun/lists.py +2 -2
  34. mlrun/model.py +18 -9
  35. mlrun/model_monitoring/api.py +41 -18
  36. mlrun/model_monitoring/application.py +5 -305
  37. mlrun/model_monitoring/applications/__init__.py +11 -0
  38. mlrun/model_monitoring/applications/_application_steps.py +158 -0
  39. mlrun/model_monitoring/applications/base.py +282 -0
  40. mlrun/model_monitoring/applications/context.py +214 -0
  41. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  42. mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
  43. mlrun/model_monitoring/applications/results.py +99 -0
  44. mlrun/model_monitoring/controller.py +3 -1
  45. mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
  46. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
  47. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
  48. mlrun/model_monitoring/evidently_application.py +6 -118
  49. mlrun/model_monitoring/helpers.py +1 -1
  50. mlrun/model_monitoring/model_endpoint.py +3 -2
  51. mlrun/model_monitoring/stream_processing.py +2 -3
  52. mlrun/model_monitoring/writer.py +69 -39
  53. mlrun/platforms/iguazio.py +2 -2
  54. mlrun/projects/project.py +18 -31
  55. mlrun/render.py +2 -10
  56. mlrun/run.py +1 -3
  57. mlrun/runtimes/__init__.py +3 -3
  58. mlrun/runtimes/base.py +3 -3
  59. mlrun/runtimes/funcdoc.py +0 -28
  60. mlrun/runtimes/local.py +1 -1
  61. mlrun/runtimes/mpijob/__init__.py +0 -20
  62. mlrun/runtimes/mpijob/v1.py +1 -1
  63. mlrun/runtimes/nuclio/function.py +1 -1
  64. mlrun/runtimes/utils.py +1 -1
  65. mlrun/utils/helpers.py +27 -40
  66. mlrun/utils/notifications/notification/slack.py +4 -2
  67. mlrun/utils/notifications/notification_pusher.py +133 -14
  68. mlrun/utils/version/version.json +2 -2
  69. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
  70. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
  71. mlrun/runtimes/mpijob/v1alpha1.py +0 -29
  72. /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
  73. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
  74. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
  75. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
  76. {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py CHANGED
@@ -656,6 +656,29 @@ class BaseStoreTarget(DataTargetBase):
      def _target_path_object(self):
          """return the actual/computed target path"""
          is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
+
+         if self._resource and self.path:
+             parsed_url = urlparse(self.path)
+             # When the URL consists only from scheme and endpoint and no path,
+             # make a default path for DS and redis targets.
+             # Also ignore KafkaTarget when it uses the ds scheme (no default path for KafkaTarget)
+             if (
+                 not isinstance(self, KafkaTarget)
+                 and parsed_url.scheme in ["ds", "redis", "rediss"]
+                 and (not parsed_url.path or parsed_url.path == "/")
+             ):
+                 return TargetPathObject(
+                     _get_target_path(
+                         self,
+                         self._resource,
+                         self.run_id is not None,
+                         netloc=parsed_url.netloc,
+                         scheme=parsed_url.scheme,
+                     ),
+                     self.run_id,
+                     is_single_file,
+                 )
+
          return self.get_path() or (
              TargetPathObject(
                  _get_target_path(self, self._resource, self.run_id is not None),
@@ -714,9 +737,13 @@ class BaseStoreTarget(DataTargetBase):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
          **kwargs,
      ):
          """return the target data as dataframe"""
+         mlrun.utils.helpers.additional_filters_warning(
+             additional_filters, self.__class__
+         )
          return mlrun.get_dataitem(self.get_target_path()).as_df(
              columns=columns,
              df_module=df_module,
@@ -961,6 +988,7 @@ class ParquetTarget(BaseStoreTarget):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
          **kwargs,
      ):
          """return the target data as dataframe"""
@@ -971,6 +999,7 @@ class ParquetTarget(BaseStoreTarget):
              start_time=start_time,
              end_time=end_time,
              time_column=time_column,
+             additional_filters=additional_filters,
              **kwargs,
          )
          if not columns:
@@ -1101,8 +1130,12 @@ class CSVTarget(BaseStoreTarget):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
          **kwargs,
      ):
+         mlrun.utils.helpers.additional_filters_warning(
+             additional_filters, self.__class__
+         )
          df = super().as_df(
              columns=columns,
              df_module=df_module,
@@ -1209,6 +1242,7 @@ class SnowflakeTarget(BaseStoreTarget):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
          **kwargs,
      ):
          raise NotImplementedError()
@@ -1275,7 +1309,17 @@ class NoSqlBaseTarget(BaseStoreTarget):
      def get_dask_options(self):
          return {"format": "csv"}

-     def as_df(self, columns=None, df_module=None, **kwargs):
+     def as_df(
+         self,
+         columns=None,
+         df_module=None,
+         entities=None,
+         start_time=None,
+         end_time=None,
+         time_column=None,
+         additional_filters=None,
+         **kwargs,
+     ):
          raise NotImplementedError()

      def write_dataframe(
@@ -1390,39 +1434,6 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
      support_spark = True
      writer_step_name = "RedisNoSqlTarget"

-     @property
-     def _target_path_object(self):
-         url = self.path or mlrun.mlconf.redis.url
-         if self._resource and url:
-             parsed_url = urlparse(url)
-             if not parsed_url.path or parsed_url.path == "/":
-                 kind_prefix = (
-                     "sets"
-                     if self._resource.kind
-                     == mlrun.common.schemas.ObjectKind.feature_set
-                     else "vectors"
-                 )
-                 kind = self.kind
-                 name = self._resource.metadata.name
-                 project = (
-                     self._resource.metadata.project or mlrun.mlconf.default_project
-                 )
-                 data_prefix = get_default_prefix_for_target(kind).format(
-                     ds_profile_name=parsed_url.netloc,
-                     authority=parsed_url.netloc,
-                     project=project,
-                     kind=kind,
-                     name=name,
-                 )
-                 if url.startswith("rediss://"):
-                     data_prefix = data_prefix.replace("redis://", "rediss://", 1)
-                 if not self.run_id:
-                     version = self._resource.metadata.tag or "latest"
-                     name = f"{name}-{version}"
-                 url = f"{data_prefix}/{kind_prefix}/{name}"
-                 return TargetPathObject(url, self.run_id, False)
-         return super()._target_path_object
-
      # Fetch server url from the RedisNoSqlTarget::__init__() 'path' parameter.
      # If not set fetch it from 'mlrun.mlconf.redis.url' (MLRUN_REDIS__URL environment variable).
      # Then look for username and password at REDIS_xxx secrets
@@ -1544,7 +1555,17 @@ class StreamTarget(BaseStoreTarget):
              **self.attributes,
          )

-     def as_df(self, columns=None, df_module=None, **kwargs):
+     def as_df(
+         self,
+         columns=None,
+         df_module=None,
+         entities=None,
+         start_time=None,
+         end_time=None,
+         time_column=None,
+         additional_filters=None,
+         **kwargs,
+     ):
          raise NotImplementedError()


@@ -1649,7 +1670,17 @@ class KafkaTarget(BaseStoreTarget):
              **attributes,
          )

-     def as_df(self, columns=None, df_module=None, **kwargs):
+     def as_df(
+         self,
+         columns=None,
+         df_module=None,
+         entities=None,
+         start_time=None,
+         end_time=None,
+         time_column=None,
+         additional_filters=None,
+         **kwargs,
+     ):
          raise NotImplementedError()

      def purge(self):
@@ -1696,7 +1727,17 @@ class TSDBTarget(BaseStoreTarget):
              **self.attributes,
          )

-     def as_df(self, columns=None, df_module=None, **kwargs):
+     def as_df(
+         self,
+         columns=None,
+         df_module=None,
+         entities=None,
+         start_time=None,
+         end_time=None,
+         time_column=None,
+         additional_filters=None,
+         **kwargs,
+     ):
          raise NotImplementedError()

      def write_dataframe(
@@ -1807,11 +1848,16 @@ class DFTarget(BaseStoreTarget):
          self,
          columns=None,
          df_module=None,
+         entities=None,
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
          **kwargs,
      ):
+         mlrun.utils.helpers.additional_filters_warning(
+             additional_filters, self.__class__
+         )
          return select_columns_from_df(
              filter_df_start_end_time(
                  self._df,
@@ -1986,6 +2032,7 @@ class SQLTarget(BaseStoreTarget):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
          **kwargs,
      ):
          try:
@@ -1994,6 +2041,10 @@ class SQLTarget(BaseStoreTarget):
          except (ModuleNotFoundError, ImportError) as exc:
              self._raise_sqlalchemy_import_error(exc)

+         mlrun.utils.helpers.additional_filters_warning(
+             additional_filters, self.__class__
+         )
+
          db_path, table_name, _, _, _, _ = self._parse_url()
          engine = sqlalchemy.create_engine(db_path)
          parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
@@ -2140,7 +2191,7 @@ kind_to_driver = {
  }


- def _get_target_path(driver, resource, run_id_mode=False):
+ def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
      """return the default target path given the resource and target kind"""
      kind = driver.kind
      suffix = driver.suffix
@@ -2157,11 +2208,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
      )
      name = resource.metadata.name
      project = resource.metadata.project or mlrun.mlconf.default_project
-     data_prefix = get_default_prefix_for_target(kind).format(
+
+     default_kind_name = kind
+     if scheme == "ds":
+         # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
+         # as a placeholder that can be used in any specified target
+         default_kind_name = "dsnosql"
+     if scheme == "redis" or scheme == "rediss":
+         default_kind_name = TargetTypes.redisnosql
+
+     netloc = netloc or ""
+     data_prefix = get_default_prefix_for_target(default_kind_name).format(
+         ds_profile_name=netloc,  # In case of ds profile, set its the name
+         authority=netloc,  # In case of redis, replace {authority} with netloc
          project=project,
          kind=kind,
          name=name,
      )
+
+     if scheme == "rediss":
+         data_prefix = data_prefix.replace("redis://", "rediss://", 1)
+
      # todo: handle ver tag changes, may need to copy files?
      if not run_id_mode:
          version = resource.metadata.tag
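
The new branch in BaseStoreTarget._target_path_object only applies to scheme-and-endpoint URLs that carry no path component. The standalone sketch below mirrors that check for illustration; it is not mlrun code, and the endpoint names are invented.

from urllib.parse import urlparse

def needs_default_path(url: str, is_kafka_target: bool = False) -> bool:
    # Mirrors the condition added to BaseStoreTarget._target_path_object:
    # ds/redis/rediss URLs with no path (or just "/") get a computed default
    # path via _get_target_path(); KafkaTarget is excluded.
    parsed = urlparse(url)
    return (
        not is_kafka_target
        and parsed.scheme in ("ds", "redis", "rediss")
        and (not parsed.path or parsed.path == "/")
    )

assert needs_default_path("redis://my-redis:6379")  # default path is computed
assert needs_default_path("ds://my-profile")  # ds profile with no explicit path
assert not needs_default_path("redis://my-redis:6379/projects/x")  # explicit path kept
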
mlrun/db/httpdb.py CHANGED
@@ -659,10 +659,10 @@ class HTTPRunDB(RunDBInterface):
                  nil_resp += 1

              if watch and state in [
-                 mlrun.runtimes.constants.RunStates.pending,
-                 mlrun.runtimes.constants.RunStates.running,
-                 mlrun.runtimes.constants.RunStates.created,
-                 mlrun.runtimes.constants.RunStates.aborting,
+                 mlrun.common.runtimes.constants.RunStates.pending,
+                 mlrun.common.runtimes.constants.RunStates.running,
+                 mlrun.common.runtimes.constants.RunStates.created,
+                 mlrun.common.runtimes.constants.RunStates.aborting,
              ]:
                  continue
              else:
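
These RunStates references track the relocation of the runtimes constants module (entry 72 in the file list: mlrun/runtimes/constants.py moved to mlrun/common/runtimes/constants.py). A hedged sketch of the updated import path, assuming only the class members that appear in these hunks:

from mlrun.common.runtimes.constants import RunStates

# Members referenced in this diff; the old import path was mlrun.runtimes.constants.
in_progress_states = [
    RunStates.pending,
    RunStates.running,
    RunStates.created,
    RunStates.aborting,
]
terminal_states = RunStates.error_and_abortion_states()
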
mlrun/feature_store/__init__.py CHANGED
@@ -19,7 +19,6 @@ __all__ = [
      "get_online_feature_service",
      "ingest",
      "preview",
-     "deploy_ingestion_service",
      "deploy_ingestion_service_v2",
      "delete_feature_set",
      "delete_feature_vector",
@@ -41,7 +40,6 @@ from ..features import Entity, Feature
  from .api import (
      delete_feature_set,
      delete_feature_vector,
-     deploy_ingestion_service,
      deploy_ingestion_service_v2,
      get_feature_set,
      get_feature_vector,
mlrun/feature_store/api.py CHANGED
@@ -113,6 +113,7 @@ def get_offline_features(
      order_by: Union[str, list[str]] = None,
      spark_service: str = None,
      timestamp_for_filtering: Union[str, dict[str, str]] = None,
+     additional_filters: list = None,
  ):
      """retrieve offline feature vector results

@@ -175,6 +176,13 @@ def get_offline_features(
                                       By default, the filter executes on the timestamp_key of each feature set.
                                       Note: the time filtering is performed on each feature set before the
                                       merge process using start_time and end_time params.
+     :param additional_filters: List of additional_filter conditions as tuples.
+                                Each tuple should be in the format (column_name, operator, value).
+                                Supported operators: "=", ">=", "<=", ">", "<".
+                                Example: [("Product", "=", "Computer")]
+                                For all supported filters, please see:
+                                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+

      """
      return _get_offline_features(
@@ -194,6 +202,7 @@ def get_offline_features(
          order_by,
          spark_service,
          timestamp_for_filtering,
+         additional_filters,
      )


@@ -214,6 +223,7 @@ def _get_offline_features(
      order_by: Union[str, list[str]] = None,
      spark_service: str = None,
      timestamp_for_filtering: Union[str, dict[str, str]] = None,
+     additional_filters=None,
  ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
      if entity_rows is None and entity_timestamp_column is not None:
          raise mlrun.errors.MLRunInvalidArgumentError(
@@ -252,6 +262,7 @@ def _get_offline_features(
          start_time=start_time,
          end_time=end_time,
          timestamp_for_filtering=timestamp_for_filtering,
+         additional_filters=additional_filters,
      )

      merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -267,6 +278,7 @@ def _get_offline_features(
          update_stats=update_stats,
          query=query,
          order_by=order_by,
+         additional_filters=additional_filters,
      )


@@ -1005,53 +1017,6 @@ def _deploy_ingestion_service_v2(
      return function.deploy(), function


- @deprecated(
-     version="1.5.0",
-     reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-     category=FutureWarning,
- )
- def deploy_ingestion_service(
-     featureset: Union[FeatureSet, str],
-     source: DataSource = None,
-     targets: list[DataTargetBase] = None,
-     name: str = None,
-     run_config: RunConfig = None,
-     verbose=False,
- ) -> str:
-     """Start real-time ingestion service using nuclio function
-
-     Deploy a real-time function implementing feature ingestion pipeline
-     the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-     the `run_config` parameter allow specifying the function and job configuration,
-     see: :py:class:`~mlrun.feature_store.RunConfig`
-
-     example::
-
-         source = HTTPSource()
-         func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-         config = RunConfig(function=func)
-         my_set.deploy_ingestion_service(source, run_config=config)
-
-     :param featureset: feature set object or uri
-     :param source: data source object describing the online or offline source
-     :param targets: list of data target objects
-     :param name: name for the job/function
-     :param run_config: service runtime configuration (function object/uri, resources, etc..)
-     :param verbose: verbose log
-
-     :return: URL to access the deployed ingestion service
-     """
-     endpoint, _ = featureset.deploy_ingestion_service(
-         source=source,
-         targets=targets,
-         name=name,
-         run_config=run_config,
-         verbose=verbose,
-     )
-     return endpoint
-
-
  def _ingest_with_spark(
      spark=None,
      featureset: Union[FeatureSet, str] = None,
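
A short usage sketch of the new additional_filters argument, following the tuple format documented in the docstring above. The project and feature-vector names are hypothetical, and the response-to-dataframe step assumes the existing OfflineVectorResponse API.

import mlrun.feature_store as fstore

vector = fstore.get_feature_vector("store://feature-vectors/my-project/sales-vec")
resp = fstore.get_offline_features(
    vector,
    additional_filters=[("Product", "=", "Computer")],  # (column_name, operator, value)
)
df = resp.to_dataframe()
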
mlrun/feature_store/feature_set.py CHANGED
@@ -917,6 +917,7 @@ class FeatureSet(ModelObj):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
          **kwargs,
      ):
          """return featureset (offline) data as dataframe
@@ -928,6 +929,12 @@ class FeatureSet(ModelObj):
          :param end_time: filter by end time
          :param time_column: specify the time column name in the file
          :param kwargs: additional reader (csv, parquet, ..) args
+         :param additional_filters: List of additional_filter conditions as tuples.
+                                    Each tuple should be in the format (column_name, operator, value).
+                                    Supported operators: "=", ">=", "<=", ">", "<".
+                                    Example: [("Product", "=", "Computer")]
+                                    For all supported filters, please see:
+                                    https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
          :return: DataFrame
          """
          entities = list(self.spec.entities.keys())
@@ -946,6 +953,7 @@ class FeatureSet(ModelObj):
              start_time=start_time,
              end_time=end_time,
              time_field=time_column,
+             additional_filters=additional_filters,
              **kwargs,
          )
          # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -965,6 +973,7 @@ class FeatureSet(ModelObj):
              start_time=start_time,
              end_time=end_time,
              time_column=time_column,
+             additional_filters=additional_filters,
              **kwargs,
          )
          return result
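
The same filter format is now accepted by FeatureSet.to_dataframe() and forwarded to the offline (Parquet) reader, per the docstring above. A hedged example; the feature-set URI and column names are invented.

import mlrun.feature_store as fstore

fset = fstore.get_feature_set("store://feature-sets/my-project/sales")
df = fset.to_dataframe(
    columns=["Product", "Price"],
    additional_filters=[("Product", "=", "Computer")],
)
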
mlrun/feature_store/retrieval/base.py CHANGED
@@ -88,6 +88,7 @@ class BaseMerger(abc.ABC):
          update_stats=None,
          query=None,
          order_by=None,
+         additional_filters=None,
      ):
          self._target = target

@@ -134,6 +135,7 @@ class BaseMerger(abc.ABC):
              timestamp_for_filtering=timestamp_for_filtering,
              query=query,
              order_by=order_by,
+             additional_filters=additional_filters,
          )

      def _write_to_offline_target(self, timestamp_key=None):
@@ -186,6 +188,7 @@ class BaseMerger(abc.ABC):
          timestamp_for_filtering=None,
          query=None,
          order_by=None,
+         additional_filters=None,
      ):
          self._create_engine_env()

@@ -212,7 +215,7 @@ class BaseMerger(abc.ABC):
                  feature_sets.append(None)
                  join_types.append(None)

-         filtered = False
+         timestamp_filtered = False
          for step in join_graph.steps:
              name = step.right_feature_set_name
              feature_set = feature_set_objects[name]
@@ -250,7 +253,7 @@ class BaseMerger(abc.ABC):
              if self._drop_indexes:
                  self._append_drop_column(time_column)
              if (start_time or end_time) and time_column:
-                 filtered = True
+                 timestamp_filtered = True

              df = self._get_engine_df(
                  feature_set,
@@ -259,6 +262,7 @@ class BaseMerger(abc.ABC):
                  start_time if time_column else None,
                  end_time if time_column else None,
                  time_column,
+                 additional_filters,
              )

              fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
@@ -302,8 +306,8 @@ class BaseMerger(abc.ABC):
                  new_columns.append((column, alias))
          self._update_alias(dictionary={name: alias for name, alias in new_columns})

-         # None of the feature sets was filtered as required
-         if not filtered and (start_time or end_time):
+         # None of the feature sets was timestamp filtered as required
+         if not timestamp_filtered and (start_time or end_time):
              raise mlrun.errors.MLRunRuntimeError(
                  "start_time and end_time can only be provided in conjunction with "
                  "a timestamp column, or when the at least one feature_set has a timestamp key"
@@ -755,6 +759,7 @@ class BaseMerger(abc.ABC):
          start_time: typing.Union[str, datetime] = None,
          end_time: typing.Union[str, datetime] = None,
          time_column: typing.Optional[str] = None,
+         additional_filters=None,
      ):
          """
          Return the feature_set data frame according to the args
@@ -79,10 +79,10 @@ class PandasConversionMixin:
              msg = (
                  "toPandas attempted Arrow optimization because "
                  "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                 "failed by the reason below:\n %s\n"
+                 f"failed by the reason below:\n {e}\n"
                  "Attempting non-optimization as "
                  "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                 "true." % str(e)
+                 "true."
              )
              warnings.warn(msg)
              use_arrow = False
@@ -92,7 +92,7 @@ class PandasConversionMixin:
                  "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                  "reached the error below and will not continue because automatic fallback "
                  "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                 "false.\n %s" % str(e)
+                 f"false.\n {e}"
              )
              warnings.warn(msg)
              raise
@@ -158,7 +158,7 @@ class PandasConversionMixin:
                  "reached the error below and can not continue. Note that "
                  "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                  "effect on failures in the middle of "
-                 "computation.\n %s" % str(e)
+                 f"computation.\n {e}"
              )
              warnings.warn(msg)
              raise
mlrun/feature_store/retrieval/dask_merger.py CHANGED
@@ -145,6 +145,7 @@ class DaskFeatureMerger(BaseMerger):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
      ):
          import dask.dataframe as dd

@@ -155,6 +156,7 @@ class DaskFeatureMerger(BaseMerger):
              end_time=end_time,
              time_column=time_column,
              index=False,
+             additional_filters=additional_filters,
          )

          return self._reset_index(df).persist()
mlrun/feature_store/retrieval/job.py CHANGED
@@ -42,6 +42,7 @@ def run_merge_job(
      start_time=None,
      end_time=None,
      timestamp_for_filtering=None,
+     additional_filters=None,
  ):
      name = vector.metadata.name
      if not target or not hasattr(target, "to_dict"):
@@ -116,6 +117,7 @@ def run_merge_job(
              "end_time": end_time,
              "timestamp_for_filtering": timestamp_for_filtering,
              "engine_args": engine_args,
+             "additional_filters": additional_filters,
          },
          inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
      )
mlrun/feature_store/retrieval/local_merger.py CHANGED
@@ -114,12 +114,14 @@ class LocalFeatureMerger(BaseMerger):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
      ):
          df = feature_set.to_dataframe(
              columns=column_names,
              start_time=start_time,
              end_time=end_time,
              time_column=time_column,
+             additional_filters=additional_filters,
          )
          if df.index.names[0]:
              df.reset_index(inplace=True)
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -225,7 +225,12 @@ class SparkFeatureMerger(BaseMerger):
          start_time=None,
          end_time=None,
          time_column=None,
+         additional_filters=None,
      ):
+         mlrun.utils.helpers.additional_filters_warning(
+             additional_filters, self.__class__
+         )
+
          source_kwargs = {}
          if feature_set.spec.passthrough:
              if not feature_set.spec.source:
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py CHANGED
@@ -547,9 +547,9 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
                  "inputs",
                  "parameters",
              ]:
-                 text += "\n * **{}**: {}".format(
-                     property_name.capitalize(),
-                     self._markdown_print(value=property_value, tabs=2),
+                 text += (
+                     f"\n * **{property_name.capitalize()}**: "
+                     f"{self._markdown_print(value=property_value, tabs=2)}"
                  )
          else:
              for property_name, property_value in self._extract_epoch_results().items():
@@ -614,13 +614,8 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
          :return: The generated link.
          """
          return (
-             '<a href="{}/{}/{}/jobs/monitor/{}/overview" target="_blank">{}</a>'.format(
-                 config.resolve_ui_url(),
-                 config.ui.projects_prefix,
-                 context.project,
-                 context.uid,
-                 link_text,
-             )
+             f'<a href="{config.resolve_ui_url()}/{config.ui.projects_prefix}/{context.project}'
+             f'/jobs/monitor/{context.uid}/overview" target="_blank">{link_text}</a>'
          )

      @staticmethod
mlrun/kfpops.py CHANGED
@@ -33,7 +33,6 @@ from .utils import (
      get_in,
      get_workflow_url,
      is_ipython,
-     is_legacy_artifact,
      logger,
      run_keys,
      version,
@@ -121,14 +120,8 @@ def get_kfp_outputs(artifacts, labels, project):
      outputs = []
      out_dict = {}
      for output in artifacts:
-         if is_legacy_artifact(output):
-             key = output["key"]
-             # The spec in a legacy artifact is contained in the main object, so using this assignment saves us a lot
-             # of if/else in the rest of this function.
-             output_spec = output
-         else:
-             key = output.get("metadata")["key"]
-             output_spec = output.get("spec", {})
+         key = output.get("metadata")["key"]
+         output_spec = output.get("spec", {})

          target = output_spec.get("target_path", "")
          target = output_spec.get("inline", target)
@@ -655,7 +648,9 @@ def add_default_env(k8s_client, cop):
              )
          )

-     auth_env_var = mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session
+     auth_env_var = (
+         mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session
+     )
      if auth_env_var in os.environ or "V3IO_ACCESS_KEY" in os.environ:
          cop.container.add_env_variable(
              k8s_client.V1EnvVar(
mlrun/launcher/base.py CHANGED
@@ -403,7 +403,7 @@ class BaseLauncher(abc.ABC):
          )
          if (
              run.status.state
-             in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
+             in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
          ):
              if runtime._is_remote and not runtime.is_child:
                  logger.error(