mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl
This diff shows the contents of publicly released package versions as published to their public registries, and is provided for informational purposes only.
- mlrun/__main__.py +0 -105
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +8 -250
- mlrun/artifacts/dataset.py +1 -190
- mlrun/artifacts/manager.py +2 -41
- mlrun/artifacts/model.py +1 -140
- mlrun/artifacts/plots.py +1 -375
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +24 -3
- mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
- mlrun/config.py +3 -3
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +50 -3
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/httpdb.py +4 -4
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/kfpops.py +5 -10
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +2 -2
- mlrun/model.py +18 -9
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +158 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/model_monitoring/writer.py +69 -39
- mlrun/platforms/iguazio.py +2 -2
- mlrun/projects/project.py +18 -31
- mlrun/render.py +2 -10
- mlrun/run.py +1 -3
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/runtimes/utils.py +1 -1
- mlrun/utils/helpers.py +27 -40
- mlrun/utils/notifications/notification/slack.py +4 -2
- mlrun/utils/notifications/notification_pusher.py +133 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
- mlrun/runtimes/mpijob/v1alpha1.py +0 -29
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py
CHANGED
@@ -656,6 +656,29 @@ class BaseStoreTarget(DataTargetBase):
     def _target_path_object(self):
         """return the actual/computed target path"""
         is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
+
+        if self._resource and self.path:
+            parsed_url = urlparse(self.path)
+            # When the URL consists only from scheme and endpoint and no path,
+            # make a default path for DS and redis targets.
+            # Also ignore KafkaTarget when it uses the ds scheme (no default path for KafkaTarget)
+            if (
+                not isinstance(self, KafkaTarget)
+                and parsed_url.scheme in ["ds", "redis", "rediss"]
+                and (not parsed_url.path or parsed_url.path == "/")
+            ):
+                return TargetPathObject(
+                    _get_target_path(
+                        self,
+                        self._resource,
+                        self.run_id is not None,
+                        netloc=parsed_url.netloc,
+                        scheme=parsed_url.scheme,
+                    ),
+                    self.run_id,
+                    is_single_file,
+                )
+
         return self.get_path() or (
             TargetPathObject(
                 _get_target_path(self, self._resource, self.run_id is not None),
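The new branch above only kicks in when the target URL carries a scheme but no path component. A minimal, standalone sketch of that check (the URLs below are hypothetical and this is not mlrun code):

    from urllib.parse import urlparse

    for url in ("redis://my-redis:6379", "ds://my-profile", "redis://my-redis:6379/projects/p1"):
        parsed = urlparse(url)
        # mirror the condition added in the hunk above
        needs_default_path = parsed.scheme in ["ds", "redis", "rediss"] and (
            not parsed.path or parsed.path == "/"
        )
        print(url, "->", parsed.scheme, parsed.netloc, "needs default path:", needs_default_path)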
@@ -714,9 +737,13 @@ class BaseStoreTarget(DataTargetBase):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.get_dataitem(self.get_target_path()).as_df(
             columns=columns,
             df_module=df_module,
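BaseStoreTarget.as_df now accepts additional_filters only to warn that the base target ignores it; the additional_filters_warning helper lives in mlrun/utils/helpers.py, whose body is not part of this diff. A hedged sketch of what such a helper might do (the name matches the calls above, but the behavior and message are assumptions, not the real implementation):

    import warnings

    def additional_filters_warning(additional_filters, target_class):
        # Assumed behavior: warn when filters are passed to a target type
        # that cannot apply them, instead of silently dropping them.
        if additional_filters:
            warnings.warn(
                f"additional_filters is not supported by {target_class.__name__}; "
                "the filters will be ignored."
            )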
@@ -961,6 +988,7 @@ class ParquetTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
@@ -971,6 +999,7 @@ class ParquetTarget(BaseStoreTarget):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         if not columns:
@@ -1101,8 +1130,12 @@ class CSVTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         df = super().as_df(
             columns=columns,
             df_module=df_module,
@@ -1209,6 +1242,7 @@ class SnowflakeTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         raise NotImplementedError()
@@ -1275,7 +1309,17 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1390,39 +1434,6 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     support_spark = True
     writer_step_name = "RedisNoSqlTarget"
 
-    @property
-    def _target_path_object(self):
-        url = self.path or mlrun.mlconf.redis.url
-        if self._resource and url:
-            parsed_url = urlparse(url)
-            if not parsed_url.path or parsed_url.path == "/":
-                kind_prefix = (
-                    "sets"
-                    if self._resource.kind
-                    == mlrun.common.schemas.ObjectKind.feature_set
-                    else "vectors"
-                )
-                kind = self.kind
-                name = self._resource.metadata.name
-                project = (
-                    self._resource.metadata.project or mlrun.mlconf.default_project
-                )
-                data_prefix = get_default_prefix_for_target(kind).format(
-                    ds_profile_name=parsed_url.netloc,
-                    authority=parsed_url.netloc,
-                    project=project,
-                    kind=kind,
-                    name=name,
-                )
-                if url.startswith("rediss://"):
-                    data_prefix = data_prefix.replace("redis://", "rediss://", 1)
-                if not self.run_id:
-                    version = self._resource.metadata.tag or "latest"
-                    name = f"{name}-{version}"
-                url = f"{data_prefix}/{kind_prefix}/{name}"
-                return TargetPathObject(url, self.run_id, False)
-        return super()._target_path_object
-
     # Fetch server url from the RedisNoSqlTarget::__init__() 'path' parameter.
     # If not set fetch it from 'mlrun.mlconf.redis.url' (MLRUN_REDIS__URL environment variable).
     # Then look for username and password at REDIS_xxx secrets
@@ -1544,7 +1555,17 @@ class StreamTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
 
@@ -1649,7 +1670,17 @@ class KafkaTarget(BaseStoreTarget):
             **attributes,
         )
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def purge(self):
@@ -1696,7 +1727,17 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1807,11 +1848,16 @@ class DFTarget(BaseStoreTarget):
         self,
         columns=None,
         df_module=None,
+        entities=None,
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return select_columns_from_df(
             filter_df_start_end_time(
                 self._df,
@@ -1986,6 +2032,7 @@ class SQLTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         try:
@@ -1994,6 +2041,10 @@ class SQLTarget(BaseStoreTarget):
         except (ModuleNotFoundError, ImportError) as exc:
             self._raise_sqlalchemy_import_error(exc)
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         db_path, table_name, _, _, _, _ = self._parse_url()
         engine = sqlalchemy.create_engine(db_path)
         parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
@@ -2140,7 +2191,7 @@ kind_to_driver = {
 }
 
 
-def _get_target_path(driver, resource, run_id_mode=False):
+def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
     """return the default target path given the resource and target kind"""
     kind = driver.kind
     suffix = driver.suffix
@@ -2157,11 +2208,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
     )
     name = resource.metadata.name
     project = resource.metadata.project or mlrun.mlconf.default_project
-
+
+    default_kind_name = kind
+    if scheme == "ds":
+        # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
+        # as a placeholder that can be used in any specified target
+        default_kind_name = "dsnosql"
+    if scheme == "redis" or scheme == "rediss":
+        default_kind_name = TargetTypes.redisnosql
+
+    netloc = netloc or ""
+    data_prefix = get_default_prefix_for_target(default_kind_name).format(
+        ds_profile_name=netloc,  # In case of ds profile, set its the name
+        authority=netloc,  # In case of redis, replace {authority} with netloc
         project=project,
         kind=kind,
         name=name,
     )
+
+    if scheme == "rediss":
+        data_prefix = data_prefix.replace("redis://", "rediss://", 1)
+
     # todo: handle ver tag changes, may need to copy files?
     if not run_id_mode:
         version = resource.metadata.tag
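_get_target_path now receives the parsed netloc and scheme and substitutes them into a kind-specific prefix template. The real templates come from get_default_prefix_for_target(), which is not shown in this diff, so the snippet below uses a made-up template purely to illustrate the placeholder substitution and the rediss rewrite:

    # Hypothetical template; the real ones come from get_default_prefix_for_target().
    template = "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}"
    data_prefix = template.format(
        authority="my-redis:6379", project="my-project", kind="nosql", name="my-set"
    )
    scheme = "rediss"
    if scheme == "rediss":
        # secure Redis keeps the same layout; only the scheme changes
        data_prefix = data_prefix.replace("redis://", "rediss://", 1)
    print(data_prefix)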
mlrun/db/httpdb.py
CHANGED
@@ -659,10 +659,10 @@ class HTTPRunDB(RunDBInterface):
                 nil_resp += 1
 
             if watch and state in [
-                mlrun.runtimes.constants.RunStates.pending,
-                mlrun.runtimes.constants.RunStates.running,
-                mlrun.runtimes.constants.RunStates.created,
-                mlrun.runtimes.constants.RunStates.aborting,
+                mlrun.common.runtimes.constants.RunStates.pending,
+                mlrun.common.runtimes.constants.RunStates.running,
+                mlrun.common.runtimes.constants.RunStates.created,
+                mlrun.common.runtimes.constants.RunStates.aborting,
             ]:
                 continue
             else:
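The run-state constants referenced here moved from mlrun.runtimes.constants to mlrun.common.runtimes.constants (see the relocated constants.py in the file list above). Downstream code that imported the old path could use a small compatibility shim; this is a sketch under the assumption that only the module path changed between releases:

    try:
        from mlrun.common.runtimes.constants import RunStates  # 1.7.0rc15 layout
    except ImportError:  # older mlrun releases
        from mlrun.runtimes.constants import RunStates

    print(RunStates.running)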
mlrun/feature_store/__init__.py
CHANGED
@@ -19,7 +19,6 @@ __all__ = [
     "get_online_feature_service",
     "ingest",
     "preview",
-    "deploy_ingestion_service",
     "deploy_ingestion_service_v2",
     "delete_feature_set",
     "delete_feature_vector",
@@ -41,7 +40,6 @@ from ..features import Entity, Feature
 from .api import (
     delete_feature_set,
     delete_feature_vector,
-    deploy_ingestion_service,
     deploy_ingestion_service_v2,
     get_feature_set,
     get_feature_vector,
mlrun/feature_store/api.py
CHANGED
@@ -113,6 +113,7 @@ def get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters: list = None,
 ):
     """retrieve offline feature vector results
 
@@ -175,6 +176,13 @@ def get_offline_features(
             By default, the filter executes on the timestamp_key of each feature set.
             Note: the time filtering is performed on each feature set before the
             merge process using start_time and end_time params.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+
 
     """
     return _get_offline_features(
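A hedged usage sketch of the new parameter, following the tuple format documented above (the project and vector names are placeholders):

    import mlrun.feature_store as fstore

    resp = fstore.get_offline_features(
        "my-project/my-vector",
        additional_filters=[("Product", "=", "Computer")],
    )
    df = resp.to_dataframe()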
@@ -194,6 +202,7 @@ def get_offline_features(
         order_by,
         spark_service,
         timestamp_for_filtering,
+        additional_filters,
     )
 
 
@@ -214,6 +223,7 @@ def _get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -252,6 +262,7 @@ def _get_offline_features(
         start_time=start_time,
         end_time=end_time,
         timestamp_for_filtering=timestamp_for_filtering,
+        additional_filters=additional_filters,
     )
 
     merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -267,6 +278,7 @@ def _get_offline_features(
         update_stats=update_stats,
         query=query,
         order_by=order_by,
+        additional_filters=additional_filters,
     )
 
 
@@ -1005,53 +1017,6 @@ def _deploy_ingestion_service_v2(
     return function.deploy(), function
 
 
-@deprecated(
-    version="1.5.0",
-    reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> str:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        my_set.deploy_ingestion_service(source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service
-    """
-    endpoint, _ = featureset.deploy_ingestion_service(
-        source=source,
-        targets=targets,
-        name=name,
-        run_config=run_config,
-        verbose=verbose,
-    )
-    return endpoint
-
-
 def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
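With deploy_ingestion_service removed, callers are expected to move to deploy_ingestion_service_v2. The sketch below adapts the example from the removed docstring; it assumes the v2 call returns both the endpoint and the deployed function (as _deploy_ingestion_service_v2 above suggests), and my_feature_set stands in for an existing FeatureSet object:

    import mlrun
    import mlrun.feature_store as fstore
    from mlrun.datastore.sources import HTTPSource

    # Assumed migration path; verify the return value against your mlrun version.
    source = HTTPSource()
    func = mlrun.code_to_function("ingest", kind="serving")
    config = fstore.RunConfig(function=func)
    endpoint, function = fstore.deploy_ingestion_service_v2(
        my_feature_set, source=source, run_config=config
    )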
mlrun/feature_store/feature_set.py
CHANGED
@@ -917,6 +917,7 @@ class FeatureSet(ModelObj):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return featureset (offline) data as dataframe
@@ -928,6 +929,12 @@ class FeatureSet(ModelObj):
         :param end_time: filter by end time
         :param time_column: specify the time column name in the file
         :param kwargs: additional reader (csv, parquet, ..) args
+        :param additional_filters: List of additional_filter conditions as tuples.
+                                   Each tuple should be in the format (column_name, operator, value).
+                                   Supported operators: "=", ">=", "<=", ">", "<".
+                                   Example: [("Product", "=", "Computer")]
+                                   For all supported filters, please see:
+                                   https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         :return: DataFrame
         """
         entities = list(self.spec.entities.keys())
@@ -946,6 +953,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_field=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -965,6 +973,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return result
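A hedged usage sketch of the new FeatureSet.to_dataframe parameter (the feature set name and column values are placeholders):

    import mlrun.feature_store as fstore

    fset = fstore.get_feature_set("my-project/my-set")
    df = fset.to_dataframe(
        additional_filters=[("Product", "=", "Computer")],
    )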
mlrun/feature_store/retrieval/base.py
CHANGED
@@ -88,6 +88,7 @@ class BaseMerger(abc.ABC):
         update_stats=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._target = target
 
@@ -134,6 +135,7 @@ class BaseMerger(abc.ABC):
             timestamp_for_filtering=timestamp_for_filtering,
             query=query,
             order_by=order_by,
+            additional_filters=additional_filters,
         )
 
     def _write_to_offline_target(self, timestamp_key=None):
@@ -186,6 +188,7 @@ class BaseMerger(abc.ABC):
         timestamp_for_filtering=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._create_engine_env()
 
@@ -212,7 +215,7 @@ class BaseMerger(abc.ABC):
             feature_sets.append(None)
             join_types.append(None)
 
-
+        timestamp_filtered = False
         for step in join_graph.steps:
             name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
@@ -250,7 +253,7 @@ class BaseMerger(abc.ABC):
             if self._drop_indexes:
                 self._append_drop_column(time_column)
             if (start_time or end_time) and time_column:
-
+                timestamp_filtered = True
 
             df = self._get_engine_df(
                 feature_set,
@@ -259,6 +262,7 @@ class BaseMerger(abc.ABC):
                 start_time if time_column else None,
                 end_time if time_column else None,
                 time_column,
+                additional_filters,
             )
 
             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
@@ -302,8 +306,8 @@ class BaseMerger(abc.ABC):
                 new_columns.append((column, alias))
             self._update_alias(dictionary={name: alias for name, alias in new_columns})
 
-        # None of the feature sets was filtered as required
-        if not
+        # None of the feature sets was timestamp filtered as required
+        if not timestamp_filtered and (start_time or end_time):
             raise mlrun.errors.MLRunRuntimeError(
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
@@ -755,6 +759,7 @@ class BaseMerger(abc.ABC):
         start_time: typing.Union[str, datetime] = None,
         end_time: typing.Union[str, datetime] = None,
         time_column: typing.Optional[str] = None,
+        additional_filters=None,
     ):
         """
         Return the feature_set data frame according to the args
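The additional_filters tuples eventually reach the parquet reader, and the docstrings above point at pyarrow's ParquetDataset filter syntax. A standalone illustration of that format using pandas with the pyarrow engine (file and column names are made up):

    import pandas as pd

    # build a tiny parquet file so the filtered read below actually runs
    pd.DataFrame({"Product": ["Computer", "Phone"], "Price": [1200, 800]}).to_parquet(
        "products.parquet"
    )

    # the same (column, operator, value) tuples documented for additional_filters
    df = pd.read_parquet("products.parquet", filters=[("Product", "=", "Computer")])
    print(df)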
mlrun/data_types/to_pandas.py
CHANGED
@@ -79,10 +79,10 @@ class PandasConversionMixin:
                 msg = (
                     "toPandas attempted Arrow optimization because "
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                    "failed by the reason below:\n
+                    f"failed by the reason below:\n {e}\n"
                     "Attempting non-optimization as "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                    "true."
+                    "true."
                 )
                 warnings.warn(msg)
                 use_arrow = False
@@ -92,7 +92,7 @@ class PandasConversionMixin:
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                     "reached the error below and will not continue because automatic fallback "
                     "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                    "false.\n
+                    f"false.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
@@ -158,7 +158,7 @@ class PandasConversionMixin:
                 "reached the error below and can not continue. Note that "
                 "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                 "effect on failures in the middle of "
-                "computation.\n
+                f"computation.\n {e}"
             )
             warnings.warn(msg)
             raise
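These warnings come from Spark's Arrow-backed toPandas conversion and its fallback switch. For reference, a minimal sketch of toggling the two options the messages mention (local Spark session, illustrative only):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").appName("arrow-demo").getOrCreate()
    spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
    spark.conf.set("spark.sql.execution.arrow.pyspark.fallback.enabled", "true")

    pdf = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "value"]).toPandas()
    print(pdf)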
mlrun/feature_store/retrieval/dask_merger.py
CHANGED
@@ -145,6 +145,7 @@ class DaskFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         import dask.dataframe as dd
 
@@ -155,6 +156,7 @@ class DaskFeatureMerger(BaseMerger):
             end_time=end_time,
             time_column=time_column,
             index=False,
+            additional_filters=additional_filters,
         )
 
         return self._reset_index(df).persist()
mlrun/feature_store/retrieval/job.py
CHANGED
@@ -42,6 +42,7 @@ def run_merge_job(
     start_time=None,
     end_time=None,
     timestamp_for_filtering=None,
+    additional_filters=None,
 ):
     name = vector.metadata.name
     if not target or not hasattr(target, "to_dict"):
@@ -116,6 +117,7 @@ def run_merge_job(
             "end_time": end_time,
             "timestamp_for_filtering": timestamp_for_filtering,
             "engine_args": engine_args,
+            "additional_filters": additional_filters,
         },
         inputs={"entity_rows": entity_rows} if entity_rows is not None else {},
     )
mlrun/feature_store/retrieval/local_merger.py
CHANGED
@@ -114,12 +114,14 @@ class LocalFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         df = feature_set.to_dataframe(
             columns=column_names,
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
         )
         if df.index.names[0]:
             df.reset_index(inplace=True)
mlrun/feature_store/retrieval/spark_merger.py
CHANGED
@@ -225,7 +225,12 @@ class SparkFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         source_kwargs = {}
         if feature_set.spec.passthrough:
             if not feature_set.spec.source:
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py
CHANGED
@@ -547,9 +547,9 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
                 "inputs",
                 "parameters",
             ]:
-                text +=
-                property_name.capitalize()
-                self._markdown_print(value=property_value, tabs=2)
+                text += (
+                    f"\n * **{property_name.capitalize()}**: "
+                    f"{self._markdown_print(value=property_value, tabs=2)}"
                 )
         else:
             for property_name, property_value in self._extract_epoch_results().items():
@@ -614,13 +614,8 @@ class TensorboardLogger(Logger, Generic[DLTypes.WeightType]):
         :return: The generated link.
         """
         return (
-            '<a href="{}/{}/{}
-
-            config.ui.projects_prefix,
-            context.project,
-            context.uid,
-            link_text,
-        )
+            f'<a href="{config.resolve_ui_url()}/{config.ui.projects_prefix}/{context.project}'
+            f'/jobs/monitor/{context.uid}/overview" target="_blank">{link_text}</a>'
         )
 
     @staticmethod
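The rewritten link builder assembles an MLRun UI deep link with f-strings. A standalone rendering of the same format with placeholder values (the pieces below are stand-ins for config.resolve_ui_url(), config.ui.projects_prefix, and the run context):

    resolved_ui_url = "https://mlrun-ui.example.com"  # stands in for config.resolve_ui_url()
    projects_prefix = "projects"                      # stands in for config.ui.projects_prefix
    project, uid, link_text = "my-project", "abc123", "open run"

    link = (
        f'<a href="{resolved_ui_url}/{projects_prefix}/{project}'
        f'/jobs/monitor/{uid}/overview" target="_blank">{link_text}</a>'
    )
    print(link)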
mlrun/kfpops.py
CHANGED
@@ -33,7 +33,6 @@ from .utils import (
     get_in,
     get_workflow_url,
     is_ipython,
-    is_legacy_artifact,
     logger,
     run_keys,
     version,
@@ -121,14 +120,8 @@ def get_kfp_outputs(artifacts, labels, project):
     outputs = []
     out_dict = {}
     for output in artifacts:
-
-
-            # The spec in a legacy artifact is contained in the main object, so using this assignment saves us a lot
-            # of if/else in the rest of this function.
-            output_spec = output
-        else:
-            key = output.get("metadata")["key"]
-            output_spec = output.get("spec", {})
+        key = output.get("metadata")["key"]
+        output_spec = output.get("spec", {})
 
         target = output_spec.get("target_path", "")
         target = output_spec.get("inline", target)
@@ -655,7 +648,9 @@ def add_default_env(k8s_client, cop):
         )
     )
 
-    auth_env_var =
+    auth_env_var = (
+        mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session
+    )
     if auth_env_var in os.environ or "V3IO_ACCESS_KEY" in os.environ:
         cop.container.add_env_variable(
             k8s_client.V1EnvVar(
mlrun/launcher/base.py
CHANGED
@@ -403,7 +403,7 @@ class BaseLauncher(abc.ABC):
             )
             if (
                 run.status.state
-                in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
+                in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
             ):
                 if runtime._is_remote and not runtime.is_child:
                     logger.error(