mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -109
- mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +20 -41
- mlrun/artifacts/model.py +8 -140
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +40 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +7 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +54 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +101 -47
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +4 -2
- mlrun/model.py +25 -11
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +48 -213
- mlrun/model_monitoring/writer.py +101 -121
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +73 -45
- mlrun/render.py +11 -13
- mlrun/run.py +6 -41
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +6 -6
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +9 -35
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +1 -39
- mlrun/utils/helpers.py +72 -71
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +12 -5
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +134 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/datastore/datastore_profile.py
CHANGED
@@ -185,6 +185,18 @@ class DatastoreProfileS3(DatastoreProfile):
     assume_role_arn: typing.Optional[str] = None
     access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def secrets(self) -> dict:
         res = {}
@@ -203,7 +215,13 @@ class DatastoreProfileS3(DatastoreProfile):
         return res
 
     def url(self, subpath):
-        return f"s3:/{subpath}"
+        # TODO: There is an inconsistency with DatastoreProfileGCS. In DatastoreProfileGCS,
+        # we assume that the subpath can begin without a '/' character,
+        # while here we assume it always starts with one.
+        if self.bucket:
+            return f"s3://{self.bucket}{subpath}"
+        else:
+            return f"s3:/{subpath}"
 
 
 class DatastoreProfileRedis(DatastoreProfile):
@@ -272,6 +290,18 @@ class DatastoreProfileGCS(DatastoreProfile):
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     @pydantic.validator("gcp_credentials", pre=True, always=True)
     def convert_dict_to_json(cls, v):
@@ -280,10 +310,15 @@ class DatastoreProfileGCS(DatastoreProfile):
         return v
 
     def url(self, subpath) -> str:
+        # TODO: but there's something wrong with the subpath being assumed to not start with a slash here,
+        # but the opposite assumption is made in S3.
         if subpath.startswith("/"):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"gcs://{subpath}"
+        if self.bucket:
+            return f"gcs://{self.bucket}/{subpath}"
+        else:
+            return f"gcs://{subpath}"
 
     def secrets(self) -> dict:
         res = {}
@@ -311,12 +346,27 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     client_secret: typing.Optional[str] = None
     sas_token: typing.Optional[str] = None
     credential: typing.Optional[str] = None
+    container: typing.Optional[str] = None
+
+    @pydantic.validator("container")
+    @classmethod
+    def check_container(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'container' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def url(self, subpath) -> str:
         if subpath.startswith("/"):
-            # in azure the path after schema is starts with
+            # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"az://{subpath}"
+        if self.container:
+            return f"az://{self.container}/{subpath}"
+        else:
+            return f"az://{subpath}"
 
     def secrets(self) -> dict:
         res = {}
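A hedged sketch of how the new optional bucket field might be used; the import path follows the module shown above, while the name argument and credential values are assumed placeholders rather than part of this diff:

from mlrun.datastore.datastore_profile import DatastoreProfileS3

profile = DatastoreProfileS3(
    name="my-s3",             # assumed: profiles are registered under a name
    access_key_id="AKIA...",  # placeholder credentials
    secret_key="<secret>",
    bucket="my-bucket",       # optional today; omitting it emits a FutureWarning
)

# With a bucket set, url() prefixes it; without one, the old s3:/<subpath> form is kept.
print(profile.url("/datasets/iris.parquet"))  # s3://my-bucket/datasets/iris.parquet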
mlrun/datastore/inmem.py
CHANGED
@@ -80,8 +80,8 @@ class InMemoryStore(DataStore):
             reader = df_module.read_json
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
-        # InMemoryStore store
-        for field in ["time_column", "start_time", "end_time"]:
+        # InMemoryStore store – don't pass filters
+        for field in ["time_column", "start_time", "end_time", "additional_filters"]:
             kwargs.pop(field, None)
 
         return reader(item, **kwargs)
mlrun/datastore/sources.py
CHANGED
@@ -102,8 +102,12 @@ class BaseSourceDriver(DataSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         """return the source data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
             df_module=df_module,
@@ -245,7 +249,11 @@ class CSVSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
             columns=columns,
@@ -281,6 +289,12 @@ class ParquetSource(BaseSourceDriver):
     :parameter start_time: filters out data before this time
     :parameter end_time: filters out data after this time
     :parameter attributes: additional parameters to pass to storey.
+    :param additional_filters: List of additional_filter conditions as tuples.
+        Each tuple should be in the format (column_name, operator, value).
+        Supported operators: "=", ">=", "<=", ">", "<".
+        Example: [("Product", "=", "Computer")]
+        For all supported filters, please see:
+        https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
     """
 
     kind = "parquet"
@@ -297,6 +311,7 @@ class ParquetSource(BaseSourceDriver):
         schedule: str = None,
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
+        additional_filters: Optional[list[tuple]] = None,
     ):
         super().__init__(
             name,
@@ -308,6 +323,7 @@ class ParquetSource(BaseSourceDriver):
             start_time,
             end_time,
         )
+        self.additional_filters = additional_filters
 
     @property
     def start_time(self):
@@ -341,6 +357,7 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         context=None,
+        additional_filters=None,
     ):
         import storey
 
@@ -358,6 +375,7 @@ class ParquetSource(BaseSourceDriver):
             end_filter=self.end_time,
             start_filter=self.start_time,
             filter_column=self.time_field or time_field,
+            additional_filters=self.additional_filters or additional_filters,
             **attributes,
         )
 
@@ -380,6 +398,7 @@ class ParquetSource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         reader_args = self.attributes.get("reader_args", {})
         return mlrun.store_manager.object(url=self.path).as_df(
@@ -389,6 +408,7 @@ class ParquetSource(BaseSourceDriver):
             end_time=end_time or self.end_time,
             time_column=time_field or self.time_field,
             format="parquet",
+            additional_filters=additional_filters or self.additional_filters,
             **reader_args,
         )
 
@@ -519,10 +539,15 @@ class BigQuerySource(BaseSourceDriver):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         from google.cloud import bigquery
         from google.cloud.bigquery_storage_v1 import BigQueryReadClient
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         def schema_to_dtypes(schema):
             from mlrun.data_types.data_types import gbq_to_pandas_dtype
 
@@ -562,7 +587,6 @@ class BigQuerySource(BaseSourceDriver):
         else:
             df = rows_iterator.to_dataframe(dtypes=dtypes)
 
-        # TODO : filter as part of the query
         return select_columns_from_df(
             filter_df_start_end_time(
                 df,
@@ -740,7 +764,19 @@ class DataFrameSource:
             context=self.context or context,
         )
 
-    def to_dataframe(
+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return self._df
 
     def is_iterator(self):
@@ -935,6 +971,7 @@ class KafkaSource(OnlineSource):
         start_time=None,
         end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         raise mlrun.MLRunInvalidArgumentError(
             "KafkaSource does not support batch processing"
@@ -1075,9 +1112,13 @@ class SQLSource(BaseSourceDriver):
         start_time=None,
        end_time=None,
         time_field=None,
+        additional_filters=None,
     ):
         import sqlalchemy as sqlalchemy
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         db_path = self.attributes.get("db_path")
         table_name = self.attributes.get("table_name")
         parse_dates = self.attributes.get("parse_dates")
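A hedged usage sketch for the new additional_filters argument on ParquetSource, following the docstring added above; the source path is an assumed placeholder:

from mlrun.datastore.sources import ParquetSource

source = ParquetSource(
    "sales",
    path="v3io:///projects/demo/sales.parquet",  # assumed example path
    # each filter is (column_name, operator, value); supported operators: "=", ">=", "<=", ">", "<"
    additional_filters=[("Product", "=", "Computer")],
)

# to_dataframe() forwards the filters to the parquet reader, while sources that cannot
# apply them (CSV, BigQuery, SQL, in-memory DataFrame) pass the argument through
# mlrun.utils.helpers.additional_filters_warning instead.
df = source.to_dataframe()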
mlrun/datastore/store_resources.py
CHANGED

@@ -17,7 +17,7 @@
 import mlrun
 import mlrun.artifacts
 from mlrun.config import config
-from mlrun.utils.helpers import
+from mlrun.utils.helpers import parse_artifact_uri
 
 from ..common.helpers import parse_versioned_object_uri
 from ..platforms.iguazio import parse_path
@@ -167,11 +167,7 @@ def get_store_resource(
         )
     if resource.get("kind", "") == "link":
         # todo: support other link types (not just iter, move this to the db/api layer
-        link_iteration = (
-            resource.get("link_iteration", 0)
-            if is_legacy_artifact(resource)
-            else resource["spec"].get("link_iteration", 0)
-        )
+        link_iteration = resource["spec"].get("link_iteration", 0)
 
         resource = db.read_artifact(
             key,
mlrun/datastore/targets.py
CHANGED
@@ -656,6 +656,29 @@ class BaseStoreTarget(DataTargetBase):
     def _target_path_object(self):
         """return the actual/computed target path"""
         is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
+
+        if self._resource and self.path:
+            parsed_url = urlparse(self.path)
+            # When the URL consists only from scheme and endpoint and no path,
+            # make a default path for DS and redis targets.
+            # Also ignore KafkaTarget when it uses the ds scheme (no default path for KafkaTarget)
+            if (
+                not isinstance(self, KafkaTarget)
+                and parsed_url.scheme in ["ds", "redis", "rediss"]
+                and (not parsed_url.path or parsed_url.path == "/")
+            ):
+                return TargetPathObject(
+                    _get_target_path(
+                        self,
+                        self._resource,
+                        self.run_id is not None,
+                        netloc=parsed_url.netloc,
+                        scheme=parsed_url.scheme,
+                    ),
+                    self.run_id,
+                    is_single_file,
+                )
+
         return self.get_path() or (
             TargetPathObject(
                 _get_target_path(self, self._resource, self.run_id is not None),
@@ -714,9 +737,13 @@ class BaseStoreTarget(DataTargetBase):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.get_dataitem(self.get_target_path()).as_df(
             columns=columns,
             df_module=df_module,
@@ -961,6 +988,7 @@ class ParquetTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
@@ -971,6 +999,7 @@ class ParquetTarget(BaseStoreTarget):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         if not columns:
@@ -1101,8 +1130,12 @@ class CSVTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         df = super().as_df(
             columns=columns,
             df_module=df_module,
@@ -1209,6 +1242,7 @@ class SnowflakeTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         raise NotImplementedError()
@@ -1275,7 +1309,17 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1390,39 +1434,6 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     support_spark = True
     writer_step_name = "RedisNoSqlTarget"
 
-    @property
-    def _target_path_object(self):
-        url = self.path or mlrun.mlconf.redis.url
-        if self._resource and url:
-            parsed_url = urlparse(url)
-            if not parsed_url.path or parsed_url.path == "/":
-                kind_prefix = (
-                    "sets"
-                    if self._resource.kind
-                    == mlrun.common.schemas.ObjectKind.feature_set
-                    else "vectors"
-                )
-                kind = self.kind
-                name = self._resource.metadata.name
-                project = (
-                    self._resource.metadata.project or mlrun.mlconf.default_project
-                )
-                data_prefix = get_default_prefix_for_target(kind).format(
-                    ds_profile_name=parsed_url.netloc,
-                    authority=parsed_url.netloc,
-                    project=project,
-                    kind=kind,
-                    name=name,
-                )
-                if url.startswith("rediss://"):
-                    data_prefix = data_prefix.replace("redis://", "rediss://", 1)
-                if not self.run_id:
-                    version = self._resource.metadata.tag or "latest"
-                    name = f"{name}-{version}"
-                url = f"{data_prefix}/{kind_prefix}/{name}"
-                return TargetPathObject(url, self.run_id, False)
-        return super()._target_path_object
-
     # Fetch server url from the RedisNoSqlTarget::__init__() 'path' parameter.
     # If not set fetch it from 'mlrun.mlconf.redis.url' (MLRUN_REDIS__URL environment variable).
     # Then look for username and password at REDIS_xxx secrets
@@ -1544,7 +1555,17 @@ class StreamTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
 
@@ -1649,7 +1670,17 @@ class KafkaTarget(BaseStoreTarget):
             **attributes,
         )
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def purge(self):
@@ -1696,7 +1727,17 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1807,11 +1848,16 @@ class DFTarget(BaseStoreTarget):
         self,
         columns=None,
         df_module=None,
+        entities=None,
         start_time=None,
        end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return select_columns_from_df(
             filter_df_start_end_time(
                 self._df,
@@ -1986,6 +2032,7 @@ class SQLTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         try:
@@ -1994,6 +2041,10 @@ class SQLTarget(BaseStoreTarget):
         except (ModuleNotFoundError, ImportError) as exc:
             self._raise_sqlalchemy_import_error(exc)
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         db_path, table_name, _, _, _, _ = self._parse_url()
         engine = sqlalchemy.create_engine(db_path)
         parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
@@ -2140,7 +2191,7 @@ kind_to_driver = {
 }
 
 
-def _get_target_path(driver, resource, run_id_mode=False):
+def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
     """return the default target path given the resource and target kind"""
     kind = driver.kind
     suffix = driver.suffix
@@ -2157,11 +2208,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
     )
     name = resource.metadata.name
     project = resource.metadata.project or mlrun.mlconf.default_project
-    data_prefix = get_default_prefix_for_target(kind).format(
+
+    default_kind_name = kind
+    if scheme == "ds":
+        # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
+        # as a placeholder that can be used in any specified target
+        default_kind_name = "dsnosql"
+    if scheme == "redis" or scheme == "rediss":
+        default_kind_name = TargetTypes.redisnosql
+
+    netloc = netloc or ""
+    data_prefix = get_default_prefix_for_target(default_kind_name).format(
+        ds_profile_name=netloc,  # In case of ds profile, set its the name
+        authority=netloc,  # In case of redis, replace {authority} with netloc
         project=project,
         kind=kind,
         name=name,
     )
+
+    if scheme == "rediss":
+        data_prefix = data_prefix.replace("redis://", "rediss://", 1)
+
     # todo: handle ver tag changes, may need to copy files?
     if not run_id_mode:
         version = resource.metadata.tag
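Similarly, a hedged sketch of the additional_filters pass-through on ParquetTarget.as_df; the target name and path are assumed placeholders:

from mlrun.datastore.targets import ParquetTarget

target = ParquetTarget(name="sales", path="v3io:///projects/demo/sales-parquet/")

# ParquetTarget forwards the filters to the underlying reader; CSV, SQL and DataFrame
# targets only route them through additional_filters_warning, and the NoSql, Stream,
# Kafka and TSDB targets still raise NotImplementedError from as_df().
df = target.as_df(additional_filters=[("Product", "=", "Computer")])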
mlrun/db/base.py
CHANGED
@@ -16,6 +16,7 @@ import datetime
 from abc import ABC, abstractmethod
 from typing import Optional, Union
 
+import mlrun.alerts
 import mlrun.common.schemas
 import mlrun.model_monitoring
 
@@ -117,7 +118,18 @@
         pass
 
     @abstractmethod
-    def del_artifact(
+    def del_artifact(
+        self,
+        key,
+        tag="",
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         pass
 
     @abstractmethod
@@ -543,7 +555,7 @@
         end: Optional[str] = None,
         metrics: Optional[list[str]] = None,
         features: bool = False,
-    ):
+    ) -> mlrun.model_monitoring.ModelEndpoint:
         pass
 
     @abstractmethod
@@ -664,7 +676,7 @@
     def store_alert_config(
         self,
         alert_name: str,
-        alert_data: Union[dict, mlrun.
+        alert_data: Union[dict, mlrun.alerts.alert.AlertConfig],
         project="",
     ):
         pass
@@ -685,6 +697,14 @@
     def reset_alert_config(self, alert_name: str, project=""):
         pass
 
+    @abstractmethod
+    def get_alert_template(self, template_name: str):
+        pass
+
+    @abstractmethod
+    def list_alert_templates(self):
+        pass
+
     @abstractmethod
     def get_builder_status(
         self,
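Finally, a hedged sketch of how a client could reach the new alert-template accessors defined on the run DB interface; the template name is a placeholder and availability on the connected server is assumed:

import mlrun

db = mlrun.get_run_db()
templates = db.list_alert_templates()
template = db.get_alert_template("model-drift-detected")  # assumed template name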