mlrun 1.7.0rc26__py3-none-any.whl → 1.7.0rc31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +7 -7
- mlrun/alerts/alert.py +13 -1
- mlrun/artifacts/manager.py +5 -0
- mlrun/common/constants.py +3 -3
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/common/formatters/base.py +9 -9
- mlrun/common/schemas/alert.py +4 -8
- mlrun/common/schemas/api_gateway.py +7 -0
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +32 -13
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -12
- mlrun/common/schemas/project.py +10 -9
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/config.py +37 -11
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +48 -16
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/azure_blob.py +2 -1
- mlrun/datastore/base.py +21 -13
- mlrun/datastore/datastore.py +7 -5
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/google_cloud_storage.py +1 -0
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/s3.py +2 -0
- mlrun/datastore/snowflake_utils.py +3 -1
- mlrun/datastore/sources.py +40 -11
- mlrun/datastore/store_resources.py +2 -0
- mlrun/datastore/targets.py +71 -26
- mlrun/db/base.py +11 -0
- mlrun/db/httpdb.py +50 -31
- mlrun/db/nopdb.py +11 -1
- mlrun/errors.py +4 -0
- mlrun/execution.py +18 -10
- mlrun/feature_store/retrieval/spark_merger.py +4 -32
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +27 -1
- mlrun/model_monitoring/api.py +9 -55
- mlrun/model_monitoring/applications/histogram_data_drift.py +4 -1
- mlrun/model_monitoring/controller.py +57 -73
- mlrun/model_monitoring/db/stores/__init__.py +21 -9
- mlrun/model_monitoring/db/stores/base/store.py +39 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +4 -2
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +41 -80
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +22 -27
- mlrun/model_monitoring/db/tsdb/__init__.py +19 -14
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +4 -2
- mlrun/model_monitoring/helpers.py +15 -17
- mlrun/model_monitoring/writer.py +2 -7
- mlrun/projects/operations.py +1 -0
- mlrun/projects/project.py +87 -75
- mlrun/render.py +10 -5
- mlrun/run.py +7 -7
- mlrun/runtimes/base.py +1 -1
- mlrun/runtimes/daskjob.py +7 -1
- mlrun/runtimes/local.py +24 -7
- mlrun/runtimes/nuclio/function.py +20 -0
- mlrun/runtimes/pod.py +5 -29
- mlrun/serving/routers.py +75 -59
- mlrun/serving/server.py +1 -0
- mlrun/serving/v2_serving.py +8 -1
- mlrun/utils/helpers.py +46 -2
- mlrun/utils/logger.py +36 -2
- mlrun/utils/notifications/notification/base.py +4 -0
- mlrun/utils/notifications/notification/git.py +21 -0
- mlrun/utils/notifications/notification/slack.py +8 -0
- mlrun/utils/notifications/notification/webhook.py +41 -1
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/METADATA +13 -8
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/RECORD +76 -78
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc26.dist-info → mlrun-1.7.0rc31.dist-info}/top_level.txt +0 -0
mlrun/datastore/base.py
CHANGED

@@ -215,6 +215,11 @@ class DataStore:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "When providing start_time or end_time, must provide time_column"
             )
+        if start_time and end_time and start_time.tzinfo != end_time.tzinfo:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "start_time and end_time must have the same time zone"
+            )
+
         if start_time or end_time or additional_filters:
             partitions_time_attributes = find_partitions(url, file_system)
             set_filters(

@@ -232,13 +237,17 @@ class DataStore:
         ):
             raise ex

-
-        if start_time
-            start_time_inner = start_time.replace(
-
-
-
-
+        start_time_inner = None
+        if start_time:
+            start_time_inner = start_time.replace(
+                tzinfo=None if start_time.tzinfo else pytz.utc
+            )
+
+        end_time_inner = None
+        if end_time:
+            end_time_inner = end_time.replace(
+                tzinfo=None if end_time.tzinfo else pytz.utc
+            )

         set_filters(
             partitions_time_attributes,

@@ -319,11 +328,7 @@ class DataStore:
             dfs.append(df_module.read_csv(*updated_args, **kwargs))
             return df_module.concat(dfs)

-        elif (
-            file_url.endswith(".parquet")
-            or file_url.endswith(".pq")
-            or format == "parquet"
-        ):
+        elif mlrun.utils.helpers.is_parquet_file(file_url, format):
             if columns:
                 kwargs["columns"] = columns

@@ -386,7 +391,10 @@ class DataStore:
         }

     def rm(self, path, recursive=False, maxdepth=None):
-
+        try:
+            self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+        except FileNotFoundError:
+            pass

     @staticmethod
     def _is_dd(df_module):
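Several hunks above (and matching ones in inmem.py and targets.py below) replace repeated .endswith(".parquet") / .endswith(".pq") checks with a call to mlrun.utils.helpers.is_parquet_file, whose implementation lives in mlrun/utils/helpers.py and is not expanded in this diff. A minimal sketch of what such a helper plausibly does, reconstructed from the inline conditions it replaces (the function name is from the diff, the body is an assumption):

    def is_parquet_file(file_url, format=None):
        # Assumed behavior, inferred from the conditions this helper replaces:
        # a URL counts as a single parquet file when it has a parquet extension,
        # or when the caller explicitly requested the parquet format.
        return bool(file_url) and (
            file_url.endswith(".parquet")
            or file_url.endswith(".pq")
            or format == "parquet"
        )

With one shared definition, DataStore.as_df, InMemoryStore, and the ParquetTarget path checks all agree on what counts as a single parquet file.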
mlrun/datastore/datastore.py
CHANGED

@@ -21,7 +21,7 @@ from mlrun.datastore.datastore_profile import datastore_profile_read
 from mlrun.errors import err_to_str
 from mlrun.utils.helpers import get_local_file_schema

-from ..utils import DB_SCHEMA,
+from ..utils import DB_SCHEMA, RunKeys
 from .base import DataItem, DataStore, HttpStore
 from .filestore import FileStore
 from .inmem import InMemoryStore

@@ -32,6 +32,8 @@ in_memory_store = InMemoryStore()


 def parse_url(url):
+    if url and url.startswith("v3io://") and not url.startswith("v3io:///"):
+        url = url.replace("v3io://", "v3io:///", 1)
     parsed_url = urlparse(url)
     schema = parsed_url.scheme.lower()
     endpoint = parsed_url.hostname

@@ -94,7 +96,7 @@ def schema_to_store(schema):
        from .dbfs_store import DBFSStore

        return DBFSStore
-    elif schema
+    elif schema in ["hdfs", "webhdfs"]:
        from .hdfs import HdfsStore

        return HdfsStore

@@ -133,7 +135,7 @@ class StoreManager:
         return self._db

     def from_dict(self, struct: dict):
-        stor_list = struct.get(
+        stor_list = struct.get(RunKeys.data_stores)
         if stor_list and isinstance(stor_list, list):
             for stor in stor_list:
                 schema, endpoint, parsed_url = parse_url(stor.get("url"))

@@ -145,7 +147,7 @@ class StoreManager:
             self._stores[stor["name"]] = new_stor

     def to_dict(self, struct):
-        struct[
+        struct[RunKeys.data_stores] = [
             stor.to_dict() for stor in self._stores.values() if stor.from_spec
         ]

@@ -207,7 +209,7 @@ class StoreManager:
     ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
-        store_key = f"{schema}://{endpoint}"
+        store_key = f"{schema}://{endpoint}" if endpoint else f"{schema}://"

         if schema == "ds":
             datastore_profile = datastore_profile_read(url, project_name, secrets)

mlrun/datastore/datastore_profile.py
CHANGED

@@ -412,7 +412,7 @@ class DatastoreProfileHdfs(DatastoreProfile):
         return res or None

     def url(self, subpath):
-        return f"
+        return f"webhdfs://{self.host}:{self.http_port}{subpath}"


 class DatastoreProfile2Json(pydantic.BaseModel):

mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -133,6 +133,7 @@ class GoogleCloudStorageStore(DataStore):

     def rm(self, path, recursive=False, maxdepth=None):
         path = self._make_path(path)
+        self.filesystem.exists(path)
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
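The parse_url change rewrites v3io://container/path URLs into the v3io:///container/path form before parsing, so the first path segment is no longer swallowed as a hostname; together with the store_key change above, a URL with no endpoint now maps to the plain "v3io://" store key. A standalone illustration using only the standard library (the URL value is a placeholder):

    from urllib.parse import urlparse

    url = "v3io://projects/my-project/artifacts"  # placeholder v3io path
    if url and url.startswith("v3io://") and not url.startswith("v3io:///"):
        url = url.replace("v3io://", "v3io:///", 1)

    parsed = urlparse(url)
    print(parsed.scheme)    # "v3io"
    print(parsed.hostname)  # None, so the store key falls back to "v3io://"
    print(parsed.path)      # "/projects/my-project/artifacts"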
mlrun/datastore/inmem.py
CHANGED

@@ -72,7 +72,7 @@ class InMemoryStore(DataStore):
             if columns:
                 kwargs["usecols"] = columns
             reader = df_module.read_csv
-        elif
+        elif mlrun.utils.helpers.is_parquet_file(url, format):
             if columns:
                 kwargs["columns"] = columns
             reader = df_module.read_parquet

@@ -85,3 +85,6 @@ class InMemoryStore(DataStore):
             kwargs.pop(field, None)

         return reader(item, **kwargs)
+
+    def rm(self, path, recursive=False, maxdepth=None):
+        self._items.pop(path, None)
mlrun/datastore/s3.py
CHANGED

@@ -201,6 +201,8 @@ class S3Store(DataStore):
     def rm(self, path, recursive=False, maxdepth=None):
         bucket, key = self.get_bucket_and_key(path)
         path = f"{bucket}/{key}"
+        # In order to raise an error if there is connection error, ML-7056.
+        self.filesystem.exists(path=path)
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)


mlrun/datastore/snowflake_utils.py
CHANGED

@@ -30,13 +30,15 @@ def get_snowflake_password():


 def get_snowflake_spark_options(attributes):
+    if not attributes:
+        return {}
     return {
         "format": "net.snowflake.spark.snowflake",
         "sfURL": attributes.get("url"),
         "sfUser": attributes.get("user"),
         "sfPassword": get_snowflake_password(),
         "sfDatabase": attributes.get("database"),
-        "sfSchema": attributes.get("
+        "sfSchema": attributes.get("db_schema"),
         "sfWarehouse": attributes.get("warehouse"),
         "application": "iguazio_platform",
         "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
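The new guard in get_snowflake_spark_options changes its contract for callers that have no Snowflake attributes at all; a short check of just that behavior (the populated-attributes path is not exercised here because it also requires a configured Snowflake password):

    from mlrun.datastore.snowflake_utils import get_snowflake_spark_options

    # A missing or empty attributes dict now yields empty Spark options instead
    # of failing on attributes.get(...); when attributes are provided, sfSchema
    # is read from the renamed "db_schema" key rather than "schema".
    assert get_snowflake_spark_options(None) == {}
    assert get_snowflake_spark_options({}) == {}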
mlrun/datastore/sources.py
CHANGED

@@ -747,7 +747,7 @@ class SnowflakeSource(BaseSourceDriver):
             url="...",
             user="...",
             database="...",
-
+            db_schema="...",
             warehouse="...",
         )

@@ -762,7 +762,8 @@ class SnowflakeSource(BaseSourceDriver):
     :parameter url: URL of the snowflake cluster
     :parameter user: snowflake user
     :parameter database: snowflake database
-    :parameter schema: snowflake schema
+    :parameter schema: snowflake schema - deprecated, use db_schema
+    :parameter db_schema: snowflake schema
     :parameter warehouse: snowflake warehouse
     """

@@ -774,6 +775,7 @@ class SnowflakeSource(BaseSourceDriver):
         self,
         name: str = "",
         key_field: str = None,
+        attributes: dict[str, object] = None,
         time_field: str = None,
         schedule: str = None,
         start_time=None,

@@ -783,21 +785,34 @@ class SnowflakeSource(BaseSourceDriver):
         user: str = None,
         database: str = None,
         schema: str = None,
+        db_schema: str = None,
         warehouse: str = None,
         **kwargs,
     ):
-
-
-
-
-
-
-
-        }
+        # TODO: Remove in 1.9.0
+        if schema:
+            warnings.warn(
+                "schema is deprecated in 1.7.0, and will be removed in 1.9.0, please use db_schema"
+            )
+        db_schema = db_schema or schema  # TODO: Remove in 1.9.0
+
+        attributes = attributes or {}
+        if url:
+            attributes["url"] = url
+        if user:
+            attributes["user"] = user
+        if database:
+            attributes["database"] = database
+        if db_schema:
+            attributes["db_schema"] = db_schema
+        if warehouse:
+            attributes["warehouse"] = warehouse
+        if query:
+            attributes["query"] = query

         super().__init__(
             name,
-            attributes=
+            attributes=attributes,
             key_field=key_field,
             time_field=time_field,
             schedule=schedule,

@@ -811,6 +826,20 @@ class SnowflakeSource(BaseSourceDriver):
             spark_options["query"] = self.attributes.get("query")
         return spark_options

+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} supports only spark engine"
+        )
+

 class CustomSource(BaseSourceDriver):
     kind = "custom"
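A hedged usage sketch of the updated SnowflakeSource constructor: the connection values below are placeholders, and query is assumed to be an existing keyword of this constructor (the added code stores it into attributes, but its declaration sits outside the visible hunk):

    import warnings

    from mlrun.datastore.sources import SnowflakeSource

    source = SnowflakeSource(
        "snowflake_source",
        query="SELECT * FROM MY_TABLE",     # placeholder query
        url="acme.snowflakecomputing.com",  # placeholder account URL
        user="svc_user",
        database="MY_DB",
        db_schema="PUBLIC",                 # replaces the deprecated schema kwarg
        warehouse="MY_WH",
    )
    # The connection details end up in source.attributes under the keys
    # "url", "user", "database", "db_schema", "warehouse" and "query".

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        SnowflakeSource("legacy_source", schema="PUBLIC")  # old spelling still accepted
        assert any("db_schema" in str(w.message) for w in caught)  # but warns until 1.9.0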
mlrun/datastore/targets.py
CHANGED

@@ -29,7 +29,10 @@ from mergedeep import merge
 import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
-from mlrun.datastore.snowflake_utils import
+from mlrun.datastore.snowflake_utils import (
+    get_snowflake_password,
+    get_snowflake_spark_options,
+)
 from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
 from mlrun.utils import logger, now_date

@@ -546,9 +549,7 @@ class BaseStoreTarget(DataTargetBase):
             os.makedirs(dir, exist_ok=True)
         target_df = df
         partition_cols = None  # single parquet file
-        if not
-            ".pq"
-        ):  # directory
+        if not mlrun.utils.helpers.is_parquet_file(target_path):  # directory
             partition_cols = []
             if timestamp_key and (
                 self.partitioned or self.time_partitioning_granularity

@@ -725,6 +726,10 @@ class BaseStoreTarget(DataTargetBase):
         timestamp_key=None,
         featureset_status=None,
     ):
+        if not self.support_storey:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support storey engine"
+            )
         raise NotImplementedError()

     def purge(self):

@@ -767,6 +772,10 @@ class BaseStoreTarget(DataTargetBase):

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         # options used in spark.read.load(**options)
+        if not self.support_spark:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support spark engine"
+            )
         raise NotImplementedError()

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):

@@ -775,6 +784,10 @@ class BaseStoreTarget(DataTargetBase):
     def get_dask_options(self):
         raise NotImplementedError()

+    @property
+    def source_spark_attributes(self) -> dict:
+        return {}
+

 class ParquetTarget(BaseStoreTarget):
     """Parquet target storage driver, used to materialize feature set/vector data into parquet files.

@@ -911,10 +924,8 @@ class ParquetTarget(BaseStoreTarget):
             if time_unit == time_partitioning_granularity:
                 break

-        if (
-
-            and not self.get_target_path().endswith(".parquet")
-            and not self.get_target_path().endswith(".pq")
+        if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
+            self.get_target_path()
         ):
             partition_cols = []

@@ -1033,9 +1044,7 @@ class ParquetTarget(BaseStoreTarget):
         return result

     def is_single_file(self):
-
-            return self.path.endswith(".parquet") or self.path.endswith(".pq")
-        return False
+        return mlrun.utils.helpers.is_parquet_file(self.path)

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         # If partitioning by time, add the necessary columns

@@ -1208,19 +1217,20 @@ class SnowflakeTarget(BaseStoreTarget):
         warehouse: str = None,
         table_name: str = None,
     ):
-
-
-        "
-
-        "
-
-        "
-
-
-
-
-
+        attributes = attributes or {}
+        if url:
+            attributes["url"] = url
+        if user:
+            attributes["user"] = user
+        if database:
+            attributes["database"] = database
+        if db_schema:
+            attributes["db_schema"] = db_schema
+        if warehouse:
+            attributes["warehouse"] = warehouse
+        if table_name:
+            attributes["table"] = table_name
+
         super().__init__(
             name,
             path,

@@ -1244,7 +1254,31 @@ class SnowflakeTarget(BaseStoreTarget):
         return spark_options

     def purge(self):
-
+        import snowflake.connector
+
+        missing = [
+            key
+            for key in ["database", "db_schema", "table", "url", "user", "warehouse"]
+            if self.attributes.get(key) is None
+        ]
+        if missing:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"Can't purge Snowflake target, "
+                f"some attributes are missing: {', '.join(missing)}"
+            )
+        account = self.attributes["url"].replace(".snowflakecomputing.com", "")
+
+        with snowflake.connector.connect(
+            account=account,
+            user=self.attributes["user"],
+            password=get_snowflake_password(),
+            warehouse=self.attributes["warehouse"],
+        ) as snowflake_connector:
+            drop_statement = (
+                f"DROP TABLE IF EXISTS {self.attributes['database']}.{self.attributes['db_schema']}"
+                f".{self.attributes['table']}"
+            )
+            snowflake_connector.execute_string(drop_statement)

     def as_df(
         self,

@@ -1257,7 +1291,18 @@ class SnowflakeTarget(BaseStoreTarget):
         additional_filters=None,
         **kwargs,
     ):
-        raise
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} does not support storey engine"
+        )
+
+    @property
+    def source_spark_attributes(self) -> dict:
+        keys = ["url", "user", "database", "db_schema", "warehouse"]
+        attributes = self.attributes or {}
+        snowflake_dict = {key: attributes.get(key) for key in keys}
+        table = attributes.get("table")
+        snowflake_dict["query"] = f"SELECT * from {table}" if table else None
+        return snowflake_dict


 class NoSqlBaseTarget(BaseStoreTarget):
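A sketch of the new SnowflakeTarget.source_spark_attributes property, which repackages the target's connection attributes plus a SELECT * query over its table so the written data can be read back through the Spark source path. The values are placeholders, and the call assumes the target's path argument may be omitted:

    from mlrun.datastore.targets import SnowflakeTarget

    target = SnowflakeTarget(
        "snowflake_target",
        url="acme.snowflakecomputing.com",  # placeholder account URL
        user="svc_user",
        database="MY_DB",
        db_schema="PUBLIC",
        warehouse="MY_WH",
        table_name="FEATURES",              # stored under attributes["table"]
    )

    attrs = target.source_spark_attributes
    # {"url": ..., "user": ..., "database": ..., "db_schema": ..., "warehouse": ...,
    #  "query": "SELECT * from FEATURES"}

    # purge() (shown above) refuses to run unless database, db_schema, table,
    # url, user and warehouse are all present, then issues DROP TABLE IF EXISTS.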
mlrun/db/base.py
CHANGED

@@ -154,6 +154,7 @@ class RunDBInterface(ABC):
             mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
         ),
         secrets: dict = None,
+        iter=None,
     ):
         pass

@@ -891,6 +892,7 @@ class RunDBInterface(ABC):
         image: str = "mlrun/mlrun",
         deploy_histogram_data_drift_app: bool = True,
         rebuild_images: bool = False,
+        fetch_credentials_from_sys_config: bool = False,
     ) -> None:
         pass

@@ -917,3 +919,12 @@ class RunDBInterface(ABC):
         self, project: str, image: str = "mlrun/mlrun"
     ) -> None:
         pass
+
+    @abstractmethod
+    def set_model_monitoring_credentials(
+        self,
+        project: str,
+        credentials: dict[str, str],
+        replace_creds: bool,
+    ) -> None:
+        pass
mlrun/db/httpdb.py
CHANGED

@@ -38,6 +38,7 @@ import mlrun.model_monitoring.model_endpoint
 import mlrun.platforms
 import mlrun.projects
 import mlrun.runtimes.nuclio.api_gateway
+import mlrun.runtimes.nuclio.function
 import mlrun.utils
 from mlrun.alerts.alert import AlertConfig
 from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider

@@ -536,6 +537,10 @@ class HTTPRunDB(RunDBInterface):
             server_cfg.get("model_monitoring_tsdb_connection")
             or config.model_endpoint_monitoring.tsdb_connection
         )
+        config.model_endpoint_monitoring.stream_connection = (
+            server_cfg.get("stream_connection")
+            or config.model_endpoint_monitoring.stream_connection
+        )
         config.packagers = server_cfg.get("packagers") or config.packagers
         server_data_prefixes = server_cfg.get("feature_store_data_prefixes") or {}
         for prefix in ["default", "nosql", "redisnosql"]:

@@ -870,7 +875,7 @@ class HTTPRunDB(RunDBInterface):
         ):
             # default to last week on no filter
             start_time_from = datetime.now() - timedelta(days=7)
-            partition_by = mlrun.common.schemas.RunPartitionByField.
+            partition_by = mlrun.common.schemas.RunPartitionByField.project_and_name
             partition_sort_by = mlrun.common.schemas.SortField.updated

         params = {

@@ -1028,6 +1033,7 @@ class HTTPRunDB(RunDBInterface):
             mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
         ),
         secrets: dict = None,
+        iter=None,
     ):
         """Delete an artifact.

@@ -1046,6 +1052,7 @@ class HTTPRunDB(RunDBInterface):
             "tag": tag,
             "tree": tree,
             "uid": uid,
+            "iter": iter,
             "deletion_strategy": deletion_strategy,
         }
         error = f"del artifact {project}/{key}"

@@ -1246,13 +1253,17 @@ class HTTPRunDB(RunDBInterface):
             function_name=name,
         )

-    def list_functions(
+    def list_functions(
+        self, name=None, project=None, tag=None, labels=None, since=None, until=None
+    ):
         """Retrieve a list of functions, filtered by specific criteria.

         :param name: Return only functions with a specific name.
         :param project: Return functions belonging to this project. If not specified, the default project is used.
         :param tag: Return function versions with specific tags.
         :param labels: Return functions that have specific labels assigned to them.
+        :param since: Return functions updated after this date (as datetime object).
+        :param until: Return functions updated before this date (as datetime object).
         :returns: List of function objects (as dictionary).
         """
         project = project or config.default_project

@@ -1260,6 +1271,8 @@ class HTTPRunDB(RunDBInterface):
             "name": name,
             "tag": tag,
             "label": labels or [],
+            "since": datetime_to_iso(since),
+            "until": datetime_to_iso(until),
         }
         error = "list functions"
         path = f"projects/{project}/functions"

@@ -1610,20 +1623,11 @@ class HTTPRunDB(RunDBInterface):
             raise RunDBError("bad function build response")

         if resp.headers:
-            func.status.state = resp.headers.get("x-mlrun-function-status", "")
             last_log_timestamp = float(
                 resp.headers.get("x-mlrun-last-timestamp", "0.0")
             )
-
-
-            func.status.internal_invocation_urls = resp.headers.get(
-                "x-mlrun-internal-invocation-urls", ""
-            ).split(",")
-            func.status.external_invocation_urls = resp.headers.get(
-                "x-mlrun-external-invocation-urls", ""
-            ).split(",")
-            func.status.container_image = resp.headers.get(
-                "x-mlrun-container-image", ""
+            mlrun.runtimes.nuclio.function.enrich_nuclio_function_from_headers(
+                func, resp.headers
             )

         text = ""

@@ -1681,16 +1685,8 @@ class HTTPRunDB(RunDBInterface):
             resp.headers.get("x-mlrun-last-timestamp", "0.0")
         )
         if func.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
-
-
-            func.status.internal_invocation_urls = resp.headers.get(
-                "x-mlrun-internal-invocation-urls", ""
-            ).split(",")
-            func.status.external_invocation_urls = resp.headers.get(
-                "x-mlrun-external-invocation-urls", ""
-            ).split(",")
-            func.status.container_image = resp.headers.get(
-                "x-mlrun-container-image", ""
+            mlrun.runtimes.nuclio.function.enrich_nuclio_function_from_headers(
+                func, resp.headers
             )

         builder_pod = resp.headers.get("builder_pod", "")

@@ -3397,6 +3393,7 @@ class HTTPRunDB(RunDBInterface):
         image: str = "mlrun/mlrun",
         deploy_histogram_data_drift_app: bool = True,
         rebuild_images: bool = False,
+        fetch_credentials_from_sys_config: bool = False,
     ) -> None:
         """
         Deploy model monitoring application controller, writer and stream functions.

@@ -3406,14 +3403,16 @@ class HTTPRunDB(RunDBInterface):
         The stream function goal is to monitor the log of the data stream. It is triggered when a new log entry
         is detected. It processes the new events into statistics that are then written to statistics databases.

-        :param project:
-        :param base_period:
-
-        :param image:
-
-
-        :param deploy_histogram_data_drift_app:
-        :param rebuild_images:
+        :param project: Project name.
+        :param base_period: The time period in minutes in which the model monitoring controller
+                            function triggers. By default, the base period is 10 minutes.
+        :param image: The image of the model monitoring controller, writer & monitoring
+                      stream functions, which are real time nuclio functions.
+                      By default, the image is mlrun/mlrun.
+        :param deploy_histogram_data_drift_app: If true, deploy the default histogram-based data drift application.
+        :param rebuild_images: If true, force rebuild of model monitoring infrastructure images.
+        :param fetch_credentials_from_sys_config: If true, fetch the credentials from the system configuration.
+
         """
         self.api_call(
             method=mlrun.common.types.HTTPMethod.POST,

@@ -3423,6 +3422,7 @@ class HTTPRunDB(RunDBInterface):
                 "image": image,
                 "deploy_histogram_data_drift_app": deploy_histogram_data_drift_app,
                 "rebuild_images": rebuild_images,
+                "fetch_credentials_from_sys_config": fetch_credentials_from_sys_config,
             },
         )

@@ -3548,6 +3548,25 @@ class HTTPRunDB(RunDBInterface):
             params={"image": image},
         )

+    def set_model_monitoring_credentials(
+        self,
+        project: str,
+        credentials: dict[str, str],
+        replace_creds: bool,
+    ) -> None:
+        """
+        Set the credentials for the model monitoring application.
+
+        :param project: Project name.
+        :param credentials: Credentials to set.
+        :param replace_creds: If True, will override the existing credentials.
+        """
+        self.api_call(
+            method=mlrun.common.types.HTTPMethod.POST,
+            path=f"projects/{project}/model-monitoring/set-model-monitoring-credentials",
+            params={**credentials, "replace_creds": replace_creds},
+        )
+
     def create_hub_source(
         self, source: Union[dict, mlrun.common.schemas.IndexedHubSource]
     ):