mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/api/api/deps.py +14 -1
- mlrun/api/api/endpoints/frontend_spec.py +0 -2
- mlrun/api/api/endpoints/functions.py +15 -27
- mlrun/api/api/endpoints/grafana_proxy.py +435 -74
- mlrun/api/api/endpoints/healthz.py +5 -18
- mlrun/api/api/endpoints/model_endpoints.py +33 -37
- mlrun/api/api/utils.py +6 -13
- mlrun/api/crud/__init__.py +14 -16
- mlrun/api/crud/logs.py +5 -7
- mlrun/api/crud/model_monitoring/__init__.py +2 -2
- mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
- mlrun/api/crud/pipelines.py +2 -3
- mlrun/api/db/sqldb/models/models_mysql.py +52 -19
- mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
- mlrun/api/db/sqldb/session.py +19 -26
- mlrun/api/schemas/__init__.py +2 -0
- mlrun/api/schemas/constants.py +0 -13
- mlrun/api/schemas/frontend_spec.py +0 -1
- mlrun/api/schemas/model_endpoints.py +38 -195
- mlrun/api/schemas/schedule.py +2 -2
- mlrun/api/utils/clients/log_collector.py +5 -0
- mlrun/builder.py +9 -41
- mlrun/config.py +1 -76
- mlrun/data_types/__init__.py +1 -6
- mlrun/data_types/data_types.py +1 -3
- mlrun/datastore/__init__.py +2 -9
- mlrun/datastore/sources.py +20 -25
- mlrun/datastore/store_resources.py +1 -1
- mlrun/datastore/targets.py +34 -67
- mlrun/datastore/utils.py +4 -26
- mlrun/db/base.py +2 -4
- mlrun/db/filedb.py +5 -13
- mlrun/db/httpdb.py +32 -64
- mlrun/db/sqldb.py +2 -4
- mlrun/errors.py +0 -5
- mlrun/execution.py +0 -2
- mlrun/feature_store/api.py +8 -24
- mlrun/feature_store/feature_set.py +6 -28
- mlrun/feature_store/feature_vector.py +0 -2
- mlrun/feature_store/ingestion.py +11 -8
- mlrun/feature_store/retrieval/base.py +43 -271
- mlrun/feature_store/retrieval/dask_merger.py +153 -55
- mlrun/feature_store/retrieval/job.py +3 -12
- mlrun/feature_store/retrieval/local_merger.py +130 -48
- mlrun/feature_store/retrieval/spark_merger.py +125 -126
- mlrun/features.py +2 -7
- mlrun/model_monitoring/constants.py +6 -48
- mlrun/model_monitoring/helpers.py +35 -118
- mlrun/model_monitoring/model_monitoring_batch.py +260 -293
- mlrun/model_monitoring/stream_processing_fs.py +253 -220
- mlrun/platforms/iguazio.py +0 -33
- mlrun/projects/project.py +72 -34
- mlrun/runtimes/base.py +0 -5
- mlrun/runtimes/daskjob.py +0 -2
- mlrun/runtimes/function.py +3 -29
- mlrun/runtimes/kubejob.py +15 -39
- mlrun/runtimes/local.py +45 -7
- mlrun/runtimes/mpijob/abstract.py +0 -2
- mlrun/runtimes/mpijob/v1.py +0 -2
- mlrun/runtimes/pod.py +0 -2
- mlrun/runtimes/remotesparkjob.py +0 -2
- mlrun/runtimes/serving.py +0 -6
- mlrun/runtimes/sparkjob/abstract.py +2 -39
- mlrun/runtimes/sparkjob/spark3job.py +0 -2
- mlrun/serving/__init__.py +1 -2
- mlrun/serving/routers.py +35 -35
- mlrun/serving/server.py +12 -22
- mlrun/serving/states.py +30 -162
- mlrun/serving/v2_serving.py +10 -13
- mlrun/utils/clones.py +1 -1
- mlrun/utils/model_monitoring.py +96 -122
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
- mlrun/api/crud/model_monitoring/grafana.py +0 -427
- mlrun/datastore/spark_udf.py +0 -40
- mlrun/model_monitoring/__init__.py +0 -44
- mlrun/model_monitoring/common.py +0 -112
- mlrun/model_monitoring/model_endpoint.py +0 -141
- mlrun/model_monitoring/stores/__init__.py +0 -106
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -23
- mlrun/model_monitoring/stores/models/base.py +0 -18
- mlrun/model_monitoring/stores/models/mysql.py +0 -100
- mlrun/model_monitoring/stores/models/sqlite.py +0 -98
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
- mlrun/utils/db.py +0 -52
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/datastore/sources.py
CHANGED

@@ -62,11 +62,6 @@ class BaseSourceDriver(DataSource):
     def to_step(self, key_field=None, time_field=None, context=None):
         import storey
 
-        if not self.support_storey:
-            raise mlrun.errors.MLRunRuntimeError(
-                f"{type(self).__name__} does not support storey engine"
-            )
-
         return storey.SyncEmitSource(context=context)
 
     def get_table_object(self):
@@ -251,6 +246,7 @@ class ParquetSource(BaseSourceDriver):
         start_time: Optional[Union[datetime, str]] = None,
         end_time: Optional[Union[datetime, str]] = None,
     ):
+
         super().__init__(
             name,
             path,
@@ -380,15 +376,6 @@ class BigQuerySource(BaseSourceDriver):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "cannot specify both table and query args"
             )
-        # Otherwise, the client library does not fully respect the limit
-        if (
-            max_results_for_table
-            and chunksize
-            and max_results_for_table % chunksize != 0
-        ):
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "max_results_for_table must be a multiple of chunksize"
-            )
         attrs = {
             "query": query,
             "table": table,
@@ -408,6 +395,7 @@ class BigQuerySource(BaseSourceDriver):
             start_time=start_time,
             end_time=end_time,
         )
+        self._rows_iterator = None
 
     def _get_credentials_string(self):
         gcp_project = self.attributes.get("gcp_project", None)
@@ -450,28 +438,35 @@ class BigQuerySource(BaseSourceDriver):
         if query:
             query_job = bqclient.query(query)
 
-            rows_iterator = query_job.result(page_size=chunksize)
+            self._rows_iterator = query_job.result(page_size=chunksize)
+            dtypes = schema_to_dtypes(self._rows_iterator.schema)
+            if chunksize:
+                # passing bqstorage_client greatly improves performance
+                return self._rows_iterator.to_dataframe_iterable(
+                    bqstorage_client=BigQueryReadClient(), dtypes=dtypes
+                )
+            else:
+                return self._rows_iterator.to_dataframe(dtypes=dtypes)
         elif table:
             table = self.attributes.get("table")
             max_results = self.attributes.get("max_results")
 
-            rows_iterator = bqclient.list_rows(
+            rows = bqclient.list_rows(
                 table, page_size=chunksize, max_results=max_results
             )
+            dtypes = schema_to_dtypes(rows.schema)
+            if chunksize:
+                # passing bqstorage_client greatly improves performance
+                return rows.to_dataframe_iterable(
+                    bqstorage_client=BigQueryReadClient(), dtypes=dtypes
+                )
+            else:
+                return rows.to_dataframe(dtypes=dtypes)
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "table or query args must be specified"
            )
 
-        dtypes = schema_to_dtypes(rows_iterator.schema)
-        if chunksize:
-            # passing bqstorage_client greatly improves performance
-            return rows_iterator.to_dataframe_iterable(
-                bqstorage_client=BigQueryReadClient(), dtypes=dtypes
-            )
-        else:
-            return rows_iterator.to_dataframe(dtypes=dtypes)
-
     def is_iterator(self):
         return bool(self.attributes.get("chunksize"))
mlrun/datastore/store_resources.py
CHANGED

@@ -81,7 +81,7 @@ class ResourceCache:
         endpoint, uri = parse_path(uri)
         self._tabels[uri] = Table(
             uri,
-            V3ioDriver(webapi=endpoint
+            V3ioDriver(webapi=endpoint),
             flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
         )
         return self._tabels[uri]
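The BigQuerySource rework above duplicates the DataFrame construction into the query and table branches (keeping the query result iterator on self._rows_iterator) and drops the rc1 requirement that max_results_for_table be a multiple of chunksize. A hedged usage sketch of the chunked path; the source name and query are illustrative, and it assumes GCP credentials plus the google-cloud-bigquery client libraries are available:

from mlrun.datastore.sources import BigQuerySource

source = BigQuerySource(
    "bq_source",  # illustrative name
    query="select * from `my-project.my_dataset.my_table`",  # illustrative query
    chunksize=10_000,
)

frames = source.to_dataframe()
if source.is_iterator():
    # with chunksize set, each branch now returns to_dataframe_iterable(...),
    # an iterable of pandas DataFrames backed by the BigQuery storage client
    for chunk in frames:
        print(len(chunk))
else:
    print(len(frames))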
mlrun/datastore/targets.py
CHANGED

@@ -15,7 +15,6 @@ import ast
 import datetime
 import os
 import random
-import sys
 import time
 from collections import Counter
 from copy import copy
@@ -526,8 +525,8 @@ class BaseStoreTarget(DataTargetBase):
                 ("minute", "%M"),
             ]:
                 partition_cols.append(unit)
-                target_df[unit] =
-
+                target_df[unit] = getattr(
+                    pd.DatetimeIndex(target_df[timestamp_key]), unit
                 )
                 if unit == time_partitioning_granularity:
                     break
@@ -1051,11 +1050,24 @@ class NoSqlBaseTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def prepare_spark_df(self, df, key_columns):
-        raise NotImplementedError()
-
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        raise NotImplementedError()
+        spark_options = {
+            "path": store_path_to_spark(self.get_target_path()),
+            "format": "io.iguaz.v3io.spark.sql.kv",
+        }
+        if isinstance(key_column, list) and len(key_column) >= 1:
+            if len(key_column) > 2:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Spark supports maximun of 2 keys and {key_column} are provided"
+                )
+            spark_options["key"] = key_column[0]
+            if len(key_column) > 1:
+                spark_options["sorting-key"] = key_column[1]
+        else:
+            spark_options["key"] = key_column
+        if not overwrite:
+            spark_options["columnUpdate"] = True
+        return spark_options
 
     def get_dask_options(self):
         return {"format": "csv"}
@@ -1063,6 +1075,15 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def as_df(self, columns=None, df_module=None, **kwargs):
         raise NotImplementedError()
 
+    def prepare_spark_df(self, df, key_columns):
+        import pyspark.sql.functions as funcs
+
+        for col_name, col_type in df.dtypes:
+            if col_type.startswith("decimal("):
+                # V3IO does not support this level of precision
+                df = df.withColumn(col_name, funcs.col(col_name).cast("double"))
+        return df
+
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1102,52 +1123,10 @@ class NoSqlTarget(NoSqlBaseTarget):
         endpoint, uri = parse_path(self.get_target_path())
         return Table(
             uri,
-            V3ioDriver(webapi=endpoint
+            V3ioDriver(webapi=endpoint),
             flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
         )
 
-    def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        spark_options = {
-            "path": store_path_to_spark(self.get_target_path()),
-            "format": "io.iguaz.v3io.spark.sql.kv",
-        }
-        if isinstance(key_column, list) and len(key_column) >= 1:
-            spark_options["key"] = key_column[0]
-            if len(key_column) > 2:
-                spark_options["sorting-key"] = "_spark_object_name"
-            if len(key_column) == 2:
-                spark_options["sorting-key"] = key_column[1]
-        else:
-            spark_options["key"] = key_column
-        if not overwrite:
-            spark_options["columnUpdate"] = True
-        return spark_options
-
-    def prepare_spark_df(self, df, key_columns):
-        from pyspark.sql.functions import col
-
-        spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
-        sys.path.append(spark_udf_directory)
-        try:
-            import spark_udf
-
-            df.rdd.context.addFile(spark_udf.__file__)
-
-            for col_name, col_type in df.dtypes:
-                if col_type.startswith("decimal("):
-                    # V3IO does not support this level of precision
-                    df = df.withColumn(col_name, col(col_name).cast("double"))
-            if len(key_columns) > 2:
-                return df.withColumn(
-                    "_spark_object_name",
-                    spark_udf.hash_and_concat_v3io_udf(
-                        *[col(c) for c in key_columns[1:]]
-                    ),
-                )
-        finally:
-            sys.path.remove(spark_udf_directory)
-        return df
-
 
 class RedisNoSqlTarget(NoSqlBaseTarget):
     kind = TargetTypes.redisnosql
@@ -1207,23 +1186,11 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         return endpoint
 
     def prepare_spark_df(self, df, key_columns):
-        from pyspark.sql.functions import col
+        from pyspark.sql.functions import udf
+        from pyspark.sql.types import StringType
 
-        spark_udf_directory = os.path.dirname(os.path.abspath(__file__))
-        sys.path.append(spark_udf_directory)
-        try:
-            import spark_udf
-
-            df.rdd.context.addFile(spark_udf.__file__)
-
-            df = df.withColumn(
-                "_spark_object_name",
-                spark_udf.hash_and_concat_redis_udf(*[col(c) for c in key_columns]),
-            )
-        finally:
-            sys.path.remove(spark_udf_directory)
-
-        return df
+        udf1 = udf(lambda x: str(x) + "}:static", StringType())
+        return df.withColumn("_spark_object_name", udf1(key_columns[0]))
 
 
 class StreamTarget(BaseStoreTarget):
@@ -1257,7 +1224,7 @@ class StreamTarget(BaseStoreTarget):
             graph_shape="cylinder",
             class_name="storey.StreamTarget",
             columns=column_list,
-            storage=V3ioDriver(webapi=endpoint
+            storage=V3ioDriver(webapi=endpoint),
             stream_path=uri,
             **self.attributes,
         )
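In the targets.py change above, get_spark_options moves from NoSqlTarget up to NoSqlBaseTarget and now fails fast on more than two key columns, where rc1 hashed the extras into a synthetic _spark_object_name column via the removed spark_udf module. A standalone restatement of the new key handling (build_spark_kv_options and ValueError are stand-ins here; the real method lives on NoSqlBaseTarget and raises mlrun.errors.MLRunInvalidArgumentError, with "maximun" misspelled as shipped):

def build_spark_kv_options(path, key_column, overwrite=True):
    # mirrors NoSqlBaseTarget.get_spark_options in rc2
    spark_options = {"path": path, "format": "io.iguaz.v3io.spark.sql.kv"}
    if isinstance(key_column, list) and len(key_column) >= 1:
        if len(key_column) > 2:
            # rc1 synthesized a hashed key column here; rc2 rejects the input
            raise ValueError(
                f"Spark supports maximum of 2 keys and {key_column} are provided"
            )
        spark_options["key"] = key_column[0]
        if len(key_column) > 1:
            spark_options["sorting-key"] = key_column[1]
    else:
        spark_options["key"] = key_column
    if not overwrite:
        spark_options["columnUpdate"] = True
    return spark_options


print(build_spark_kv_options("v3io://projects/demo/fs/my-set", ["user_id", "event_time"]))
# {'path': 'v3io://projects/demo/fs/my-set', 'format': 'io.iguaz.v3io.spark.sql.kv',
#  'key': 'user_id', 'sorting-key': 'event_time'}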
mlrun/datastore/utils.py
CHANGED

@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import typing
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import urlparse
 
 
 def store_path_to_spark(path):
@@ -37,32 +36,11 @@ def store_path_to_spark(path):
     return path
 
 
-def parse_kafka_url(
-    url: str, bootstrap_servers: typing.List = None
-) -> typing.Tuple[str, typing.List]:
-    """Generating Kafka topic and adjusting a list of bootstrap servers.
-
-    :param url:               URL path to parse using urllib.parse.urlparse.
-    :param bootstrap_servers: List of bootstrap servers for the kafka brokers.
-
-    :return: A tuple of:
-         [0] = Kafka topic value
-         [1] = List of bootstrap servers
-    """
+def parse_kafka_url(url, bootstrap_servers=None):
     bootstrap_servers = bootstrap_servers or []
-
-    # Parse the provided URL into six components according to the general structure of a URL
     url = urlparse(url)
-
-    # Add the network location to the bootstrap servers list
     if url.netloc:
         bootstrap_servers = [url.netloc] + bootstrap_servers
-
-
-    query_dict = parse_qs(url.query)
-    if "topic" in query_dict:
-        topic = query_dict["topic"][0]
-    else:
-        topic = url.path
-    topic = topic.lstrip("/")
+    topic = url.path
+    topic = topic.lstrip("/")
     return topic, bootstrap_servers
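parse_kafka_url loses its type hints, docstring, and the parse_qs branch, so a topic can no longer be passed as a ?topic=... query parameter. The simplified helper, copied from the diff above, with a usage demo:

from urllib.parse import urlparse


def parse_kafka_url(url, bootstrap_servers=None):
    bootstrap_servers = bootstrap_servers or []
    url = urlparse(url)
    if url.netloc:
        # the URL's network location is prepended to the broker list
        bootstrap_servers = [url.netloc] + bootstrap_servers
    topic = url.path
    topic = topic.lstrip("/")
    return topic, bootstrap_servers


print(parse_kafka_url("kafka://broker-1:9092/my-topic", ["broker-2:9092"]))
# ('my-topic', ['broker-1:9092', 'broker-2:9092'])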
mlrun/db/base.py
CHANGED

@@ -17,8 +17,8 @@ import warnings
 from abc import ABC, abstractmethod
 from typing import List, Optional, Union
 
-import mlrun.model_monitoring.model_endpoint
 from mlrun.api import schemas
+from mlrun.api.schemas import ModelEndpoint
 
 
 class RunDBError(Exception):
@@ -479,9 +479,7 @@ class RunDBInterface(ABC):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: ModelEndpoint,
     ):
         pass
 
mlrun/db/filedb.py
CHANGED

@@ -14,7 +14,6 @@
 
 import json
 import pathlib
-import typing
 from datetime import datetime, timedelta, timezone
 from os import listdir, makedirs, path, remove, scandir
 from typing import List, Optional, Union
@@ -24,9 +23,9 @@ from dateutil.parser import parse as parse_time
 
 import mlrun.api.schemas
 import mlrun.errors
-import mlrun.model_monitoring.model_endpoint
 
 from ..api import schemas
+from ..api.schemas import ModelEndpoint
 from ..config import config
 from ..datastore import store_manager
 from ..lists import ArtifactList, RunList
@@ -60,8 +59,7 @@ class FileRunDB(RunDBInterface):
         self.dirpath = dirpath
         self._datastore = None
         self._subpath = None
-        self._secrets
-        self._projects = {}
+        self._secrets = None
        makedirs(self.schedules_dir, exist_ok=True)
 
     def connect(self, secrets=None):
@@ -554,10 +552,7 @@ class FileRunDB(RunDBInterface):
         self,
         project: mlrun.api.schemas.Project,
     ) -> mlrun.api.schemas.Project:
-
-        project = mlrun.api.schemas.Project(**project)
-        self._projects[project.metadata.name] = project
-        return project
+        raise NotImplementedError()
 
     @property
     def schedules_dir(self):
@@ -743,8 +738,7 @@ class FileRunDB(RunDBInterface):
         provider: str = mlrun.api.schemas.SecretProviderName.kubernetes.value,
         secrets: dict = None,
     ):
-
-        self._secrets._secrets[key] = value
+        raise NotImplementedError()
 
     def list_project_secrets(
         self,
@@ -786,9 +780,7 @@ class FileRunDB(RunDBInterface):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: ModelEndpoint,
     ):
         raise NotImplementedError()
 
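With the rc1 stub bodies gone, the file-backed DB now refuses these operations outright instead of mutating half-initialized state. A hedged illustration (the dirpath, project name, and secret values are hypothetical, and the constructor call assumes FileRunDB takes its directory path positionally as the diff's self.dirpath = dirpath suggests):

from mlrun.db.filedb import FileRunDB

db = FileRunDB("/tmp/mlrun-filedb")  # hypothetical local path
try:
    db.create_project_secrets("my-project", secrets={"token": "..."})
except NotImplementedError:
    # rc1 attempted a broken in-memory write; rc2 raises explicitly
    print("not supported by the file-backed run DB")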
mlrun/db/httpdb.py
CHANGED

@@ -27,11 +27,11 @@ import requests
 import semver
 
 import mlrun
-import mlrun.model_monitoring.model_endpoint
 import mlrun.projects
 from mlrun.api import schemas
 from mlrun.errors import MLRunInvalidArgumentError, err_to_str
 
+from ..api.schemas import ModelEndpoint
 from ..artifacts import Artifact
 from ..config import config
 from ..feature_store import FeatureSet, FeatureVector
@@ -1425,9 +1425,9 @@ class HTTPRunDB(RunDBInterface):
         :param page_size: Size of a single page when applying pagination.
         """
 
-        if project != "*" and (page_token or page_size):
+        if project != "*" and (page_token or page_size or sort_by):
             raise mlrun.errors.MLRunInvalidArgumentError(
-                "Filtering by project can not be used together with pagination"
+                "Filtering by project can not be used together with pagination, or sorting"
             )
         params = {
             "namespace": namespace,
@@ -2533,9 +2533,7 @@ class HTTPRunDB(RunDBInterface):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: ModelEndpoint,
     ):
         """
         Creates a DB record with the given model_endpoint record.
@@ -2545,16 +2543,11 @@ class HTTPRunDB(RunDBInterface):
         :param model_endpoint: An object representing the model endpoint.
         """
 
-        if isinstance(
-            model_endpoint, mlrun.model_monitoring.model_endpoint.ModelEndpoint
-        ):
-            model_endpoint = model_endpoint.to_dict()
-
         path = f"projects/{project}/model-endpoints/{endpoint_id}"
         self.api_call(
             method="POST",
             path=path,
-            body=
+            body=model_endpoint.json(),
         )
 
     def delete_model_endpoint(
@@ -2563,7 +2556,7 @@ class HTTPRunDB(RunDBInterface):
         endpoint_id: str,
     ):
         """
-        Deletes the
+        Deletes the KV record of a given model endpoint, project and endpoint_id are used for lookup
 
         :param project: The name of the project
         :param endpoint_id: The id of the endpoint
@@ -2586,15 +2579,13 @@ class HTTPRunDB(RunDBInterface):
         metrics: Optional[List[str]] = None,
         top_level: bool = False,
         uids: Optional[List[str]] = None,
-    ) ->
+    ) -> schemas.ModelEndpointList:
         """
-        Returns a list of
-
+        Returns a list of ModelEndpointState objects. Each object represents the current state of a model endpoint.
+        This functions supports filtering by the following parameters:
         1) model
         2) function
         3) labels
-        4) top level
-        5) uids
         By default, when no filters are applied, all available endpoints for the given project will be listed.
 
         In addition, this functions provides a facade for listing endpoint related metrics. This facade is time-based
@@ -2604,8 +2595,8 @@ class HTTPRunDB(RunDBInterface):
         :param project: The name of the project
         :param model: The name of the model to filter by
         :param function: The name of the function to filter by
-        :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a
-
+        :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a label
+                       (i.e. list("key==value")) or by looking for the existence of a given key (i.e. "key")
         :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
         :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
                       time, a Unix timestamp in milliseconds, a relative time (`'now'` or
@@ -2616,14 +2607,10 @@ class HTTPRunDB(RunDBInterface):
                      `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` =
                      days), or 0 for the earliest time.
         :param top_level: if true will return only routers and endpoint that are NOT children of any router
-        :param uids: if passed will return
+        :param uids: if passed will return ModelEndpointList of endpoints with uid in uids
         """
 
         path = f"projects/{project}/model-endpoints"
-
-        if labels and isinstance(labels, dict):
-            labels = [f"{key}={value}" for key, value in labels.items()]
-
         response = self.api_call(
             method="GET",
             path=path,
@@ -2638,15 +2625,7 @@ class HTTPRunDB(RunDBInterface):
                 "uid": uids,
             },
         )
-
-        # Generate a list of a model endpoint dictionaries
-        model_endpoints = response.json()["endpoints"]
-        if model_endpoints:
-            return [
-                mlrun.model_monitoring.model_endpoint.ModelEndpoint.from_dict(obj)
-                for obj in model_endpoints
-            ]
-        return []
+        return schemas.ModelEndpointList(**response.json())
 
     def get_model_endpoint(
         self,
@@ -2656,29 +2635,21 @@ class HTTPRunDB(RunDBInterface):
         end: Optional[str] = None,
         metrics: Optional[List[str]] = None,
         feature_analysis: bool = False,
-    ) ->
-        """
-        Returns a
-
-        :param project:
-        :param endpoint_id:
-        :param
-
-
-
-        :param end:
-
-
-
-
-                        metrics for model endpoints such as predictions_per_second and
-                        latency_avg_5m but also custom metrics defined by the user. Please note that
-                        these metrics are stored in the time series DB and the results will be
-                        appeared under model_endpoint.spec.metrics.
-        :param feature_analysis: When True, the base feature statistics and current feature statistics will
-                        be added to the output of the resulting object.
-
-        :return: A `ModelEndpoint` object.
+    ) -> schemas.ModelEndpoint:
+        """
+        Returns a ModelEndpoint object with additional metrics and feature related data.
+
+        :param project: The name of the project
+        :param endpoint_id: The id of the model endpoint
+        :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
+        :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
+                      time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`,
+                      where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
+        :param end: The end time of the metrics. Can be represented by a string containing an RFC 3339
+                    time, a Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`,
+                    where `m` = minutes, `h` = hours, and `'d'` = days), or 0 for the earliest time.
+        :param feature_analysis: When True, the base feature statistics and current feature statistics will be added to
+                                 the output of the resulting object
         """
 
         path = f"projects/{project}/model-endpoints/{endpoint_id}"
@@ -2692,10 +2663,7 @@ class HTTPRunDB(RunDBInterface):
                 "feature_analysis": feature_analysis,
             },
         )
-
-        return mlrun.model_monitoring.model_endpoint.ModelEndpoint.from_dict(
-            response.json()
-        )
+        return schemas.ModelEndpoint(**response.json())
 
     def patch_model_endpoint(
         self,
@@ -2709,9 +2677,9 @@ class HTTPRunDB(RunDBInterface):
         :param project: The name of the project.
         :param endpoint_id: The id of the endpoint.
         :param attributes: Dictionary of attributes that will be used for update the model endpoint. The keys
-                           of this dictionary should exist in the target table.
-                           from type string or from a valid numerical type such as int or float.
-
+                           of this dictionary should exist in the target table. The values should be
+                           from type string or from a valid numerical type such as int or float. More details
+                           about the model endpoint available attributes can be found under
                            :py:class:`~mlrun.api.schemas.ModelEndpoint`.
 
         Example::
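Across base.py, filedb.py, httpdb.py, and sqldb.py, the model-endpoint CRUD methods now speak the pydantic schemas from mlrun.api.schemas instead of mlrun.model_monitoring.model_endpoint.ModelEndpoint. A hedged client-side sketch; the project name is illustrative, it assumes a reachable MLRun API, and the endpoints/metadata.uid field access follows the structure implied by the removed response.json()["endpoints"] handling:

import mlrun
from mlrun.api.schemas import ModelEndpoint, ModelEndpointList

db = mlrun.get_run_db()

# list_model_endpoints now returns a schemas.ModelEndpointList rather than a
# plain list of model_monitoring ModelEndpoint objects
endpoints: ModelEndpointList = db.list_model_endpoints(project="my-project")
for endpoint in endpoints.endpoints:
    print(endpoint.metadata.uid)

# get_model_endpoint likewise builds a schemas.ModelEndpoint straight from the
# response JSON, and create_model_endpoint posts model_endpoint.json()
first: ModelEndpoint = db.get_model_endpoint(
    project="my-project", endpoint_id=endpoints.endpoints[0].metadata.uid
)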
mlrun/db/sqldb.py
CHANGED

@@ -16,7 +16,6 @@ import datetime
 from typing import List, Optional, Union
 
 import mlrun.api.schemas
-import mlrun.model_monitoring.model_endpoint
 from mlrun.api.db.base import DBError
 from mlrun.api.db.sqldb.db import SQLDB as SQLAPIDB
 from mlrun.api.db.sqldb.session import create_session
@@ -30,6 +29,7 @@ from mlrun.api.db.sqldb.session import create_session
 # will be httpdb to that same api service) we have this class which is kind of a proxy between the RunDB interface to
 # the api service's DB interface
 from ..api import schemas
+from ..api.schemas import ModelEndpoint
 from .base import RunDBError, RunDBInterface
 
 
@@ -773,9 +773,7 @@ class SQLDB(RunDBInterface):
         self,
         project: str,
         endpoint_id: str,
-        model_endpoint: Union[
-            mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
-        ],
+        model_endpoint: ModelEndpoint,
     ):
         raise NotImplementedError()
 
mlrun/errors.py
CHANGED

@@ -179,10 +179,6 @@ class MLRunInternalServerError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.INTERNAL_SERVER_ERROR.value
 
 
-class MLRunServiceUnavailableError(MLRunHTTPStatusError):
-    error_status_code = HTTPStatus.SERVICE_UNAVAILABLE.value
-
-
 class MLRunRuntimeError(MLRunHTTPStatusError, RuntimeError):
     error_status_code = HTTPStatus.INTERNAL_SERVER_ERROR.value
 
@@ -217,5 +213,4 @@ STATUS_ERRORS = {
     HTTPStatus.CONFLICT.value: MLRunConflictError,
     HTTPStatus.PRECONDITION_FAILED.value: MLRunPreconditionFailedError,
     HTTPStatus.INTERNAL_SERVER_ERROR.value: MLRunInternalServerError,
-    HTTPStatus.SERVICE_UNAVAILABLE.value: MLRunServiceUnavailableError,
 }
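Removing MLRunServiceUnavailableError also removes the 503 entry from STATUS_ERRORS, so HTTP 503 responses fall back to generic error handling. A small illustration grounded in the diff:

from http import HTTPStatus

import mlrun.errors

exc_class = mlrun.errors.STATUS_ERRORS.get(HTTPStatus.SERVICE_UNAVAILABLE.value)
print(exc_class)  # None in rc2; MLRunServiceUnavailableError in rc1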
mlrun/execution.py
CHANGED

@@ -20,7 +20,6 @@ from typing import List, Union
 
 import numpy as np
 import yaml
-from dateutil import parser
 
 import mlrun
 from mlrun.artifacts import ModelArtifact
@@ -315,7 +314,6 @@ class MLClientCtx(object):
 
         start = get_in(attrs, "status.start_time")
         if start:
-            start = parser.parse(start) if isinstance(start, str) else start
             self._start_time = start
         self._state = "running"
         if store_run: