mlrun 1.8.0rc37__py3-none-any.whl → 1.8.0rc39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__main__.py +1 -8
- mlrun/artifacts/base.py +3 -3
- mlrun/artifacts/manager.py +1 -1
- mlrun/common/model_monitoring/helpers.py +0 -13
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +2 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +5 -11
- mlrun/datastore/__init__.py +57 -16
- mlrun/datastore/base.py +0 -11
- mlrun/datastore/datastore_profile.py +10 -7
- mlrun/datastore/sources.py +6 -17
- mlrun/datastore/storeytargets.py +29 -15
- mlrun/datastore/utils.py +73 -0
- mlrun/db/base.py +1 -0
- mlrun/db/httpdb.py +16 -0
- mlrun/db/nopdb.py +1 -0
- mlrun/feature_store/__init__.py +2 -0
- mlrun/feature_store/api.py +77 -0
- mlrun/model_monitoring/api.py +2 -20
- mlrun/model_monitoring/controller.py +18 -2
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +21 -6
- mlrun/model_monitoring/helpers.py +15 -27
- mlrun/model_monitoring/stream_processing.py +7 -34
- mlrun/projects/operations.py +3 -3
- mlrun/projects/pipelines.py +5 -0
- mlrun/projects/project.py +4 -4
- mlrun/run.py +4 -4
- mlrun/runtimes/kubejob.py +2 -2
- mlrun/runtimes/nuclio/application/application.py +0 -2
- mlrun/runtimes/nuclio/function.py +1 -46
- mlrun/runtimes/pod.py +37 -145
- mlrun/serving/routers.py +80 -64
- mlrun/serving/states.py +30 -1
- mlrun/serving/v2_serving.py +24 -62
- mlrun/utils/async_http.py +1 -2
- mlrun/utils/helpers.py +1 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/METADATA +1 -1
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/RECORD +43 -43
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED
@@ -17,7 +17,6 @@ import json
 import pathlib
 import socket
 import traceback
-import warnings
 from ast import literal_eval
 from base64 import b64decode
 from os import environ, path, remove
@@ -864,14 +863,8 @@ def version():
 )
 @click.option("--offset", type=int, default=0, help="byte offset")
 @click.option("--db", help="api and db service path/url")
-
-def logs(uid, project, offset, db, watch):
+def logs(uid, project, offset, db):
     """Get or watch task logs"""
-    if watch:
-        warnings.warn(
-            "'--watch' is deprecated in 1.6.0, and will be removed in 1.8.0, "
-            # TODO: Remove in 1.8.0
-        )
     mldb = get_run_db(db or mlconf.dbpath)
     if mldb.kind == "http":
         state, _ = mldb.watch_log(uid, project, watch=False, offset=offset)

mlrun/artifacts/base.py
CHANGED
@@ -893,7 +893,7 @@ def generate_target_path(item: Artifact, artifact_path, producer):
     return f"{artifact_path}{item.key}{suffix}"
 
 
-# TODO:
+# TODO: Remove once data migration v5 is obsolete
 def convert_legacy_artifact_to_new_format(
     legacy_artifact: dict,
 ) -> Artifact:
@@ -905,9 +905,9 @@ def convert_legacy_artifact_to_new_format(
     artifact_tag = legacy_artifact.get("tag", "")
     if artifact_tag:
         artifact_key = f"{artifact_key}:{artifact_tag}"
-    # TODO:
+    # TODO: Remove once data migration v5 is obsolete
     warnings.warn(
-        f"Converting legacy artifact '{artifact_key}' to new format. This will not be supported in MLRun 1.
+        f"Converting legacy artifact '{artifact_key}' to new format. This will not be supported in MLRun 1.9.0. "
         f"Make sure to save the artifact/project in the new format.",
         FutureWarning,
     )

mlrun/artifacts/manager.py
CHANGED
@@ -108,7 +108,7 @@ class ArtifactProducer:
 def dict_to_artifact(struct: dict) -> Artifact:
     kind = struct.get("kind", "")
 
-    # TODO:
+    # TODO: Remove once data migration v5 is obsolete
     if mlrun.utils.is_legacy_artifact(struct):
         return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
 

mlrun/common/model_monitoring/helpers.py
CHANGED
@@ -50,19 +50,6 @@ def get_kafka_topic(project: str, function_name: typing.Optional[str] = None) ->
     )
 
 
-def parse_monitoring_stream_path(
-    stream_uri: str, project: str, function_name: typing.Optional[str] = None
-) -> str:
-    if stream_uri.startswith("kafka://"):
-        if "?topic" in stream_uri:
-            raise mlrun.errors.MLRunValueError("Custom kafka topic is not allowed")
-        # Add topic to stream kafka uri
-        topic = get_kafka_topic(project=project, function_name=function_name)
-        stream_uri += f"?topic={topic}"
-
-    return stream_uri
-
-
 def _get_counts(hist: Histogram) -> BinCounts:
     """Return the histogram counts"""
     return BinCounts(hist[0])

mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -42,12 +42,10 @@ class ModelEndpointSchema(MonitoringStrEnum):
     # spec
     FUNCTION_NAME = "function_name"
     FUNCTION_TAG = "function_tag"
-    FUNCTION_UID = "function_uid"
     MODEL_NAME = "model_name"
-
-
+    MODEL_TAGS = "model_tags"
+    MODEL_PATH = "model_path"
     MODEL_CLASS = "model_class"
-    MODEL_UID = "model_uid"
     FEATURE_NAMES = "feature_names"
     LABEL_NAMES = "label_names"
     FEATURE_STATS = "feature_stats"

mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -117,14 +117,13 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
 
 
 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
-    model_uid: Optional[str] = ""
-    model_name: Optional[str] = ""
-    model_db_key: Optional[str] = ""
-    model_tag: Optional[str] = ""
     model_class: Optional[str] = ""
     function_name: Optional[str] = ""
     function_tag: Optional[str] = ""
-
+    model_path: Optional[str] = ""
+    model_name: Optional[str] = ""
+    model_tags: Optional[list[str]] = []
+    _model_id: Optional[int] = ""
     feature_names: Optional[list[str]] = []
     label_names: Optional[list[str]] = []
     feature_stats: Optional[dict] = {}
@@ -137,12 +136,8 @@ class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     @classmethod
     def mutable_fields(cls):
         return [
-            "
-            "model_name",
-            "model_db_key",
-            "model_tag",
+            "model_path",
             "model_class",
-            "function_uid",
             "feature_names",
             "label_names",
             "children",
@@ -206,7 +201,6 @@ class ModelEndpoint(BaseModel):
             ModelEndpointSchema.CURRENT_STATS,
             ModelEndpointSchema.DRIFT_MEASURES,
             ModelEndpointSchema.FUNCTION_URI,
-            ModelEndpointSchema.MODEL_URI,
         }
         # Initialize a flattened dictionary that will be filled with the model endpoint dictionary attributes
         flatten_dict = {}

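The spec rework above replaces the model_uid / model_db_key / model_tag fields with model_path, model_name and model_tags. A minimal sketch of how the new fields might be populated; the import path follows the file shown above, and the store path, names and tag values are made up:

    from mlrun.common.schemas.model_monitoring.model_endpoints import ModelEndpointSpec

    spec = ModelEndpointSpec(
        model_path="store://models/my-project/my-model:latest",  # assumed store-path format
        model_name="my-model",
        model_tags=["latest"],
        model_class="ClassifierModel",
        function_name="serving",
        function_tag="latest",
    )
    # mutable_fields() now lists "model_path" instead of the removed model_* fields
    print(ModelEndpointSpec.mutable_fields())
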
mlrun/datastore/__init__.py
CHANGED
@@ -34,9 +34,17 @@ __all__ = [
     "VectorStoreCollection",
 ]
 
+from urllib.parse import urlparse
+
 import fsspec
+from mergedeep import merge
 
 import mlrun.datastore.wasbfs
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    DatastoreProfileV3io,
+)
 from mlrun.platforms.iguazio import (
     HTTPOutputStream,
     KafkaOutputStream,
@@ -106,23 +114,56 @@ def get_stream_pusher(stream_path: str, **kwargs):
 
     :param stream_path: path/url of stream
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if stream_path.startswith("ds://"):
+        datastore_profile = mlrun.datastore.datastore_profile.datastore_profile_read(
+            stream_path
+        )
+        if isinstance(
+            datastore_profile,
+            (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+        ):
+            attributes = datastore_profile.attributes()
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed_url = urlparse(stream_path)
+            topic = (
+                parsed_url.path.strip("/")
+                if parsed_url.path
+                else datastore_profile.get_topic()
+            )
+            producer_options = mlrun.datastore.utils.KafkaParameters(
+                attributes
+            ).producer()
+            return KafkaOutputStream(topic, brokers, producer_options=producer_options)
+
+        elif isinstance(datastore_profile, DatastoreProfileV3io):
+            parsed_url = urlparse(stream_path)
+            stream_path = datastore_profile.url(parsed_url.path)
+            endpoint, stream_path = parse_path(stream_path)
+            return OutputStream(stream_path, endpoint=endpoint, **kwargs)
+        else:
+            raise ValueError(
+                f"Unsupported datastore profile type: {type(datastore_profile)}"
+            )
     else:
-
+        kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+        if stream_path.startswith("kafka://") or kafka_brokers:
+            topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
+            return KafkaOutputStream(
+                topic, brokers, kwargs.get("kafka_producer_options")
+            )
+        elif stream_path.startswith("http://") or stream_path.startswith("https://"):
+            return HTTPOutputStream(stream_path=stream_path)
+        elif "://" not in stream_path:
+            return OutputStream(stream_path, **kwargs)
+        elif stream_path.startswith("v3io"):
+            endpoint, stream_path = parse_path(stream_path)
+            endpoint = kwargs.pop("endpoint", None) or endpoint
+            return OutputStream(stream_path, endpoint=endpoint, **kwargs)
+        elif stream_path.startswith("dummy://"):
+            return _DummyStream(**kwargs)
+        else:
+            raise ValueError(f"unsupported stream path {stream_path}")
 
 
 class _DummyStream:

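get_stream_pusher() now resolves ds:// paths through datastore profiles: Kafka profiles produce a KafkaOutputStream with producer options filtered through the new KafkaParameters helper, V3IO profiles fall back to a regular OutputStream, and the kafka:// / http(s):// / v3io / dummy:// handling is otherwise unchanged. A rough sketch, assuming a Kafka datastore profile named "my-kafka" has been registered for the project (profile name, topic and payload are placeholders):

    import mlrun.datastore

    # the path suffix overrides the profile's topic; a bare "ds://my-kafka" would use get_topic()
    pusher = mlrun.datastore.get_stream_pusher("ds://my-kafka/events")
    pusher.push({"id": 1, "value": 0.5})
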
mlrun/datastore/base.py
CHANGED
@@ -24,7 +24,6 @@ import pandas as pd
 import pyarrow
 import pytz
 import requests
-from deprecated import deprecated
 
 import mlrun.config
 import mlrun.errors
@@ -95,16 +94,6 @@ class DataStore:
     def uri_to_ipython(endpoint, subpath):
         return ""
 
-    # TODO: remove in 1.8.0
-    @deprecated(
-        version="1.8.0",
-        reason="'get_filesystem()' will be removed in 1.8.0, use "
-        "'filesystem' property instead",
-        category=FutureWarning,
-    )
-    def get_filesystem(self):
-        return self.filesystem
-
     @property
     def filesystem(self) -> Optional[fsspec.AbstractFileSystem]:
         """return fsspec file system object, if supported"""

mlrun/datastore/datastore_profile.py
CHANGED
@@ -171,6 +171,9 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
                 FutureWarning,
             )
 
+    def get_topic(self) -> typing.Optional[str]:
+        return self.topic
+
     def attributes(self):
         attributes = {"brokers": self.brokers or self.bootstrap_servers}
         if self.kwargs_public:
@@ -193,6 +196,10 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
     kwargs_public: typing.Optional[dict]
     kwargs_private: typing.Optional[dict]
 
+    def get_topic(self) -> typing.Optional[str]:
+        topics = [self.topics] if isinstance(self.topics, str) else self.topics
+        return topics[0] if topics else None
+
     def attributes(self) -> dict[str, typing.Any]:
         attributes = {}
         if self.kwargs_public:
@@ -209,13 +216,9 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
             attributes["initial_offset"] = self.initial_offset
         if self.partitions is not None:
            attributes["partitions"] = self.partitions
-        sasl
-
-
-            sasl["user"] = self.sasl_user
-            sasl["password"] = self.sasl_pass
-            sasl["mechanism"] = "PLAIN"
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=self.sasl_user, pwd=self.sasl_pass
+        ):
             attributes["sasl"] = sasl
         return attributes
 
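Both Kafka profiles gain a get_topic() helper: the target returns its single topic, while the source returns the first of its configured topics (or None). A hedged illustration with made-up profile names, broker and topic values:

    from mlrun.datastore.datastore_profile import (
        DatastoreProfileKafkaSource,
        DatastoreProfileKafkaTarget,
    )

    source = DatastoreProfileKafkaSource(
        name="my-kafka-source", brokers=["broker:9092"], topics=["events"]
    )
    target = DatastoreProfileKafkaTarget(
        name="my-kafka-target", brokers="broker:9092", topic="predictions"
    )
    print(source.get_topic())  # "events" - first configured topic, or None when empty
    print(target.get_topic())  # "predictions"
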
mlrun/datastore/sources.py
CHANGED
@@ -1100,13 +1100,9 @@ class KafkaSource(OnlineSource):
             attributes["initial_offset"] = initial_offset
         if partitions is not None:
             attributes["partitions"] = partitions
-        sasl
-
-
-            sasl["user"] = sasl_user
-            sasl["password"] = sasl_pass
-            sasl["mechanism"] = "PLAIN"
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=sasl_user, pwd=sasl_pass
+        ):
             attributes["sasl"] = sasl
         super().__init__(attributes=attributes, **kwargs)
 
@@ -1207,16 +1203,9 @@ class KafkaSource(OnlineSource):
         ]
 
         kafka_admin_kwargs = {}
-
-
-
-            {
-                "security_protocol": "SASL_PLAINTEXT",
-                "sasl_mechanism": sasl["mechanism"],
-                "sasl_plain_username": sasl["user"],
-                "sasl_plain_password": sasl["password"],
-            }
-        )
+        kafka_admin_kwargs = mlrun.datastore.utils.KafkaParameters(
+            self.attributes
+        ).admin()
 
         kafka_admin = KafkaAdminClient(bootstrap_servers=brokers, **kafka_admin_kwargs)
         try:

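KafkaSource now delegates both the SASL block and the admin-client options to the shared KafkaParameters helper instead of building those dictionaries inline. A sketch of constructing the source with SASL credentials; brokers, topic, group and credentials are placeholders:

    from mlrun.datastore.sources import KafkaSource

    source = KafkaSource(
        brokers=["broker:9092"],
        topics=["events"],
        group="my-consumer-group",
        sasl_user="user",    # with sasl_pass set, attributes["sasl"] is filled in via
        sasl_pass="secret",  # KafkaParameters.sasl(), defaulting to the PLAIN mechanism
    )
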
mlrun/datastore/storeytargets.py
CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from urllib.parse import urlparse
+
 import storey
 from mergedeep import merge
 from storey import V3ioDriver
@@ -18,6 +20,12 @@ from storey import V3ioDriver
 import mlrun
 import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    TDEngineDatastoreProfile,
+    datastore_profile_read,
+)
 
 from ..platforms.iguazio import parse_path
 from .utils import (
@@ -44,13 +52,8 @@ def get_url_and_storage_options(path, external_storage_options=None):
 class TDEngineStoreyTarget(storey.TDEngineTarget):
     def __init__(self, *args, url: str, **kwargs):
         if url.startswith("ds://"):
-            datastore_profile = (
-
-            )
-            if not isinstance(
-                datastore_profile,
-                mlrun.datastore.datastore_profile.TDEngineDatastoreProfile,
-            ):
+            datastore_profile = datastore_profile_read(url)
+            if not isinstance(datastore_profile, TDEngineDatastoreProfile):
                 raise ValueError(
                     f"Unexpected datastore profile type:{datastore_profile.type}."
                     "Only TDEngineDatastoreProfile is supported"
@@ -126,16 +129,24 @@ class StreamStoreyTarget(storey.StreamTarget):
 class KafkaStoreyTarget(storey.KafkaTarget):
     def __init__(self, *args, **kwargs):
         path = kwargs.pop("path")
-        attributes = kwargs.pop("attributes",
+        attributes = kwargs.pop("attributes", {})
         if path and path.startswith("ds://"):
-            datastore_profile = (
-
-
+            datastore_profile = datastore_profile_read(path)
+            if not isinstance(
+                datastore_profile,
+                (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Unsupported datastore profile type: {type(datastore_profile)}"
+                )
+
             attributes = merge(attributes, datastore_profile.attributes())
-            brokers = attributes.pop(
-
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed = urlparse(path)
+            topic = (
+                parsed.path.strip("/") if parsed.path else datastore_profile.get_topic()
             )
-            topic = datastore_profile.topic
         else:
             brokers = attributes.pop(
                 "brokers", attributes.pop("bootstrap_servers", None)
@@ -146,7 +157,10 @@ class KafkaStoreyTarget(storey.KafkaTarget):
             raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
         kwargs["brokers"] = brokers
         kwargs["topic"] = topic
-
+
+        attributes = mlrun.datastore.utils.KafkaParameters(attributes).producer()
+
+        super().__init__(*args, **kwargs, producer_options=attributes)
 
 
 class NoSqlStoreyTarget(storey.NoSqlTarget):

mlrun/datastore/utils.py
CHANGED
@@ -222,3 +222,76 @@ def validate_additional_filters(additional_filters):
             for sub_value in value:
                 if isinstance(sub_value, float) and math.isnan(sub_value):
                     raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
+
+
+class KafkaParameters:
+    def __init__(self, kwargs: dict):
+        import kafka
+
+        self._kafka = kafka
+        self._kwargs = kwargs
+        self._client_configs = {
+            "consumer": self._kafka.KafkaConsumer.DEFAULT_CONFIG,
+            "producer": self._kafka.KafkaProducer.DEFAULT_CONFIG,
+            "admin": self._kafka.KafkaAdminClient.DEFAULT_CONFIG,
+        }
+        self._custom_attributes = {
+            "max_workers": "",
+            "brokers": "",
+            "topics": "",
+            "group": "",
+            "initial_offset": "",
+            "partitions": "",
+            "sasl": "",
+            "worker_allocation_mode": "",
+        }
+        self._validate_keys()
+
+    def _validate_keys(self) -> None:
+        reference_dicts = (
+            self._custom_attributes,
+            self._kafka.KafkaAdminClient.DEFAULT_CONFIG,
+            self._kafka.KafkaProducer.DEFAULT_CONFIG,
+            self._kafka.KafkaConsumer.DEFAULT_CONFIG,
+        )
+        for key in self._kwargs:
+            if all(key not in d for d in reference_dicts):
+                raise ValueError(
+                    f"Key '{key}' not found in any of the Kafka reference dictionaries"
+                )
+
+    def _get_config(self, client_type: str) -> dict:
+        res = {
+            k: self._kwargs[k]
+            for k in self._kwargs.keys() & self._client_configs[client_type].keys()
+        }
+        if sasl := self._kwargs.get("sasl"):
+            res |= {
+                "security_protocol": "SASL_PLAINTEXT",
+                "sasl_mechanism": sasl["mechanism"],
+                "sasl_plain_username": sasl["user"],
+                "sasl_plain_password": sasl["password"],
+            }
+        return res
+
+    def consumer(self) -> dict:
+        return self._get_config("consumer")
+
+    def producer(self) -> dict:
+        return self._get_config("producer")
+
+    def admin(self) -> dict:
+        return self._get_config("admin")
+
+    def sasl(
+        self, *, usr: typing.Optional[str] = None, pwd: typing.Optional[str] = None
+    ) -> dict:
+        usr = usr or self._kwargs.get("sasl_plain_username", None)
+        pwd = pwd or self._kwargs.get("sasl_plain_password", None)
+        res = self._kwargs.get("sasl", {})
+        if usr and pwd:
+            res["enable"] = True
+            res["user"] = usr
+            res["password"] = pwd
+            res["mechanism"] = self._kwargs.get("sasl_mechanism", "PLAIN")
+        return res

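The new KafkaParameters helper validates a flat attributes dictionary against kafka-python's consumer/producer/admin DEFAULT_CONFIG keys (plus a small set of MLRun-specific keys such as brokers, topics and sasl) and slices out the subset relevant to each client. A small usage sketch with made-up values:

    from mlrun.datastore.utils import KafkaParameters

    params = KafkaParameters(
        {
            "brokers": "broker:9092",             # MLRun-specific key, not passed to the clients
            "max_request_size": 5 * 1024 * 1024,  # KafkaProducer config key
            "request_timeout_ms": 40000,          # known to all three client configs
            "sasl": {"user": "user", "password": "secret", "mechanism": "PLAIN"},
        }
    )
    print(params.producer())  # producer keys plus the SASL_PLAINTEXT settings derived from "sasl"
    print(params.admin())     # admin keys for KafkaAdminClient, with the same SASL handling
    print(params.sasl())      # returns the "sasl" dict as provided (no credentials to add here)

    # an unknown key raises ValueError at construction time:
    # KafkaParameters({"no_such_option": 1})
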
mlrun/db/base.py
CHANGED
@@ -257,6 +257,7 @@ class RunDBInterface(ABC):
         tag: Optional[str] = None,
         kind: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        states: Optional[list[mlrun.common.schemas.FunctionState]] = None,
         format_: mlrun.common.formatters.FunctionFormat = mlrun.common.formatters.FunctionFormat.full,
         since: Optional[datetime.datetime] = None,
         until: Optional[datetime.datetime] = None,

mlrun/db/httpdb.py
CHANGED
@@ -566,6 +566,17 @@ class HTTPRunDB(RunDBInterface):
             )
             config.alerts.mode = server_cfg.get("alerts_mode") or config.alerts.mode
             config.system_id = server_cfg.get("system_id") or config.system_id
+            model_monitoring_store_prefixes = (
+                server_cfg.get("model_endpoint_monitoring_store_prefixes") or {}
+            )
+            for prefix in ["default", "user_space", "monitoring_application"]:
+                store_prefix_value = model_monitoring_store_prefixes.get(prefix)
+                if store_prefix_value is not None:
+                    setattr(
+                        config.model_endpoint_monitoring.store_prefixes,
+                        prefix,
+                        store_prefix_value,
+                    )
 
         except Exception as exc:
             logger.warning(
@@ -1497,6 +1508,7 @@ class HTTPRunDB(RunDBInterface):
         until: Optional[datetime] = None,
         kind: Optional[str] = None,
         format_: mlrun.common.formatters.FunctionFormat = mlrun.common.formatters.FunctionFormat.full,
+        states: typing.Optional[list[mlrun.common.schemas.FunctionState]] = None,
     ):
         """Retrieve a list of functions, filtered by specific criteria.
 
@@ -1514,6 +1526,7 @@ class HTTPRunDB(RunDBInterface):
         :param until: Return functions updated before this date (as datetime object).
         :param kind: Return only functions of a specific kind.
         :param format_: The format in which to return the functions. Default is 'full'.
+        :param states: Return only functions whose state is one of the provided states.
         :returns: List of function objects (as dictionary).
         """
         functions, _ = self._list_functions(
@@ -1525,6 +1538,7 @@ class HTTPRunDB(RunDBInterface):
             format_=format_,
             since=since,
             until=until,
+            states=states,
             return_all=True,
         )
         return functions
@@ -5135,6 +5149,7 @@ class HTTPRunDB(RunDBInterface):
         format_: Optional[str] = None,
         since: Optional[datetime] = None,
         until: Optional[datetime] = None,
+        states: typing.Optional[list[mlrun.common.schemas.FunctionState]] = None,
         page: Optional[int] = None,
         page_size: Optional[int] = None,
         page_token: Optional[str] = None,
@@ -5152,6 +5167,7 @@ class HTTPRunDB(RunDBInterface):
             "since": datetime_to_iso(since),
             "until": datetime_to_iso(until),
             "format": format_,
+            "state": states or None,
             "page": page,
             "page-size": page_size,
             "page-token": page_token,

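list_functions() (and the run DB interfaces above) now accept a states filter that is forwarded as the "state" query parameter. A hedged example; the project name is a placeholder and FunctionState.ready is assumed to be one of the available states:

    import mlrun
    import mlrun.common.schemas as schemas

    db = mlrun.get_run_db()
    ready_functions = db.list_functions(
        project="my-project",
        states=[schemas.FunctionState.ready],
    )
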
mlrun/db/nopdb.py
CHANGED
@@ -274,6 +274,7 @@ class NopDB(RunDBInterface):
         tag: Optional[str] = None,
         kind: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        states: Optional[list[mlrun.common.schemas.FunctionState]] = None,
         format_: mlrun.common.formatters.FunctionFormat = mlrun.common.formatters.FunctionFormat.full,
         since: Optional[datetime.datetime] = None,
         until: Optional[datetime.datetime] = None,

mlrun/feature_store/__init__.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 __all__ = [
+    "ingest",
     "delete_feature_set",
     "delete_feature_vector",
     "get_feature_set",
@@ -35,6 +36,7 @@ from .api import (
     delete_feature_vector,
     get_feature_set,
     get_feature_vector,
+    ingest,
 )
 from .common import RunConfig
 from .feature_set import FeatureSet

mlrun/feature_store/api.py
CHANGED
@@ -244,6 +244,83 @@ def _get_namespace(run_config: RunConfig) -> dict[str, Any]:
     return get_caller_globals()
 
 
+def ingest(
+    mlrun_context: Union["mlrun.MLrunProject", "mlrun.MLClientCtx"],
+    featureset: Union[FeatureSet, str] = None,
+    source=None,
+    targets: Optional[list[DataTargetBase]] = None,
+    namespace=None,
+    return_df: bool = True,
+    infer_options: InferOptions = InferOptions.default(),
+    run_config: RunConfig = None,
+    spark_context=None,
+    overwrite=None,
+) -> Optional[pd.DataFrame]:
+    """Read local DataFrame, file, URL, or source into the feature store
+    Ingest reads from the source, run the graph transformations, infers metadata and stats
+    and writes the results to the default of specified targets
+
+    when targets are not specified data is stored in the configured default targets
+    (will usually be NoSQL for real-time and Parquet for offline).
+
+    the `run_config` parameter allow specifying the function and job configuration,
+    see: :py:class:`~mlrun.feature_store.RunConfig`
+
+    example::
+
+        stocks_set = FeatureSet("stocks", entities=[Entity("ticker")])
+        stocks = pd.read_csv("stocks.csv")
+        df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
+
+        # for running as remote job
+        config = RunConfig(image="mlrun/mlrun")
+        df = ingest(stocks_set, stocks, run_config=config)
+
+        # specify source and targets
+        source = CSVSource("mycsv", path="measurements.csv")
+        targets = [CSVTarget("mycsv", path="./mycsv.csv")]
+        ingest(measurements, source, targets)
+
+    :param mlrun_context: mlrun context
+    :param featureset: feature set object or featureset.uri. (uri must be of a feature set that is in the DB,
+                       call `.save()` if it's not)
+    :param source: source dataframe or other sources (e.g. parquet source see:
+                   :py:class:`~mlrun.datastore.ParquetSource` and other classes in mlrun.datastore with suffix
+                   Source)
+    :param targets: optional list of data target objects
+    :param namespace: namespace or module containing graph classes
+    :param return_df: indicate if to return a dataframe with the graph results
+    :param infer_options: schema (for discovery of entities, features in featureset), index, stats,
+                          histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
+    :param run_config: function and/or run configuration for remote jobs,
+                       see :py:class:`~mlrun.feature_store.RunConfig`
+    :param spark_context: local spark session for spark ingestion, example for creating the spark context:
+                          `spark = SparkSession.builder.appName("Spark function").getOrCreate()`
+                          For remote spark ingestion, this should contain the remote spark service name
+    :param overwrite: delete the targets' data prior to ingestion
+                      (default: True for non scheduled ingest - deletes the targets that are about to be ingested.
+                      False for scheduled ingest - does not delete the target)
+    :return: if return_df is True, a dataframe will be returned based on the graph
+    """
+    if not mlrun_context:
+        raise mlrun.errors.MLRunValueError(
+            "mlrun_context must be defined when calling ingest()"
+        )
+
+    return _ingest(
+        featureset,
+        source,
+        targets,
+        namespace,
+        return_df,
+        infer_options,
+        run_config,
+        mlrun_context,
+        spark_context,
+        overwrite,
+    )
+
+
 def _ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
