mlrun 1.7.0rc1__py3-none-any.whl → 1.7.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/artifacts/model.py +8 -1
- mlrun/common/db/sql_session.py +3 -0
- mlrun/config.py +10 -3
- mlrun/datastore/base.py +0 -28
- mlrun/datastore/datastore_profile.py +12 -0
- mlrun/datastore/sources.py +1 -5
- mlrun/datastore/targets.py +9 -5
- mlrun/datastore/v3io.py +70 -46
- mlrun/feature_store/api.py +56 -56
- mlrun/feature_store/feature_set.py +0 -2
- mlrun/feature_store/feature_vector.py +120 -0
- mlrun/feature_store/steps.py +1 -9
- mlrun/features.py +0 -2
- mlrun/k8s_utils.py +51 -0
- mlrun/model_monitoring/stream_processing.py +3 -21
- mlrun/projects/project.py +45 -7
- mlrun/serving/remote.py +0 -4
- mlrun/serving/routers.py +14 -6
- mlrun/serving/states.py +1 -0
- mlrun/serving/v2_serving.py +45 -3
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/regex.py +5 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/METADATA +10 -10
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/RECORD +29 -29
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc1.dist-info → mlrun-1.7.0rc2.dist-info}/top_level.txt +0 -0
mlrun/artifacts/model.py
CHANGED
@@ -13,7 +13,9 @@
 # limitations under the License.
 import tempfile
 from os import path
+from typing import Any

+import pandas as pd
 import yaml
 from deprecated import deprecated

@@ -259,6 +261,7 @@ class ModelArtifact(Artifact):
         """
         subset = df
         inferer = get_infer_interface(subset)
+        numeric_columns = self._extract_numeric_features(df)
         if label_columns:
             if not isinstance(label_columns, list):
                 label_columns = [label_columns]
@@ -272,9 +275,13 @@ class ModelArtifact(Artifact):
         )
         if with_stats:
             self.spec.feature_stats = inferer.get_stats(
-                df, options=InferOptions.Histogram, num_bins=num_bins
+                df[numeric_columns], options=InferOptions.Histogram, num_bins=num_bins
             )

+    @staticmethod
+    def _extract_numeric_features(df: pd.DataFrame) -> list[Any]:
+        return [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])]
+
     @property
     def is_dir(self):
         return True
mlrun/common/db/sql_session.py
CHANGED
@@ -62,9 +62,12 @@ def _init_engine(dsn=None):
     max_overflow = config.httpdb.db.connections_pool_max_overflow
     if max_overflow is None:
         max_overflow = config.httpdb.max_workers
+
     kwargs = {
         "pool_size": pool_size,
         "max_overflow": max_overflow,
+        "pool_pre_ping": config.httpdb.db.connections_pool_pre_ping,
+        "pool_recycle": config.httpdb.db.connections_pool_recycle,
     }
     engine = create_engine(dsn, **kwargs)
     _engines[dsn] = engine
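For context, `pool_pre_ping` and `pool_recycle` are standard SQLAlchemy engine options rather than mlrun-specific ones. A minimal sketch of what they do; the in-memory SQLite DSN is only for illustration, the real engine uses the configured DSN:

```python
from sqlalchemy import create_engine, text

# pool_pre_ping issues a lightweight liveness check on every connection checkout
# and transparently replaces stale connections; pool_recycle retires connections
# older than the given number of seconds, avoiding "server has gone away" style
# errors on long-lived pools.
engine = create_engine(
    "sqlite://",          # illustrative in-memory DSN
    pool_pre_ping=True,
    pool_recycle=60 * 60,
)

with engine.connect() as conn:
    print(conn.execute(text("select 1")).scalar())  # 1
```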
mlrun/config.py
CHANGED
@@ -109,7 +109,10 @@ default_config = {
     "runs": {
         # deleting runs is a heavy operation that includes deleting runtime resources, therefore we do it in chunks
         "batch_delete_runs_chunk_size": 10,
-    }
+    },
+    "resources": {
+        "delete_crd_resources_timeout": "5 minutes",
+    },
 },
 # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
 # before deleting them (4 hours)
@@ -303,7 +306,11 @@ default_config = {
     # default is 16MB, max 1G, for more info https://dev.mysql.com/doc/refman/8.0/en/packet-too-large.html
     "max_allowed_packet": 64000000,  # 64MB
 },
-#
+# tests connections for liveness upon each checkout
+"connections_pool_pre_ping": True,
+# this setting causes the pool to recycle connections after the given number of seconds has passed
+"connections_pool_recycle": 60 * 60,
+# None defaults to httpdb.max_workers
 "connections_pool_size": None,
 "connections_pool_max_overflow": None,
 # below is a db-specific configuration
@@ -408,7 +415,7 @@ default_config = {
 "iguazio_access_key": "",
 "iguazio_list_projects_default_page_size": 200,
 "iguazio_client_job_cache_ttl": "20 minutes",
-"nuclio_project_deletion_verification_timeout": "
+"nuclio_project_deletion_verification_timeout": "300 seconds",
 "nuclio_project_deletion_verification_interval": "5 seconds",
 },
 # The API needs to know what is its k8s svc url so it could enrich it in the jobs it creates
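A small sketch of where the new pool defaults surface at runtime; the attribute paths match the ones read in `sql_session.py` above, and the printed values assume the packaged defaults are left untouched:

```python
import mlrun

# the new pool settings live under mlrun's global config object and are read by
# _init_engine() when the API builds its SQLAlchemy engine
print(mlrun.mlconf.httpdb.db.connections_pool_pre_ping)  # True
print(mlrun.mlconf.httpdb.db.connections_pool_recycle)   # 3600 (60 * 60 seconds)
```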
mlrun/datastore/base.py
CHANGED
@@ -654,34 +654,6 @@ def http_get(url, headers=None, auth=None):
     return response.content


-def http_head(url, headers=None, auth=None):
-    try:
-        response = requests.head(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.headers
-
-
-def http_put(url, data, headers=None, auth=None, session=None):
-    try:
-        put_api = session.put if session else requests.put
-        response = put_api(
-            url, data=data, headers=headers, auth=auth, verify=verify_ssl
-        )
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}") from exc
-
-    mlrun.errors.raise_for_status(response)
-
-
-def http_upload(url, file_path, headers=None, auth=None):
-    with open(file_path, "rb") as data:
-        http_put(url, data, headers, auth)
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
mlrun/datastore/datastore_profile.py
CHANGED

@@ -382,6 +382,18 @@ def datastore_profile_read(url, project_name="", secrets: dict = None):
     public_profile = mlrun.db.get_run_db().get_datastore_profile(
         profile_name, project_name
     )
+    # The mlrun.db.get_run_db().get_datastore_profile() function is capable of returning
+    # two distinct types of objects based on its execution context.
+    # If it operates from the client or within the pod (which is the common scenario),
+    # it yields an instance of `mlrun.datastore.DatastoreProfile`. Conversely,
+    # when executed on the server with a direct call to `sqldb`, it produces an instance of
+    # mlrun.common.schemas.DatastoreProfile.
+    # In the latter scenario, an extra conversion step is required to transform the object
+    # into mlrun.datastore.DatastoreProfile.
+    if isinstance(public_profile, mlrun.common.schemas.DatastoreProfile):
+        public_profile = DatastoreProfile2Json.create_from_json(
+            public_json=public_profile.object
+        )
     project_ds_name_private = DatastoreProfile.generate_secret_key(
         profile_name, project_name
     )
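The added comment describes two possible return types and a normalization step. A generic sketch of that normalize-on-type pattern; the classes below are stand-ins, not the real mlrun schema or datastore types:

```python
from dataclasses import dataclass


@dataclass
class WireProfile:
    """Stand-in for the schema object returned by the server-side (sqldb) path."""
    object: str  # JSON payload describing the profile


@dataclass
class ClientProfile:
    """Stand-in for the client-side datastore profile object."""
    payload: str

    @classmethod
    def create_from_json(cls, public_json: str) -> "ClientProfile":
        return cls(payload=public_json)


def normalize(profile):
    # the server path returns the wire/schema type; convert it so callers always
    # receive the same client-side type regardless of execution context
    if isinstance(profile, WireProfile):
        profile = ClientProfile.create_from_json(public_json=profile.object)
    return profile


print(normalize(WireProfile(object='{"type": "s3"}')))
print(normalize(ClientProfile(payload='{"type": "s3"}')))
```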
mlrun/datastore/sources.py
CHANGED
@@ -848,8 +848,6 @@ class HttpSource(OnlineSource):


 class StreamSource(OnlineSource):
-    """Sets stream source for the flow. If stream doesn't exist it will create it"""
-
     kind = "v3ioStream"

     def __init__(
@@ -863,7 +861,7 @@ class StreamSource(OnlineSource):
         **kwargs,
     ):
         """
-        Sets stream source for the flow. If stream doesn't exist it will create it
+        Sets the stream source for the flow. If the stream doesn't exist it will create it.

         :param name: stream name. Default "stream"
         :param group: consumer group. Default "serving"
@@ -915,8 +913,6 @@ class StreamSource(OnlineSource):


 class KafkaSource(OnlineSource):
-    """Sets kafka source for the flow"""
-
     kind = "kafka"

     def __init__(
mlrun/datastore/targets.py
CHANGED
@@ -727,7 +727,7 @@ class BaseStoreTarget(DataTargetBase):


 class ParquetTarget(BaseStoreTarget):
-    """
+    """Parquet target storage driver, used to materialize feature set/vector data into parquet files.

     :param name: optional, target name. By default will be called ParquetTarget
     :param path: optional, Output path. Can be either a file or directory.
@@ -1911,12 +1911,16 @@ class SQLTarget(BaseStoreTarget):
             # creat new table with the given name
             columns = []
             for col, col_type in self.schema.items():
-
-                if
-                    raise TypeError(
+                col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
+                if col_type_sql is None:
+                    raise TypeError(
+                        f"'{col_type}' unsupported type for column '{col}'"
+                    )
                 columns.append(
                     sqlalchemy.Column(
-                        col,
+                        col,
+                        col_type_sql,
+                        primary_key=(col in primary_key_for_check),
                     )
                 )

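The SQLTarget fix looks up the SQL column type once and fails fast on unsupported schema types. A standalone sketch of the same lookup-then-build pattern with SQLAlchemy; the mapping and schema below are illustrative, not mlrun's actual `TYPE_TO_SQL_TYPE`:

```python
import sqlalchemy

# illustrative mapping from schema type names to SQLAlchemy column types
TYPE_TO_SQL_TYPE = {
    "int": sqlalchemy.Integer,
    "str": sqlalchemy.String(64),
    "float": sqlalchemy.Float,
    "bool": sqlalchemy.Boolean,
}


def build_columns(schema: dict, primary_keys: set) -> list:
    columns = []
    for col, col_type in schema.items():
        col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
        if col_type_sql is None:
            # fail fast instead of silently creating a table with a wrong column type
            raise TypeError(f"'{col_type}' unsupported type for column '{col}'")
        columns.append(
            sqlalchemy.Column(col, col_type_sql, primary_key=(col in primary_keys))
        )
    return columns


cols = build_columns({"id": "int", "name": "str", "score": "float"}, {"id"})
table = sqlalchemy.Table("example", sqlalchemy.MetaData(), *cols)
print([(c.name, c.primary_key) for c in table.columns])
# [('id', True), ('name', False), ('score', False)]
```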
mlrun/datastore/v3io.py
CHANGED
@@ -15,12 +15,11 @@
 import mmap
 import os
 import time
-from copy import deepcopy
 from datetime import datetime

 import fsspec
-import
-
+import v3io
+from v3io.dataplane.response import HttpResponseError

 import mlrun
 from mlrun.datastore.helpers import ONE_GB, ONE_MB
@@ -30,11 +29,6 @@ from .base import (
     DataStore,
     FileStats,
     basic_auth_header,
-    get_range,
-    http_get,
-    http_head,
-    http_put,
-    http_upload,
 )

 V3IO_LOCAL_ROOT = "v3io"
@@ -47,17 +41,18 @@ class V3ioStore(DataStore):

         self.headers = None
         self.secure = self.kind == "v3ios"
+
+        token = self._get_secret_or_env("V3IO_ACCESS_KEY")
+        username = self._get_secret_or_env("V3IO_USERNAME")
+        password = self._get_secret_or_env("V3IO_PASSWORD")
         if self.endpoint.startswith("https://"):
             self.endpoint = self.endpoint[len("https://") :]
             self.secure = True
         elif self.endpoint.startswith("http://"):
             self.endpoint = self.endpoint[len("http://") :]
             self.secure = False
-
-
-        username = self._get_secret_or_env("V3IO_USERNAME")
-        password = self._get_secret_or_env("V3IO_PASSWORD")
-
+        self.client = v3io.dataplane.Client(access_key=token, endpoint=self.url)
+        self.object = self.client.object
         self.auth = None
         self.token = token
         if token:
@@ -65,6 +60,16 @@ class V3ioStore(DataStore):
         elif username and password:
             self.headers = basic_auth_header(username, password)

+    @staticmethod
+    def _do_object_request(function: callable, *args, **kwargs):
+        try:
+            return function(*args, **kwargs)
+        except HttpResponseError as http_response_error:
+            raise mlrun.errors.err_for_status_code(
+                status_code=http_response_error.status_code,
+                message=mlrun.errors.err_to_str(http_response_error),
+            )
+
     @staticmethod
     def uri_to_ipython(endpoint, subpath):
         return V3IO_LOCAL_ROOT + subpath
@@ -91,13 +96,19 @@ class V3ioStore(DataStore):

     def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
         """helper function for upload method, allows for controlling max_chunk_size in testing"""
+        container, path = split_path(self._join(key))
         file_size = os.path.getsize(src_path)  # in bytes
         if file_size <= ONE_MB:
-
+            with open(src_path, "rb") as source_file:
+                data = source_file.read()
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data,
+                append=False,
+            )
             return
-        append_header = deepcopy(self.headers)
-        append_header["Range"] = "-1"
-
         # chunk must be a multiple of the ALLOCATIONGRANULARITY
         # https://docs.python.org/3/library/mmap.html
         if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
@@ -114,11 +125,13 @@ class V3ioStore(DataStore):
             access=mmap.ACCESS_READ,
             offset=file_offset,
         ) as mmap_obj:
-
-
-
-
-
+            append = file_offset != 0
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=mmap_obj,
+                append=append,
             )
             file_offset += chunk_size

@@ -126,43 +139,55 @@ class V3ioStore(DataStore):
         return self._upload(key, src_path)

     def get(self, key, size=None, offset=0):
-
-
-
-
-
+        container, path = split_path(self._join(key))
+        return self._do_object_request(
+            function=self.object.get,
+            container=container,
+            path=path,
+            offset=offset,
+            num_bytes=size,
+        ).body

-    def _put(self, key, data, max_chunk_size: int = ONE_GB):
+    def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         if buffer_size <= ONE_MB:
-
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data,
+                append=append,
+            )
             return
-        append_header = deepcopy(self.headers)
-        append_header["Range"] = "-1"
         buffer_offset = 0
         try:
             data = memoryview(data)
         except TypeError:
             pass

-
-
-
-
-
-
-
-
-
-
-
+        while buffer_offset < buffer_size:
+            chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+            append = True if buffer_offset or append else False
+            self._do_object_request(
+                self.object.put,
+                container=container,
+                path=path,
+                body=data[buffer_offset : buffer_offset + chunk_size],
+                append=append,
+            )
+            buffer_offset += chunk_size

     def put(self, key, data, append=False):
-        return self._put(key, data)
+        return self._put(key, data, append)

     def stat(self, key):
-
+        container, path = split_path(self._join(key))
+        response = self._do_object_request(
+            function=self.object.head, container=container, path=path
+        )
+        head = dict(response.headers)
         size = int(head.get("Content-Length", "0"))
         datestr = head.get("Last-Modified", "0")
         modified = time.mktime(
@@ -171,7 +196,6 @@ class V3ioStore(DataStore):
         return FileStats(size, modified)

     def listdir(self, key):
-        v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
         container, subpath = split_path(self._join(key))
         if not subpath.endswith("/"):
             subpath += "/"
@@ -180,7 +204,7 @@ class V3ioStore(DataStore):
         subpath_length = len(subpath) - 1

         try:
-            response =
+            response = self.client.container.list(
                 container=container,
                 path=subpath,
                 get_all_attributes=False,
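The rewritten `_put` streams large buffers through the v3io client in chunks, switching to append mode after the first chunk, and routes every SDK call through `_do_object_request` so HTTP errors surface as mlrun errors. A library-free sketch of the chunking logic only; `put_object` below is a stand-in for the real client call:

```python
ONE_MB = 1024 * 1024


def chunked_put(put_object, data: bytes, append: bool = False, max_chunk_size: int = ONE_MB):
    """Upload `data` through `put_object(body=..., append=...)` in chunks.

    The first chunk honours the caller's `append` flag; every later chunk is
    appended so the object grows instead of being overwritten.
    """
    buffer_size = len(data)
    if buffer_size <= max_chunk_size:
        put_object(body=data, append=append)
        return
    view = memoryview(data)
    offset = 0
    while offset < buffer_size:
        chunk = view[offset : offset + max_chunk_size]
        put_object(body=chunk, append=append or offset > 0)
        offset += len(chunk)


# toy sink that just accumulates what would have been written to v3io
written = bytearray()


def put_object(body, append):
    if not append:
        written.clear()
    written.extend(body)


chunked_put(put_object, b"x" * (3 * ONE_MB + 7))
print(len(written))  # 3145735 bytes arrived, across four put calls
```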
mlrun/feature_store/api.py
CHANGED
@@ -114,44 +114,6 @@ def get_offline_features(
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
 ):
-    return _get_offline_features(
-        feature_vector,
-        entity_rows,
-        entity_timestamp_column,
-        target,
-        run_config,
-        drop_columns,
-        start_time,
-        end_time,
-        with_indexes,
-        update_stats,
-        engine,
-        engine_args,
-        query,
-        order_by,
-        spark_service,
-        timestamp_for_filtering,
-    )
-
-
-def _get_offline_features(
-    feature_vector: Union[str, FeatureVector],
-    entity_rows=None,
-    entity_timestamp_column: str = None,
-    target: DataTargetBase = None,
-    run_config: RunConfig = None,
-    drop_columns: list[str] = None,
-    start_time: Union[str, datetime] = None,
-    end_time: Union[str, datetime] = None,
-    with_indexes: bool = False,
-    update_stats: bool = False,
-    engine: str = None,
-    engine_args: dict = None,
-    query: str = None,
-    order_by: Union[str, list[str]] = None,
-    spark_service: str = None,
-    timestamp_for_filtering: Union[str, dict[str, str]] = None,
-) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     """retrieve offline feature vector results

     specify a feature vector object/uri and retrieve the desired features, their metadata
@@ -212,6 +174,44 @@ def _get_offline_features(
     merge process using start_time and end_time params.

     """
+    return _get_offline_features(
+        feature_vector,
+        entity_rows,
+        entity_timestamp_column,
+        target,
+        run_config,
+        drop_columns,
+        start_time,
+        end_time,
+        with_indexes,
+        update_stats,
+        engine,
+        engine_args,
+        query,
+        order_by,
+        spark_service,
+        timestamp_for_filtering,
+    )
+
+
+def _get_offline_features(
+    feature_vector: Union[str, FeatureVector],
+    entity_rows=None,
+    entity_timestamp_column: str = None,
+    target: DataTargetBase = None,
+    run_config: RunConfig = None,
+    drop_columns: list[str] = None,
+    start_time: Union[str, datetime] = None,
+    end_time: Union[str, datetime] = None,
+    with_indexes: bool = False,
+    update_stats: bool = False,
+    engine: str = None,
+    engine_args: dict = None,
+    query: str = None,
+    order_by: Union[str, list[str]] = None,
+    spark_service: str = None,
+    timestamp_for_filtering: Union[str, dict[str, str]] = None,
+) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
             "entity_timestamp_column param "
@@ -281,24 +281,6 @@ def get_online_feature_service(
     update_stats: bool = False,
     entity_keys: list[str] = None,
 ):
-    return _get_online_feature_service(
-        feature_vector,
-        run_config,
-        fixed_window_type,
-        impute_policy,
-        update_stats,
-        entity_keys,
-    )
-
-
-def _get_online_feature_service(
-    feature_vector: Union[str, FeatureVector],
-    run_config: RunConfig = None,
-    fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: dict = None,
-    update_stats: bool = False,
-    entity_keys: list[str] = None,
-) -> OnlineVectorService:
     """initialize and return online feature vector service api,
     returns :py:class:`~mlrun.feature_store.OnlineVectorService`

@@ -362,6 +344,24 @@ def _get_online_feature_service(
     :return: Initialize the `OnlineVectorService`.
              Will be used in subclasses where `support_online=True`.
     """
+    return _get_online_feature_service(
+        feature_vector,
+        run_config,
+        fixed_window_type,
+        impute_policy,
+        update_stats,
+        entity_keys,
+    )
+
+
+def _get_online_feature_service(
+    feature_vector: Union[str, FeatureVector],
+    run_config: RunConfig = None,
+    fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
+    impute_policy: dict = None,
+    update_stats: bool = False,
+    entity_keys: list[str] = None,
+) -> OnlineVectorService:
     if isinstance(feature_vector, FeatureVector):
         update_stats = True
         feature_vector = _features_to_vector_and_check_permissions(
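The api.py change inverts the previous layout: the public `get_offline_features` / `get_online_feature_service` functions now carry the docstrings and simply delegate to private `_get_*` implementations. A generic sketch of that wrapper pattern; the names below are illustrative:

```python
def get_things(query: str, limit: int = 10) -> list[str]:
    """Public entry point: fetch up to `limit` things matching `query`.

    Keeping the docstring (and signature) on the public function keeps
    help(get_things) and generated docs accurate, while the actual work lives
    in a private helper that internal callers can use directly.
    """
    return _get_things(query, limit)


def _get_things(query: str, limit: int = 10) -> list[str]:
    # private implementation, free to change without touching the public surface
    return [f"{query}-{i}" for i in range(limit)]


print(get_things("demo", 3))  # ['demo-0', 'demo-1', 'demo-2']
```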
mlrun/feature_store/feature_set.py
CHANGED

@@ -318,8 +318,6 @@ def emit_policy_to_dict(policy: EmitPolicy):


 class FeatureSet(ModelObj):
-    """Feature set object, defines a set of features and their data pipeline"""
-
     kind = mlrun.common.schemas.ObjectKind.feature_set.value
     _dict_fields = ["kind", "metadata", "spec", "status"]
